ZFS root (builtin): Difference between revisions
No edit summary |
No edit summary |
||
Line 244: | Line 244: | ||
*/ | */ | ||
#define FOOBARZ_INIT_VERSION "1.0. | #define FOOBARZ_INIT_VERSION "1.0.9" | ||
#define _BSD_SOURCE | #define _BSD_SOURCE | ||
#include <stdio.h> | #include <stdio.h> | ||
Line 261: | Line 261: | ||
#define PARAM_REQ_YES 1 | #define PARAM_REQ_YES 1 | ||
#define PARAM_SRC_DEFAULT 0 | #define PARAM_SRC_DEFAULT 0 | ||
#define | #define PARAM_SRC_CMDLINE 1 | ||
/* Your initramfs-source should contain the following | /* Your initramfs-source should contain the following | ||
Line 282: | Line 282: | ||
void printk(char *fmt, ...) { | void printk(char *fmt, ...) { | ||
FILE* f; | FILE* f; | ||
int fd; | |||
va_list args; | va_list args; | ||
Line 290: | Line 291: | ||
fflush(f); | fflush(f); | ||
fclose(f); | fclose(f); | ||
/* avoid flooding kmsg and having msgs suppressed; 20msgs/sec */ | |||
usleep(50000); | usleep(50000); | ||
} | } | ||
int main(int argc, char* argv[]) { | int main(int argc, char* argv[]) { | ||
/ | /*** variables */ | ||
int i; | int i; | ||
int fd = 0; /* file descriptor */ | int fd = 0; /* file descriptor */ | ||
Line 306: | Line 307: | ||
char* cmdline_end; | char* cmdline_end; | ||
char* temp_end; | char* temp_end; | ||
char* src_msg; /* default or | char* src_msg; /* default or cmdline */ | ||
int flag_param_missing = 0; | int flag_param_missing = 0; | ||
Line 334: | Line 335: | ||
enum { iroot, irootfstype, imountopt, iinit, irunlevel, iconsole, ilastparam }; | enum { iroot, irootfstype, imountopt, iinit, irunlevel, iconsole, ilastparam }; | ||
/ | /*** program */ | ||
printk("foobarz-init, version %s: booting initramfs.\n", FOOBARZ_INIT_VERSION); | printk("foobarz-init, version %s: booting initramfs.\n", FOOBARZ_INIT_VERSION); | ||
Line 346: | Line 346: | ||
} | } | ||
/* mount proc | /* mount proc /proc | ||
* | * note: some /dev devices symlink into /proc | ||
* proc contains info about processes, including cmdline etc. */ | |||
printk("Attempting cmd: mount proc /proc\n"); | printk("Attempting cmd: mount proc /proc\n"); | ||
if( mount("proc", "/proc", "proc", 0, NULL) != 0 ) { | if( mount("proc", "/proc", "proc", 0, NULL) != 0 ) { | ||
Line 356: | Line 357: | ||
} | } | ||
/* mount devtmpfs as | /* mount devtmpfs /dev | ||
* and | * note: This simple init program works if your root device is made from devices | ||
* that are available by default in devtmpfs, such as /dev/sd* | |||
* | |||
* For zfs, your root zfs pool should be created with default device nodes and | |||
* then it should be mountable by this simple init program. | |||
* | |||
* udev may be needed to configure device nodes and symlinks required | |||
* to access a root device configuration made with such nodes and symlinks. | |||
* If you need udevd, you can include it into your initramfs-source and | |||
* modify this program to run it before attempting to mount your root device. | |||
* However, if udevd is needed, a significant number of userspace programs may also be | |||
* required by rules in /lib/udev/. You could install busybox + udev (about 5MB) or | |||
* coreutils + util-linux + bash + udev (about 25MB) into initramfs-source. But, at that | |||
* point you'd have ash or bash and many tools that are easier to use than this | |||
* simple init program; it would then be easy to have /init as #!/bin/<b>ash script. */ | |||
printk("Attempting cmd: mount devtmpfs /dev\n"); | printk("Attempting cmd: mount devtmpfs /dev\n"); | ||
if( mount("devtmpfs", "/dev", "devtmpfs", 0, NULL) != 0 ) { | if( mount("devtmpfs", "/dev", "devtmpfs", 0, NULL) != 0 ) { | ||
Line 365: | Line 380: | ||
printk("Mount devtmpfs successful.\n"); | printk("Mount devtmpfs successful.\n"); | ||
} | } | ||
/* | |||
* | /* mount sysfs /sys | ||
* | * note: some kernel modules try to access /sys with userspace helpers to echo values into /sys variables; | ||
* such modules expect a minimal userspace that contains coreutils or busybox */ | |||
printk("Attempting cmd: mount sysfs /sys\n"); | |||
if( mount("sysfs", "/sys", "sysfs", 0, NULL) != 0 ) { | |||
printk("time to panic: mount: %s\n", strerror(errno)); | |||
return EX_UNAVAILABLE; | |||
} else { | |||
printk("Mount sysfs successful.\n"); | |||
} | |||
/* process kernel command line */ | /* process kernel command line */ | ||
Line 405: | Line 425: | ||
param[i].v = strstr(cmdline, param[i].n); | param[i].v = strstr(cmdline, param[i].n); | ||
if( param[i].v != NULL ) { | if( param[i].v != NULL ) { | ||
param[i].src = | param[i].src = PARAM_SRC_CMDLINE; | ||
while( *(param[i].v) != '=' ) param[i].v++; | while( *(param[i].v) != '=' ) param[i].v++; | ||
param[i].v++; | param[i].v++; | ||
Line 425: | Line 445: | ||
/* set defaults if no value on cmdline */ | /* set defaults if no value on cmdline */ | ||
if( param[i].v == NULL ) { | if( param[i].v == NULL ) { | ||
param[i].src = PARAM_SRC_DEFAULT; | param[i].src = PARAM_SRC_DEFAULT; | ||
if( param[i].req == PARAM_REQ_YES ) flag_param_missing = 1; | if( param[i].req == PARAM_REQ_YES ) flag_param_missing = 1; | ||
switch(i) { | switch(i) { | ||
Line 448: | Line 468: | ||
/* generic nv pair kernel cmdline processing finished | /* generic nv pair kernel cmdline processing finished | ||
* now, examine specific params for defaults and correctness | * now, examine specific params for defaults and correctness */ | ||
/* param[irootfstype]: can be checked against /proc/filesystems: */ | /* param[irootfstype]: can be checked against /proc/filesystems: */ | ||
fd = open("/proc/filesystems", O_RDONLY); | fd = open("/proc/filesystems", O_RDONLY); | ||
if( fd == -1 ) { | if( fd == -1 ) { | ||
Line 471: | Line 487: | ||
} | } | ||
/* zfs-specific informative checks */ | |||
if( strcmp(param[irootfstype].v, "zfs") == 0 ) { | if( strcmp(param[irootfstype].v, "zfs") == 0 ) { | ||
if( access("/etc/zfs/zpool.cache", F_OK) == 0 ) | if( access("/etc/zfs/zpool.cache", F_OK) == 0 ) | ||
Line 476: | Line 493: | ||
else | else | ||
printk("rootfstype=%s: /etc/zfs/zpool.cache not present in initramfs.\n", param[irootfstype].v); | printk("rootfstype=%s: /etc/zfs/zpool.cache not present in initramfs.\n", param[irootfstype].v); | ||
if( access("/etc/hostid", F_OK) == 0 ) | if( access("/etc/hostid", F_OK) == 0 ) | ||
printk("rootfstype=%s: /etc/hostid is present in initramfs.\n", param[irootfstype].v); | printk("rootfstype=%s: /etc/hostid is present in initramfs.\n", param[irootfstype].v); | ||
Line 481: | Line 499: | ||
printk("rootfstype=%s: /etc/hostid not present in initramfs.\n", param[irootfstype].v); | printk("rootfstype=%s: /etc/hostid not present in initramfs.\n", param[irootfstype].v); | ||
} | } | ||
if( strcmp(param[imountopt].v, "ro") == 0 ) mountflags = MS_RDONLY; | if( strcmp(param[imountopt].v, "ro") == 0 ) mountflags = MS_RDONLY; | ||
Line 490: | Line 507: | ||
} | } | ||
/* param[iroot]: nothing to check; if user gives bad root=device then mount fails */ | |||
/* try to mount root=device at /mnt | |||
* | |||
* note: for zfs, if a copy of /etc/zfs/zpool.cache (when pool is imported) is put in initramfs-source, then | |||
* the zfs module can read it and automatically import the pools described in the cache file; the imported | |||
* pools can be available to mount here if they were created using standard device names, otherwise | |||
* udevd may be required to run before mounting the pool */ | |||
printk("Attempting cmd: mount -t %s -o %s %s /mnt.\n", param[irootfstype].v, param[imountopt].v, param[iroot].v); | printk("Attempting cmd: mount -t %s -o %s %s /mnt.\n", param[irootfstype].v, param[imountopt].v, param[iroot].v); | ||
if( mount(param[iroot].v, "/mnt", param[irootfstype].v, mountflags, NULL) != 0 ) { | if( mount(param[iroot].v, "/mnt", param[irootfstype].v, mountflags, NULL) != 0 ) { | ||
Line 497: | Line 522: | ||
printk("%s mounted successfully.\n", param[iroot].v); | printk("%s mounted successfully.\n", param[iroot].v); | ||
/* check to see if the mounted root filesystem has an executable init program */ | |||
chdir("/mnt"); | chdir("/mnt"); | ||
if( access(param[iinit].v+1, X_OK) != 0 ) { | if( access(param[iinit].v+1, X_OK) != 0 ) { | ||
Line 511: | Line 537: | ||
} | } | ||
chdir("/"); | chdir("/"); | ||
printk("Init program /mnt/%s is present and executable.\n", param[iinit].v+1); | |||
printk(" | /* switch the root / from initramfs to the mounted new root device at /mnt. | ||
* | |||
* note: after this switch, it is not possible to access the initramfs files anymore, | |||
* yet they consume ram memory unless they are deleted here before switching. | |||
* Any programs that are run after clearing the initramfs and switching root must exist on the new root. | |||
* This program may safely delete itself (/init) since it is already in ram and executing. | |||
* If you have installed additional files and programs in initramfs that consume significant ram, | |||
* then you need to insert additional code here to delete those files (carefully). */ | |||
/* delete files off of initramfs to free ram memory */ | |||
printk("Freeing memory from initramfs...\n"); | |||
if( unlink("/init") != 0 ) printk("unlink %s: %s\n", "/init", strerror(errno)); | |||
else printk("/init %s", "deleted from initramfs.\n"); | |||
/* switch root */ | |||
printk("Beginning switch root procedure.\n"); | printk("Beginning switch root procedure.\n"); | ||
Line 527: | Line 568: | ||
} | } | ||
printk("(3) Attempting cmd: chdir /mnt \n"); | printk("(3) Attempting cmd: mount --move /sys /mnt/sys \n"); | ||
if( mount("/sys", "/mnt/sys", NULL, MS_MOVE, NULL) != 0 ) { | |||
printk("time to panic: mount: %s\n", strerror(errno)); | |||
return EX_UNAVAILABLE; | |||
} | |||
printk("(4) Attempting cmd: chdir /mnt \n"); | |||
if( chdir("/mnt") != 0 ) { | if( chdir("/mnt") != 0 ) { | ||
printk("time to panic: chdir: %s\n", strerror(errno)); | printk("time to panic: chdir: %s\n", strerror(errno)); | ||
Line 533: | Line 580: | ||
} | } | ||
printk("( | printk("(5) Attempting cmd: mount --move . / \n"); | ||
if( mount(".", "/", NULL, MS_MOVE, NULL) != 0 ) { | if( mount(".", "/", NULL, MS_MOVE, NULL) != 0 ) { | ||
printk("time to panic: mount: %s\n", strerror(errno)); | printk("time to panic: mount: %s\n", strerror(errno)); | ||
Line 539: | Line 586: | ||
} | } | ||
printk("( | printk("(6) Attempting cmd: chroot . \n"); | ||
if( chroot(".") != 0 ) { | if( chroot(".") != 0 ) { | ||
printk("time to panic: chroot: %s\n", strerror(errno)); | printk("time to panic: chroot: %s\n", strerror(errno)); | ||
Line 545: | Line 592: | ||
} | } | ||
printk("( | printk("(7) Attempting cmd: chdir / \n"); | ||
if( chdir("/") != 0 ) { | if( chdir("/") != 0 ) { | ||
printk("time to panic: chdir: %s\n", strerror(errno)); | printk("time to panic: chdir: %s\n", strerror(errno)); | ||
Line 555: | Line 602: | ||
* stdin, stdout, and stderr to named console device | * stdin, stdout, and stderr to named console device | ||
*/ | */ | ||
if( param[iconsole].src == | if( param[iconsole].src == PARAM_SRC_CMDLINE ) { | ||
printk("Console redirection to device %s requested.\n", param[iconsole].v); | printk("Console redirection to device %s requested.\n", param[iconsole].v); | ||
/* expect only basename of console device (e.g., ttyS0), so chdir /dev */ | /* expect only basename of console device (e.g., ttyS0), so chdir /dev */ | ||
Line 640: | Line 687: | ||
At the lilo prompt, use kernel parameter: console=ttyS0 (do NOT say /dev/ttyS0). Passing runlevel=s1 does not appear to be necessary. | At the lilo prompt, use kernel parameter: console=ttyS0 (do NOT say /dev/ttyS0). Passing runlevel=s1 does not appear to be necessary. | ||
When you are in qemu's serial | When you are in qemu's serial mode, the interface is a little different than in the normal graphical mode: use keypresses: | ||
<pre> | <pre> | ||
ctrl-a h # for help | ctrl-a h # for help | ||
Line 658: | Line 705: | ||
stty cols $COLUMNS rows $LINES | stty cols $COLUMNS rows $LINES | ||
exec bash | exec bash | ||
reset | |||
</pre> | </pre> | ||
So, you set the environment variables and also set them with stty. Then, restart bash (exec, or else it is a child shell process). | So, you set the environment variables and also set them with stty. Then, restart bash (exec, or else it is a child shell process). |
Revision as of 15:02, 25 August 2012
The first "ZFS root" wiki explains the details of running ZFS as your root filesystem by using a fully modular generic kernel approach. Now, I can share how it can be done with the SPL and ZFS modules built into the kernel. This procedure is just an example and can use some fine tuning, but here goes:
The steps below create a kernel with the SPL and ZFS modules built in. This kernel will be installed as an alternative kernel to boot in lilo, and it will have a separate installation from the fully modular and working system. This will allow testing the builtin kernel while still being able to boot back onto a working modular ZFS system. We start this procedure assuming you are on a working fully modular ZFS install as in the "ZFS root" wiki.
zfs set mountpoint=legacy zfs-root # use legacy so zfs will not expect zfs mount, but instead expect standard mount for this fs # edit /etc/rc.d/rc.S and rc.6 to use regular "mount" commands, remove "zfs" commands # edit rc.6 and remove or comment out zfs export command zpool set bootfs=zfs-root zfs-root # this may help, but not really sure mkdir /boot/initramfs-source # this will hold some files for rootfs inside kernel cd ~ mkdir src cd src tar xvzf /mnt/cdrom/slackware64/k/kernel-source-*.txz mv usr/src/linux-3.2.27 /usr/src/linux-3.2.27b rm -r install cd /usr/src/linux-3.2.27b make menuconfig General setup->Local version - append to kernel release = b General setup->Default hostname = slackzfs General setup->Initramfs source file(s) = /boot/initramfs-source # make sure you made this directory or the kernel build fails Device Drivers->SCSI device support->SCSI low-level drivers-> <*> SYM53C8XX Version 2 SCSI support # for qemu if=scsi -option-rom 8xx_64.rom,bootindex=1 hard disks # build in any hard drive controllers etc that you need File systems -> <*> The Extended 4 (ext4) filesystem # /boot may use this ext4 fs make prepare scripts # this make command is what the spl and zfs copy-builtin scripts expect to be done before they are run cd ~/src tar xvzf ~/spl-0.6.0-rc10.tar.gz mkdir install cd spl-0.6.0-rc10 ./configure \ --prefix=/ \ --libdir=/lib64 \ --includedir=/usr/include \ --datarootdir=/usr/share \ --enable-linux-builtin=yes \ --with-linux=/usr/src/linux-3.2.27b \ --with-linux-obj=/usr/src/linux-3.2.27b wget https://raw.github.com/zfsonlinux/spl/master/copy-builtin chmod +x copy-builtin ./copy-builtin /usr/src/linux-3.2.27b make make install DESTDIR=~/src/install cd ~/src/install makepkg ../spl-0.6.0rc10_3.2.27b-x86_64-1root.txz cd .. 
rm -r install tar xvzf ~/zfs-0.6.0-rc10.tar.gz mkdir install cd zfs-0.6.0-rc10 ./configure \ --prefix=/ \ --libdir=/lib64 \ --includedir=/usr/include \ --datarootdir=/usr/share \ --enable-linux-builtin=yes \ --with-linux=/usr/src/linux-3.2.27b \ --with-linux-obj=/usr/src/linux-3.2.27b \ --with-spl=/root/src/spl-0.6.0-rc10 wget https://raw.github.com/zfsonlinux/zfs/master/copy-builtin chmod +x copy-builtin ./copy-builtin /usr/src/linux-3.2.27b make make install DESTDIR=~/src/install cd ~/src/install makepkg ../zfs-0.6.0rc10_3.2.27b-x86_64-1root.txz cd .. rm -r install ### move zfs and spl modules inside kernel source to be at end of drivers: ### the order builtin modules init is the order they link into the kernel ### and we need zfs to init after all hard drive controllers ### zfs is more like a device driver layer over the lower-level hba drivers cd /usr/src/linux-3.2.27b mkdir drivers/zfsonlinux mv spl drivers/zfsonlinux vi Kconfig # remove references to spl vi Makefile # remove references to spl cd /usr/src/linux-3.2.27b/fs mv zfs ../drivers/zfsonlinux vi Kconfig # remove references to zfs vi Makefile # remove references to zfs cd /usr/src/linux-3.2.27b/drivers vi Kconfig )# add line at end of menu, before "endmenu": )source "drivers/zfsonlinux/Kconfig" )endmenu cd /usr/src/linux-3.2.27b/drivers vi Makefile )# add line at very end of file: )obj-$(CONFIG_ZFSONLINUX) += zfsonlinux/ cd /usr/src/linux-3.2.27b/drivers/zfsonlinux cat > Kconfig <<"EOF" menuconfig ZFSONLINUX tristate "ZFSonLinux support" if ZFSONLINUX source "drivers/zfsonlinux/spl/Kconfig" source "drivers/zfsonlinux/zfs/Kconfig" endif EOF cd /usr/src/linux-3.2.27b/drivers/zfsonlinux cat > Makefile <<"EOF" obj-$(CONFIG_SPL) += spl/ obj-$(CONFIG_ZFS) += zfs/ EOF ### move complete cd /usr/src/linux-3.2.27b make menuconfig Device Drivers -> <*> ZFSonLinux support -> <*> Solaris Porting Layer (SPL) <*> ZFS ### prepare contents of initramfs cd /boot/initramfs-source # make standard directories mkdir -p proc dev sys 
mnt bin sbin etc/zfs # zfs seems to want mtab present, even if empty touch etc/mtab # if zpool.cache file can be read by zfs at module init, it imports the pools in the cache cp /etc/zfs/zpool.cache-initrd etc/zfs/zpool.cache # make initial console device node; otherwise, there is a problem at startup: # "Warning: unable to open an initial console" and you'd have problems with console and tty login # making these nodes from within /init is not early enough to avoid problem mknod dev/console c 5 1 # system console # make memory device kmsg to "printk" kernel messages # we can write to this file to send out messages mknod dev/kmsg c 1 11 # lines printed to kmsg enter kernel messages buffer # recommended loop0 device to mount fs images mknod dev/loop0 b 7 0 # make initial virtual terminal devices mknod dev/tty c 5 0 # current tty mknod dev/tty0 c 4 0 # current virtual term mknod dev/tty1 c 4 1 # login virtual term 1 (F1) # make alternative console=ttyS0 standard 8250/16550 UART serial port devices # useful with kernel parameter console=ttyS0 # with qemu -nographic -option-rom sgabios.bin,bootindex=0 mknod dev/ttyS0 c 4 64 # COM1 mknod dev/ttyS1 c 4 65 # COM2 mknod dev/ttyS2 c 4 66 # COM3 mknod dev/ttyS3 c 4 67 # COM4 # this should be enough device nodes initially # once devtmpfs mounts over /dev, a lot more is available in it by default cat > /init <<"EOF" <some initial program PID=1 for the system to start on> EOF # an initramfs-source/init program is required to mount and boot a root linux system # the common way to make /init is using busybox with files like this: # /bin/busybox # /bin/ash -> busybox # /bin/sh -> busybox # # run busybox to see what other "applets" can be symlinked to it # # when busybox is run as a different symlinked applet name, it runs the applet # /init # # init is ash script, see slackware's /boot/initrd-tree/init for example # Inside this script: # 1) parse kernel parameters passed into it and set variables based on them # 2) start udevd and 
trigger rules for block devices to setup initial /dev devices # 3) load kernel modules and keyboard map # 4) run mdadm, cryptsetup, lvm, zpool/zfs to setup more devices # (any udev rules for them should do more /dev setup in the background) # 5) mount the root filesystem read-only at /mnt # 6) stat /mnt/sbin/init, and if not executable run a rescue shell /bin/sh # 7) shutdown udevd so it can be restarted by root system when booted # 8) mount --move {proc,sys,run,dev} to under /mnt # 9) run: exec switch_root /mnt /sbin/init $RUNLEVEL # The slackware mkinitrd package is fully configured busybox installation # for an extern file initramfs initrd.gz image outside the kernel. # It could be changed to be inside the kernel easily by just using initrd-tree # as target of INITRAMFS_SOURCE in the kernel config and removing initrd in lilo. # Because the zfs modules are builtin, we really don't need busybox and the whole # mkinitrd package. All we need to do is mount the zfs that is already imported # because the zpool.cache is present in rootfs when the zfs module initializes, and # then we do the switch root procedure to boot up on the mounted system. # udevd rules will still be run when the root system starts udevd. # So, let's make a simple /init c program that will be all that we really need: cat > init.c <<"EOF"
/* software product name: foobarz-init.c
 * suggested binary name: /init (in initramfs-source, rootfs)
 * license              : BSD
 * license text:
Copyright (c) 2012, foobarz
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#define FOOBARZ_INIT_VERSION "1.0.9"
#define _BSD_SOURCE
#define _DEFAULT_SOURCE /* newer glibc spelling of _BSD_SOURCE (usleep, chroot) */
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sysexits.h>
#include <errno.h>
#include <string.h>

#define PARAM_REQ_NO 0
#define PARAM_REQ_YES 1
#define PARAM_SRC_DEFAULT 0
#define PARAM_SRC_CMDLINE 1

/* size of the buffers used to hold the contents of small /proc files */
#define PROC_BUF_SIZE 4096

/* Your initramfs-source should contain the following
 *  cd /boot/initramfs-source
 *  mkdir -p proc dev sys mnt bin sbin etc/zfs
 *  touch etc/mtab
 *  cp /etc/zfs/zpool.cache-initrd etc/zfs/zpool.cache
 *  mknod dev/console c 5 1  # system console
 *  mknod dev/kmsg    c 1 11 # lines printed to kmsg enter kernel messages buffer
 *  mknod dev/loop0   b 7 0
 *  mknod dev/tty     c 5 0  # current tty
 *  mknod dev/tty0    c 4 0  # current virtual term
 *  mknod dev/tty1    c 4 1  # login virtual term 1 (F1)
 *  mknod dev/ttyS0   c 4 64 # COM1
 *  mknod dev/ttyS1   c 4 65 # COM2
 *  mknod dev/ttyS2   c 4 66 # COM3
 *  mknod dev/ttyS3   c 4 67 # COM4
 */

/* printk: write one printf-style formatted message to /dev/kmsg so that it
 * shows up in the kernel message buffer.
 *
 * fmt, ...: printf-style format string and its arguments.
 *
 * If /dev/kmsg cannot be opened the message is dropped; there is no other
 * place to report to this early in boot. */
void printk(char *fmt, ...) {
  FILE* f;
  va_list args;

  f = fopen("/dev/kmsg", "w");
  if( f == NULL ) return;

  va_start(args, fmt);
  vfprintf(f, fmt, args);
  va_end(args);
  fclose(f); /* fclose flushes the message out to kmsg */

  /* avoid flooding kmsg and having msgs suppressed; 20msgs/sec */
  usleep(50000);
}

/* read_proc_file: read a small /proc file into buf and NUL-terminate it.
 *
 * note, on /proc fs:
 *  lseek likely always returns error
 *  stat likely always returns st_size = 0
 *  so determining size of /proc file means just reading it;
 *  you have to read /proc files according to their documented
 *  maximum sizes; this is probably for performance reasons
 *
 * Reads until end of file or until bufsize-1 bytes are stored.
 * Returns the number of bytes stored (excluding the NUL), or -1 after
 * reporting the error with printk. */
static ssize_t read_proc_file(const char* path, char* buf, size_t bufsize) {
  int fd;
  size_t total = 0;
  ssize_t n;

  fd = open(path, O_RDONLY);
  if( fd == -1 ) {
    printk("Cannot open %s: %s\n", path, strerror(errno));
    return -1;
  }
  while( total < bufsize - 1 ) {
    n = read(fd, buf + total, bufsize - 1 - total);
    if( n == -1 ) {
      if( errno == EINTR ) continue;
      printk("Failed to read %s: %s\n", path, strerror(errno));
      close(fd);
      return -1;
    }
    if( n == 0 ) break; /* end of file */
    total += (size_t) n;
  }
  close(fd);
  buf[total] = '\0';
  return (ssize_t) total;
}

/* is_param_sep: true if c separates parameters on the kernel command line */
static int is_param_sep(char c) {
  return (c == ' ') || (c == '\t') || (c == '\n');
}

/* find_param_value: locate parameter `name` (including its trailing '=')
 * in cmdline and return a pointer to the first character of its value.
 *
 * The name only matches at the start of a parameter, so "init=" does not
 * match inside "rdinit=" and "root=" does not match inside "zfs.root=".
 * Returns NULL when the parameter is not present. */
static char* find_param_value(char* cmdline, const char* name) {
  size_t name_len = strlen(name);
  char* hit = cmdline;

  while( (hit = strstr(hit, name)) != NULL ) {
    if( (hit == cmdline) || is_param_sep(hit[-1]) ) return hit + name_len;
    hit++;
  }
  return NULL;
}

/* fs_type_listed: true if filesystem `type` appears as a complete name in
 * `list`, the contents of /proc/filesystems. Each line of that file is
 * "<nodev or empty>\t<name>\n", so a name is always preceded by a tab and
 * followed by a newline (or the end of the buffer). */
static int fs_type_listed(const char* list, const char* type) {
  size_t type_len = strlen(type);
  const char* hit = list;

  if( type_len == 0 ) return 0;
  while( (hit = strstr(hit, type)) != NULL ) {
    if( (hit != list) && (hit[-1] == '\t') &&
        ((hit[type_len] == '\n') || (hit[type_len] == '\0')) ) return 1;
    hit++;
  }
  return 0;
}

/* main: PID 1 of the initramfs. Mounts /proc, /dev and /sys, parses the
 * kernel command line, mounts the root filesystem at /mnt, switches root
 * to it and execs the real init program.
 *
 * Returns only on failure (a sysexits.h code); on success execl() replaces
 * this program with the real init. */
int main(int argc, char* argv[]) {
  /*** variables */
  int i;
  int fd = 0; /* file descriptor */
  unsigned long mountflags;

  /* kernel command line */
  ssize_t cmdline_size;
  char* cmdline; /* to be malloc PROC_BUF_SIZE */
  char* temp_end;
  const char* src_msg; /* default or cmdline */
  const char* init_rel; /* init path relative to the new root */
  int flag_param_missing = 0;

  /* use to hold contents of a misc /proc/<file> */
  char* miscproc_buff; /* to be malloc PROC_BUF_SIZE */

  /* note about environ, argv, and kernel cmdline for init:
   *  environ is not defined for init
   *  only argv[0] is set for init
   *  kernel command line parameters are accessed
   *  at /proc/cmdline */

  /* kernel parameters expected to be name=value */
  /* do not use quotes or spaces in parameters   */
  /* you can add more params somewhere after root= */
  struct nv { const char* n; const char* v; char* v_end; int req; int src; };
  struct nv param[] = {
     { "root=",       NULL, NULL, PARAM_REQ_YES, PARAM_SRC_DEFAULT },
     { "rootfstype=", NULL, NULL, PARAM_REQ_YES, PARAM_SRC_DEFAULT },
     { "mountopt=",   NULL, NULL, PARAM_REQ_NO , PARAM_SRC_DEFAULT },
     { "init=",       NULL, NULL, PARAM_REQ_NO , PARAM_SRC_DEFAULT },
     { "runlevel=",   NULL, NULL, PARAM_REQ_NO , PARAM_SRC_DEFAULT },
     { "console=",    NULL, NULL, PARAM_REQ_NO , PARAM_SRC_DEFAULT }
  };
  enum { iroot, irootfstype, imountopt, iinit, irunlevel, iconsole, ilastparam };

  (void) argc;
  (void) argv;

  /*** program */
  printk("foobarz-init, version %s: booting initramfs.\n", FOOBARZ_INIT_VERSION);

  cmdline = malloc(PROC_BUF_SIZE);
  miscproc_buff = malloc(PROC_BUF_SIZE);
  if( (cmdline == NULL) || (miscproc_buff == NULL) ) {
    printk("Unable to allocate buffer memory: malloc: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }

  /* mount proc /proc
   *  note: some /dev devices symlink into /proc
   *  proc contains info about processes, including cmdline etc. */
  printk("Attempting cmd: mount proc /proc\n");
  if( mount("proc", "/proc", "proc", 0, NULL) != 0 ) {
    printk("time to panic: mount: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  } else {
    printk("Mount proc successful.\n");
  }

  /* mount devtmpfs /dev
   *  note: This simple init program works if your root device is made from devices
   *  that are available by default in devtmpfs, such as /dev/sd*
   *
   *  For zfs, your root zfs pool should be created with default device nodes and
   *  then it should be mountable by this simple init program.
   *
   *  udev may be needed to configure device nodes and symlinks required
   *  to access a root device configuration made with such nodes and symlinks.
   *  If you need udevd, you can include it into your initramfs-source and
   *  modify this program to run it before attempting to mount your root device.
   *  However, if udevd is needed, a significant number of userspace programs may also be
   *  required by rules in /lib/udev/. You could install busybox + udev (about 5MB) or
   *  coreutils + util-linux + bash + udev (about 25MB) into initramfs-source. But, at that
   *  point you'd have ash or bash and many tools that are easier to use than this
   *  simple init program; it would then be easy to have /init as a #!/bin/ash or
   *  #!/bin/bash script. */
  printk("Attempting cmd: mount devtmpfs /dev\n");
  if( mount("devtmpfs", "/dev", "devtmpfs", 0, NULL) != 0 ) {
    printk("time to panic: mount: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  } else {
    printk("Mount devtmpfs successful.\n");
  }

  /* mount sysfs /sys
   *  note: some kernel modules try to access /sys with userspace helpers to echo values into /sys variables;
   *  such modules expect a minimal userspace that contains coreutils or busybox */
  printk("Attempting cmd: mount sysfs /sys\n");
  if( mount("sysfs", "/sys", "sysfs", 0, NULL) != 0 ) {
    printk("time to panic: mount: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  } else {
    printk("Mount sysfs successful.\n");
  }

  /* process kernel command line */
  cmdline_size = read_proc_file("/proc/cmdline", cmdline, PROC_BUF_SIZE);
  if( cmdline_size == -1 ) return EX_UNAVAILABLE;

  /* cmdline may be newline + null terminated, but make it null + null;
   * the size check keeps an empty cmdline from indexing cmdline[-1] */
  if( (cmdline_size > 0) && (cmdline[cmdline_size-1] == '\n') ) {
    cmdline_size--;
    cmdline[cmdline_size] = '\0';
  }

  printk("Kernel cmdline size: %i\n", (int) cmdline_size);
  printk("Kernel cmdline: \"%s\"\n", cmdline);

  /* find v and v_end of values in cmdline;
   * values are terminated in the next loop, after every name has been
   * searched for, so that the searches see the whole command line */
  for( i=iroot; i<ilastparam; i++ ) {
    temp_end = find_param_value(cmdline, param[i].n);
    if( temp_end == NULL ) continue;

    param[i].src = PARAM_SRC_CMDLINE;
    param[i].v = temp_end;
    while( (*temp_end != '\0') && !is_param_sep(*temp_end) ) temp_end++;
    if( temp_end == param[i].v ) {
      printk("Kernel parameter %s: value missing.\n", param[i].n);
      param[i].v = NULL;
    } else param[i].v_end = temp_end;
  }

  for( i=iroot; i<ilastparam; i++ ) {
    /* terminate value strings */
    if( param[i].v_end != NULL ) *(param[i].v_end) = '\0';

    /* set defaults if no value on cmdline */
    if( param[i].v == NULL ) {
      param[i].src = PARAM_SRC_DEFAULT;
      if( param[i].req == PARAM_REQ_YES ) flag_param_missing = 1;
      switch(i) {
        case iroot      : param[i].v = "<missing required param>" ; break;
        case irootfstype: param[i].v = "<missing required param>" ; break;
        case imountopt  : param[i].v = "ro"        ; break;
        case iinit      : param[i].v = "/sbin/init"; break;
        case irunlevel  : param[i].v = "3"         ; break;
        case iconsole   : param[i].v = "console"   ; break;
        default         : param[i].v = "";
      }
    }

    if(param[i].src == PARAM_SRC_DEFAULT) src_msg = "default";
    else src_msg = "cmdline";
    printk("Using %s \"%s\" (source: %s)\n", param[i].n, param[i].v, src_msg);
  }

  if( flag_param_missing ) {
    printk("Aborting boot process: missing required kernel parameter(s).\n");
    return EX_USAGE;
  }

  /* generic nv pair kernel cmdline processing finished
   * now, examine specific params for defaults and correctness */

  /* param[irootfstype]: can be checked against /proc/filesystems: */
  if( read_proc_file("/proc/filesystems", miscproc_buff, PROC_BUF_SIZE) == -1 )
    return EX_UNAVAILABLE;
  if( !fs_type_listed(miscproc_buff, param[irootfstype].v) ) {
    printk("%s \"%s\": filesystem type not available.\n", param[irootfstype].n, param[irootfstype].v);
    return EX_UNAVAILABLE;
  }
  /* nothing else reads this buffer; cmdline must stay allocated because
   * the param[].v values point into it until execl() */
  free(miscproc_buff);
  miscproc_buff = NULL;

  /* zfs-specific informative checks */
  if( strcmp(param[irootfstype].v, "zfs") == 0 ) {
    if( access("/etc/zfs/zpool.cache", F_OK) == 0 )
      printk("rootfstype=%s: /etc/zfs/zpool.cache is present in initramfs.\n", param[irootfstype].v);
    else
      printk("rootfstype=%s: /etc/zfs/zpool.cache not present in initramfs.\n", param[irootfstype].v);

    if( access("/etc/hostid", F_OK) == 0 )
      printk("rootfstype=%s: /etc/hostid is present in initramfs.\n", param[irootfstype].v);
    else
      printk("rootfstype=%s: /etc/hostid not present in initramfs.\n", param[irootfstype].v);
  }

  if( strcmp(param[imountopt].v, "ro") == 0 ) mountflags = MS_RDONLY;
  else if( strcmp(param[imountopt].v, "rw") == 0 ) mountflags = 0;
  else {
    printk("%s \"%s\": invalid parameter value; defaulting to \"ro\".\n", param[imountopt].n, param[imountopt].v);
    mountflags = MS_RDONLY;
  }

  /* param[iroot]: nothing to check; if user gives bad root=device then mount fails */

  /* try to mount root=device at /mnt
   *
   * note: for zfs, if a copy of /etc/zfs/zpool.cache (when pool is imported) is put in initramfs-source, then
   * the zfs module can read it and automatically import the pools described in the cache file; the imported
   * pools can be available to mount here if they were created using standard device names, otherwise
   * udevd may be required to run before mounting the pool */
  printk("Attempting cmd: mount -t %s -o %s %s /mnt.\n", param[irootfstype].v, param[imountopt].v, param[iroot].v);
  if( mount(param[iroot].v, "/mnt", param[irootfstype].v, mountflags, NULL) != 0 ) {
    printk("time to panic: mount: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }
  printk("%s mounted successfully.\n", param[iroot].v);

  /* check to see if the mounted root filesystem has an executable init program;
   * the check is done relative to /mnt, so strip leading slashes from init= */
  init_rel = param[iinit].v;
  while( *init_rel == '/' ) init_rel++;

  if( chdir("/mnt") != 0 ) {
    printk("time to panic: chdir: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }
  if( access(init_rel, X_OK) != 0 ) {
    printk("access X_OK: %s\n", strerror(errno));
    if( chdir("/") != 0 ) printk("chdir /: %s\n", strerror(errno));
    printk("The init program /mnt/%s is not present or not executable.\n", init_rel);
    printk("Aborting boot process: no init program.\n");
    printk("Unmounting %s.\n", param[iroot].v);
    if( umount("/mnt") == -1 ) {
      printk("umount: %s\n", strerror(errno));
      printk("Failed to umount %s.\n", param[iroot].v);
    } else printk("Successfully unmounted %s.\n", param[iroot].v);
    return EX_UNAVAILABLE;
  }
  if( chdir("/") != 0 ) {
    printk("time to panic: chdir: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }
  printk("Init program /mnt/%s is present and executable.\n", init_rel);

  /* switch the root / from initramfs to the mounted new root device at /mnt.
   *
   * note: after this switch, it is not possible to access the initramfs files anymore,
   * yet they consume ram memory unless they are deleted here before switching.
   * Any programs that are run after clearing the initramfs and switching root must exist on the new root.
   * This program may safely delete itself (/init) since it is already in ram and executing.
   * If you have installed additional files and programs in initramfs that consume significant ram,
   * then you need to insert additional code here to delete those files (carefully). */

  /* delete files off of initramfs to free ram memory */
  printk("Freeing memory from initramfs...\n");
  if( unlink("/init") != 0 ) printk("unlink %s: %s\n", "/init", strerror(errno));
  else printk("/init %s", "deleted from initramfs.\n");

  /* switch root */
  printk("Beginning switch root procedure.\n");

  printk("(1) Attempting cmd: mount --move /dev /mnt/dev \n");
  if( mount("/dev", "/mnt/dev", NULL, MS_MOVE, NULL) != 0 ) {
    printk("time to panic: mount: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }

  printk("(2) Attempting cmd: mount --move /proc /mnt/proc \n");
  if( mount("/proc", "/mnt/proc", NULL, MS_MOVE, NULL) != 0 ) {
    printk("time to panic: mount: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }

  printk("(3) Attempting cmd: mount --move /sys /mnt/sys \n");
  if( mount("/sys", "/mnt/sys", NULL, MS_MOVE, NULL) != 0 ) {
    printk("time to panic: mount: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }

  printk("(4) Attempting cmd: chdir /mnt \n");
  if( chdir("/mnt") != 0 ) {
    printk("time to panic: chdir: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }

  printk("(5) Attempting cmd: mount --move . / \n");
  if( mount(".", "/", NULL, MS_MOVE, NULL) != 0 ) {
    printk("time to panic: mount: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }

  printk("(6) Attempting cmd: chroot . \n");
  if( chroot(".") != 0 ) {
    printk("time to panic: chroot: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }

  printk("(7) Attempting cmd: chdir / \n");
  if( chdir("/") != 0 ) {
    printk("time to panic: chdir: %s\n", strerror(errno));
    return EX_UNAVAILABLE;
  }
  printk("Completed switch root procedure.\n");

  /* check for "console=" kernel parameter and switch
   *  stdin, stdout, and stderr to named console device
   */
  if( param[iconsole].src == PARAM_SRC_CMDLINE ) {
    printk("Console redirection to device %s requested.\n", param[iconsole].v);
    /* expect only basename of console device (e.g., ttyS0), so chdir /dev */
    if( chdir("/dev") != 0 ) {
      printk("chdir /dev: %s\n", strerror(errno));
      printk("Console redirection to device %s aborted!\n", param[iconsole].v);
    } else {
      if( access(param[iconsole].v, F_OK ) == 0 ) {
        printk("Opening stdin, stdout, and stderr on %s.\n", param[iconsole].v);
        /* open the device first and only then replace fds 0-2, so a failed
         * open leaves the existing stdin/stdout/stderr untouched */
        fd = open(param[iconsole].v, O_RDWR);
        if( fd == -1 ) {
          printk("open: %s\n", strerror(errno));
          printk("Console redirection to device %s aborted!\n", param[iconsole].v);
        } else {
          dup2(fd, 0);
          dup2(fd, 1);
          dup2(fd, 2);
          if( fd > 2 ) close(fd);
        }
      } else {
        printk("access F_OK: %s\n", strerror(errno));
        printk("Could not access device: %s!\n", param[iconsole].v);
        printk("Console redirection to device %s aborted!\n", param[iconsole].v);
      }
      if( chdir("/") != 0 ) printk("chdir /: %s\n", strerror(errno));
    }
  }

  printk("Execing: \"%s %s\" to boot mounted root system.\n", param[iinit].v, param[irunlevel].v);

  /* cmdline is deliberately not freed here: the init path and runlevel may
   * point into it, and a successful exec discards the whole process image.
   * The first argument after the path is argv[0] of the new program, so the
   * runlevel has to be passed as the argument after it. */
  execl(param[iinit].v, param[iinit].v, param[irunlevel].v, (char *) NULL);

  /* execl only returns on failure */
  printk("time to panic: execl: %s\n", strerror(errno));
  return EX_UNAVAILABLE;
}
EOF
## end of rootfs /init gcc --static init.c -o init strip init ## build and install kernel cd /usr/src/linux-3.2.27b make -j8 make -j8 modules_install cp arch/x86/boot/bzImage /boot/vm3.2.27b cp System.map /boot/System.map-vm3.2.27b # add new lilo menu entry for vm3.2.27b kernel vi /etc/lilo.conf image = /boot/vm3.2.27b label = vm3.2.27b addappend = " spl.spl_hostid=0x007f0100 zfs.spa_config_path=/etc/zfs/zpool.cache root=zfs-root ro rootfstype=zfs rootwait " lilo reboot
Notes on kernel params (lilo append and addappend lines): root= and rootfstype= are used by both foobarz-init and mkinitrd. The "ro" param is not used by foobarz-init, so instead use: mountopt=ro|rw. See "modinfo spl" and "modinfo zfs" to see all of the options those modules can take on the kernel command line.
Notes on the spl and zfs packages to install: If you have errors using zfs and zpool commands on a booted system with builtin modules, then upgrade/switch to the 0.6.0rc10_3.2.27b builds, or you can make a custom package with the binaries renamed like zpoolb, zfsb, etc. for builtin.
Using qemu's -nographic serial console (/usr/share/qemu/sgabios.bin): If you have a boot problem, you might want to run in qemu's -nographic console mode:
First, edit /etc/inittab, and uncomment the line for local serial ports runlevel "s1" (ttyS0) for serial console login support. This is done in the guest (host does not need this):
# Local serial lines: s1:12345:respawn:/sbin/agetty -L ttyS0 38400 linux #s1:12345:respawn:/sbin/agetty -L ttyS0 9600 vt100 #s2:12345:respawn:/sbin/agetty -L ttyS1 9600 vt100
Second, edit /etc/securetty, and uncomment ttyS0 line:
ttyS0 #ttyS1 # ...
You cannot login on ttyS0 without this change even though you get the login prompt!
Then, run qemu such as:
qemu-kvm <all regular options> -nographic -option-rom sgabios.bin,bootindex=0
Wait for the lilo prompt; it may take several seconds to show up in this mode.
In the lilo menu, select your kernel image with builtin spl/zfs, and use kernel params such as:
boot: vm3.2.27b console=ttyS0
At the lilo prompt, use kernel parameter: console=ttyS0 (do NOT say /dev/ttyS0). Passing runlevel=s1 does not appear to be necessary.
When you are in qemu's serial mode, the interface is a little different than in the normal graphical mode: use keypresses:
ctrl-a h # for help ctrl-a c # toggle access to (QEMU) console / back to emulation
If you cannot get a login: prompt, try ctrl-a c, then do (QEMU) sendkey ctrl-alt-delete. This will reboot cleanly back to lilo, then you can do (QEMU) quit.
Once you login to the guest, the terminal size needs adjustment. On your host, in another xterm sized the same as your guest console terminal, get the size of the terminal:
echo $LINES echo $COLUMNS
Now, back in the guest terminal of same size:
export LINES=<lines> export COLUMNS=<columns> stty cols $COLUMNS rows $LINES exec bash reset
So, you set the environment variables and also set them with stty. Then, restart bash (exec, or else it is a child shell process).
BE CAREFUL not to start qemu twice on the same ZFS guest! Two simultaneously running qemu instances on the same ZFS will corrupt the pool and it will NOT recover - you lose your whole installation! Before running qemu, check ps -A and look for an already running qemu process in case it somehow entered into the background!
BE CAREFUL when using -nographic ttyS0 that you are not doing commands on the HOST! Only use -nographic to see kernel problems. Use the regular console if booting is working normally.
To use the qemu -drive if=scsi option, the linux module sym53c8xx should be loaded in the guest. You may need to use the qemu option:
-option-rom 8xx_64.rom,bootindex=1
The file 8xx_64.rom used to be inside qemu, but in new versions it is missing. You can download it:
wget http://www.lsi.com/downloads/Public/Host%20Bus%20Adapters/Host%20Bus%20Adapters%20Common%20Files/lsi_bios.zip
Extract the zip and place 8xx_64.rom in /usr/share/qemu/.
Good luck!