ZFS root (builtin)

The first "ZFS root" wiki explains the details of running ZFS at your root filesystem by using a fully modular generic kernel approach. Now, I can share how it can be done with the SPL and ZFS modules built into the kernel. This procedure is just an example and can use some fine tuning, but here goes:

The steps below create a kernel with the SPL and ZFS modules built in. This kernel will be installed as an alternative boot entry in lilo, kept separate from the fully modular working system, so you can test the builtin kernel while still being able to boot back into a working modular ZFS system. This procedure assumes you start from a working, fully modular ZFS install as described in the "ZFS root" wiki.

zfs set mountpoint=legacy zfs-root
# legacy means this fs is mounted with the standard mount command instead of "zfs mount"
# edit /etc/rc.d/rc.S and rc.6 to use regular "mount" commands and remove the "zfs" commands
# (see the sketch below)
# edit rc.6 and remove or comment out the zpool export command
zpool set bootfs=zfs-root zfs-root
# setting bootfs may help, but I am not really sure it is needed
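# a minimal sketch of those rc edits (exact lines vary by Slackware version;
# adjust to the scripts you modified in the first wiki):
#
# /etc/rc.d/rc.S: with mountpoint=legacy, the stock remount of / works again,
# so the "zfs mount" additions can go away and this standard line stays:
#   /sbin/mount -w -v -n -o remount /
#
# /etc/rc.d/rc.6: comment out the export added for the modular setup:
#   # zpool export zfs-root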

mkdir /boot/initramfs-source
# this will hold some files for rootfs inside kernel

cd ~
mkdir src
cd src
tar xvf /mnt/cdrom/slackware64/k/kernel-source-*.txz
# .txz is xz-compressed, so no z (gzip) flag; modern tar autodetects
mv usr/src/linux-3.2.27 /usr/src/linux-3.2.27b
rm -r install
cd /usr/src/linux-3.2.27b
make menuconfig
  General setup->Local version - append to kernel release = b
  General setup->Default hostname                         = slackzfs
  General setup->Initramfs source file(s)                 = /boot/initramfs-source
  #  make sure you created this directory or the kernel build fails
  Device Drivers->SCSI device support->SCSI low-level drivers-> <*> SYM53C8XX Version 2 SCSI support
  #  for qemu if=scsi -option-rom 8xx_64.rom,bootindex=1  hard disks
  # build in any other hard drive controllers etc. that you need
  File systems -> <*> The Extended 4 (ext4) filesystem
  # /boot may use this ext4 fs
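# sanity check (optional): the saved .config should now contain roughly these lines
grep -E 'LOCALVERSION=|DEFAULT_HOSTNAME|INITRAMFS_SOURCE=|SYM53C8XX_2|EXT4_FS=' .config
#   CONFIG_LOCALVERSION="b"
#   CONFIG_DEFAULT_HOSTNAME="slackzfs"
#   CONFIG_INITRAMFS_SOURCE="/boot/initramfs-source"
#   CONFIG_SCSI_SYM53C8XX_2=y
#   CONFIG_EXT4_FS=y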
make prepare scripts
# the spl and zfs copy-builtin scripts expect this make command to have been run first

cd ~/src
tar xvzf ~/spl-0.6.0-rc10.tar.gz
mkdir install
cd spl-0.6.0-rc10
./configure --prefix=/ --libdir=/lib64 --includedir=/usr/include --datarootdir=/usr/share --enable-linux-builtin=yes --with-linux=/usr/src/linux-3.2.27b --with-linux-obj=/usr/src/linux-3.2.27b
wget https://raw.github.com/zfsonlinux/spl/master/copy-builtin
chmod +x copy-builtin
./copy-builtin /usr/src/linux-3.2.27b
make
make install DESTDIR=~/src/install
cd ~/src/install
makepkg ../spl-0.6.0rc10_3.2.27b-x86_64-1root.txz
cd ..
rm -r install
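# sanity check (optional): copy-builtin should have copied the spl sources
# into the top of the kernel tree and hooked them into the kbuild files:
ls /usr/src/linux-3.2.27b/spl
grep -n spl /usr/src/linux-3.2.27b/Kconfig /usr/src/linux-3.2.27b/Makefile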

tar xvzf ~/zfs-0.6.0-rc10.tar.gz
mkdir install
cd zfs-0.6.0-rc10
./configure --prefix=/ --libdir=/lib64 --includedir=/usr/include --datarootdir=/usr/share --enable-linux-builtin=yes --with-linux=/usr/src/linux-3.2.27b --with-linux-obj=/usr/src/linux-3.2.27b --with-spl=/root/src/spl-0.6.0-rc10
wget https://raw.github.com/zfsonlinux/zfs/master/copy-builtin
chmod +x copy-builtin
./copy-builtin /usr/src/linux-3.2.27b
make
make install DESTDIR=~/src/install
cd ~/src/install
makepkg ../zfs-0.6.0rc10_3.2.27b-x86_64-1root.txz
cd ..
rm -r install
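# same sanity check for zfs, which copy-builtin places under fs/:
ls /usr/src/linux-3.2.27b/fs/zfs
grep -n zfs /usr/src/linux-3.2.27b/fs/Kconfig /usr/src/linux-3.2.27b/fs/Makefile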

### move the spl and zfs sources inside the kernel tree to the end of drivers/:
### builtin modules initialize in the order they are linked into the kernel,
### and we need zfs to init after all of the hard drive controllers;
### zfs is more like a device-driver layer over the lower-level hba drivers

cd /usr/src/linux-3.2.27b
mkdir drivers/zfsonlinux
mv spl drivers/zfsonlinux
vi Kconfig
  # remove references to spl
vi Makefile
  # remove references to spl

cd /usr/src/linux-3.2.27b/fs
mv zfs ../drivers/zfsonlinux
vi Kconfig
  # remove references to zfs
vi Makefile
  # remove references to zfs

cd /usr/src/linux-3.2.27b/drivers
vi Kconfig
  # add this line at the end of the menu, just before "endmenu":
  source "drivers/zfsonlinux/Kconfig"
  endmenu

cd /usr/src/linux-3.2.27b/drivers
vi Makefile
  # add this line at the very end of the file:
  obj-$(CONFIG_ZFSONLINUX) += zfsonlinux/
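# sanity check (optional): zfsonlinux/ should now be the last entry, so spl
# and zfs link (and therefore initialize) after every other builtin driver:
tail -n 2 Makefile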

cd /usr/src/linux-3.2.27b/drivers/zfsonlinux
cat > Kconfig <<"EOF"
menuconfig ZFSONLINUX
	tristate "ZFSonLinux support"

if ZFSONLINUX

source "drivers/zfsonlinux/spl/Kconifg"

source "drivers/zfsonlinux/zfs/Kconifg"

endif
EOF

cd /usr/src/linux-3.2.27b/drivers/zfsonlinux
cat > Makefile <<"EOF"
obj-$(CONFIG_SPL) += spl/
obj-$(CONFIG_ZFS) += zfs/
EOF

### move complete

cd /usr/src/linux-3.2.27b
make menuconfig
   Device Drivers ->
    <*> ZFSonLinux support ->
      <*> Solaris Porting Layer (SPL)
      <*>     ZFS
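# sanity check (optional): .config should now show the new symbols enabled:
grep -E 'CONFIG_(ZFSONLINUX|SPL|ZFS)=' .config
#   CONFIG_ZFSONLINUX=y
#   CONFIG_SPL=y
#   CONFIG_ZFS=y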

cd /boot/initramfs-source
mkdir -p etc/zfs dev mnt
touch etc/mtab
cp /etc/zfs/zpool.cache-initrd etc/zfs/zpool.cache
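# zpool.cache-initrd is the pool cache file prepared in the first "ZFS root"
# wiki; if you do not have one yet, something like this will write it:
#   zpool set cachefile=/etc/zfs/zpool.cache-initrd zfs-root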

### make the rootfs /init program
cat > init.c <<"EOF"
#define _BSD_SOURCE
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sysexits.h>
#include <errno.h>
#include <string.h>

int main(int argc, char* argv[]) {
 FILE* kmsg = NULL;
 char* fnkmsg = "/dev/kmsg";
 mknod(fnkmsg, S_IFCHR | 0600, makedev(1,11) );  /* char device 1:11 = kmsg */
 kmsg = fopen(fnkmsg, "w");
 fprintf(kmsg, "foobarz-zinit starting.\n");

 fprintf(kmsg, "Attempting mount devtmpfs to /dev\n");
 if( mount("devtmpfs", "/dev", "devtmpfs", 0, NULL) != 0 ) {
  fprintf(kmsg, "time to panic: mount: %s\n", strerror(errno));
  return EX_UNAVAILABLE;
 } else {
  fprintf(kmsg, "Mount devtmpfs successful.\n");
 }

 if( access("/dev/zfs", F_OK) != 0 ) {
  fprintf(kmsg, "File /dev/zfs does not exist; making nod.\n");
  mknod("/dev/zfs", S_IFCHR , makedev(10,58) );
 } else {
  fprintf(kmsg, "File /dev/zfs already exists. Good.\n");
 }

 if( access("/dev/sda1", F_OK) != 0 ) {
  fprintf(kmsg, "Files /dev/sd{a,a1} do not exist; making nods.\n");
  mknod("/dev/sda", S_IFBLK , makedev(8,0) );
  mknod("/dev/sda1", S_IFBLK , makedev(8,1) );
 } else {
  fprintf(kmsg, "Files /dev/sd{a,a1} already exist. Good.\n");
 }

 fprintf(kmsg, "Attempting mount-ro newroot to /mnt\n");
 if( mount("zfs-root", "/mnt", "zfs", MS_RDONLY, NULL) != 0 ) {
  fprintf(kmsg, "time to panic: mount: %s\n", strerror(errno));
  return EX_UNAVAILABLE;
 }

 fprintf(kmsg, "Attempting mount-move /dev to /mnt/dev\n");
 if( mount("/dev", "/mnt/dev", NULL, MS_MOVE, NULL) != 0 ) {
  fprintf(kmsg, "time to panic: mount: %s\n", strerror(errno));
  return EX_UNAVAILABLE;
 }

 fprintf(kmsg, "Beginning switch root procedure.\n");
 fprintf(kmsg, "Attempting chdir from / to /mnt\n");
 if( chdir("/mnt") != 0 ) {
  fprintf(kmsg, "time to panic: chdir: %s\n", strerror(errno));
  return EX_UNAVAILABLE;
 }

 fprintf(kmsg, "Attempting mount-move . to /\n");
 if( mount(".", "/", NULL, MS_MOVE, NULL) != 0 ) {
  fprintf(kmsg, "time to panic: mount: %s\n", strerror(errno));
  return EX_UNAVAILABLE;
 }

 fprintf(kmsg, "Attempting chroot to .\n");
 if( chroot(".") != 0 ) {
  fprintf(kmsg, "time to panic: chroot: %s\n", strerror(errno));
  return EX_UNAVAILABLE;
 }

 fprintf(kmsg, "Attempting chdir to /\n");
 if( chdir("/") != 0 ) {
  fprintf(kmsg, "time to panic: chdir: %s\n", strerror(errno));
  return EX_UNAVAILABLE;
 }
 fprintf(kmsg, "Completed switch root procedure.\n");

 fprintf(kmsg, "Execing root system /sbin/init.\n");
 if( execl("/sbin/init", "3", (char *) NULL ) != 0 ) {
  fprintf(kmsg, "time to panic: execl: %s\n", strerror(errno));
  return EX_UNAVAILABLE;
 }

}
EOF
## end of rootfs /init

gcc -static init.c -o init
strip init
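# verify the binary is fully static; ldd should report "not a dynamic executable"
ldd init
# the kernel packs everything under /boot/initramfs-source into the initramfs;
# expect init, init.c (harmless, or rm it), dev/, mnt/, etc/mtab, etc/zfs/zpool.cache
ls -R /boot/initramfs-source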

## build and install kernel
cd /usr/src/linux-3.2.27b
make -j8
make -j8 modules_install
cp arch/x86/boot/bzImage /boot/vm3.2.27b
cp System.map /boot/System.map-vm3.2.27b
# add new lilo menu entry for vm3.2.27b kernel
vi /etc/lilo.conf
  image = /boot/vm3.2.27b
   label = vm3.2.27b
   addappend = " spl.spl_hostid=0x007f0100 zfs.spa_config_path=/etc/zfs/zpool.cache root=zfs-root ro rootfstype=zfs rootwait "
lilo
reboot
# note: root=, rootfstype=, ro, and rootwait are "legacy" kernel boot options that
#       are not really used here, so they could probably be removed, but they do no harm

# if the zfs and zpool commands error out on the booted system with builtin modules,
# upgrade/switch to the 0.6.0rc10_3.2.27b packages built above, or make a custom package
# with the binaries renamed, e.g. zpoolb, zfsb, for the builtin kernel
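# for example (package paths assume you built in /root/src as above):
upgradepkg --reinstall /root/src/spl-0.6.0rc10_3.2.27b-x86_64-1root.txz
upgradepkg --reinstall /root/src/zfs-0.6.0rc10_3.2.27b-x86_64-1root.txz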

# if you have boot problems, you might want to run in qemu's -nographic console mode:
qemu-kvm <all regular options> -nographic -option-rom sgabios.bin,bootindex=0
# wait for the lilo prompt, it takes time to show up
boot: vm3.2.27b console=ttyS0
# ctrl-a h    for help
# ctrl-a c    for (QEMU) console
# BE CAREFUL not to start qemu twice on the same ZFS guest:
#  two qemu instances running simultaneously on the same ZFS will corrupt the pool,
#  and it will NOT recover... you lose your whole installation!
# BE CAREFUL when using -nographic ttyS0 that you are not typing commands on the HOST!
# only use -nographic to see kernel problems, then quit it
# Doing this is only useful until the kernel panics, because additional console
# switching still needs to be done. If it does boot: ctrl-a c, quit (sorry), and reboot normally.
# The console-switching part missing from init would look similar to this
# (xopen/xdup2 are busybox-style helpers):
#
# if (console) {
#  close(0);
#  xopen(console, O_RDWR);
#  xdup2(0, 1);
#  xdup2(0, 2);
# }
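# A plain-libc version of that snippet for this standalone init (a sketch;
# the busybox x-helpers are not available here, console would be e.g.
# "/dev/console" after the switch, and errors are ignored for brevity):
#
# #include <fcntl.h>
# #include <unistd.h>
#
# void reopen_console(const char* console) {
#  int fd = open(console, O_RDWR);  /* will become the new stdin */
#  if (fd < 0) return;              /* keep the old fds on failure */
#  dup2(fd, 0);                     /* stdin  */
#  dup2(fd, 1);                     /* stdout */
#  dup2(fd, 2);                     /* stderr */
#  if (fd > 2) close(fd);
# }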

# to use if=scsi, linux module sym53c8xx, may need to use the qemu option:
#       -option-rom 8xx_64.rom,bootindex=1
# download it here:
# http://www.lsi.com/downloads/Public/Host%20Bus%20Adapters/Host%20Bus%20Adapters%20Common%20Files/lsi_bios.zip
# extract 8xx_64.rom to /usr/share/qemu/

Then, at the lilo prompt, use the kernel parameter: console=ttyS0     (do NOT say /dev/ttyS0)
There is a delay before you see LILO, so be patient. You should then see the kernel's full boot messages.
sgabios is the "serial graphics adapter" video BIOS; it sends all of its text-mode I/O over ttyS0.
The -nographic option redirects the guest's ttyS0 to your console.
8xx_64.rom is the SCSI adapter's boot ROM; it initializes after sgabios so that its output is visible too.
For QEMU help, press ctrl-a then hit h.

Good luck!