红联Linux门户
Linux帮助

Linux虚拟文件系统(安装根文件系统)

发布时间:2014-11-25 15:14:57来源:linux网站作者:bullbat

安装根文件系统是系统初始化的关键部分。Linux内核允许根文件系统放在很多不同的地方,比如硬盘分区、软盘、通过NFS共享的远程文件系统以及保存在ramdisk中。内核要在变量ROOT_DEV中寻找包含根文件系统的磁盘主设备号。当编译内核时,或者向最初的启动装入程序传递一个合适的“root”选项时,根文件系统可以被指定为/dev目录下的一个设备文件。


安装根文件系统分为两个阶段:

1,内核安装特殊rootfs文件系统,该文件系统仅提供一个作为初始安装点的空目录

start_kernel()->vfs_caches_init()->mnt_init()->init_rootfs()

/**
 * init_rootfs - register the rootfs filesystem type
 *
 * Initialises ramfs_backing_dev_info and then registers rootfs_fs_type.
 * If the registration fails, the backing-dev info initialised here is
 * destroyed again before returning.
 *
 * Returns 0 on success, or the error code reported by bdi_init() or
 * register_filesystem().
 */
int __init init_rootfs(void)
{
	int err;

	/* Initialise ramfs_backing_dev_info. */
	err = bdi_init(&ramfs_backing_dev_info);
	if (err)
		return err;

	/* Register the rootfs_fs_type filesystem type. */
	err = register_filesystem(&rootfs_fs_type);
	if (err)	/* on failure, undo the bdi_init() above */
		bdi_destroy(&ramfs_backing_dev_info);

	return err;
}

/*
 * Backing-device description for ramfs; init_rootfs() passes it to
 * bdi_init() before registering rootfs.
 */
static struct backing_dev_info ramfs_backing_dev_info = {
	.name		= "ramfs",
	.ra_pages	= 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK
			| BDI_CAP_MAP_DIRECT
			| BDI_CAP_MAP_COPY
			| BDI_CAP_READ_MAP
			| BDI_CAP_WRITE_MAP
			| BDI_CAP_EXEC_MAP,
};

/**
*  register_filesystem - register a new filesystem
*  @fs: the file system structure
*
*  Adds the file system passed to the list of file systems the kernel
*  is aware of for mount and other syscalls. Returns 0 on success,
*  or a negative errno code on an error.
*
*  The &struct file_system_type that is passed is linked into the kernel 
*  structures and must not be freed until the file system has been
*  unregistered.
*/ 
/*注册一个新的文件系统*/ 
int register_filesystem(struct file_system_type * fs) 

int res = 0; 
struct file_system_type ** p; 
 
BUG_ON(strchr(fs->name, '.')); 
if (fs->next) 
return -EBUSY; 
INIT_LIST_HEAD(&fs->fs_supers); 
write_lock(&file_systems_lock); 
/*从system_type链表中查找指定名称的file_system_type*/ 
p = find_filesystem(fs->name, strlen(fs->name)); 
if (*p) 
res = -EBUSY; 
else 
*p = fs; 
write_unlock(&file_systems_lock); 
return res; 
}
 

根文件系统定义如下

/*
 * Filesystem type descriptor for "rootfs": superblocks are obtained via
 * rootfs_get_sb() and torn down via kill_litter_super().
 */
static struct file_system_type rootfs_fs_type = {
	.name		= "rootfs",
	.get_sb		= rootfs_get_sb,
	.kill_sb	= kill_litter_super,
};

下面看看他的两个函数

/*获得根目录的sb*/ 
static int rootfs_get_sb(struct file_system_type *fs_type, 
int flags, const char *dev_name, void *data, struct vfsmount *mnt) 

return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super, 
mnt); 
}

int get_sb_nodev(struct file_system_type *fs_type, 
int flags, void *data, 
int (*fill_super)(struct super_block *, void *, int), 
struct vfsmount *mnt) 

int error; 
/*获得sb结构*/ 
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 
 
if (IS_ERR(s)) 
return PTR_ERR(s); 
 
s->s_flags = flags; 
/*这里实际调用ramfs_fill_super,对sb结构的属性进行设置*/ 
error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 
if (error) { 
deactivate_locked_super(s); 
return error; 

s->s_flags |= MS_ACTIVE; 
simple_set_mnt(mnt, s);/*设置mnt和sb关联*/ 
return 0; 

/**
*  sget-   find or create a superblock
*  @type:  filesystem type superblock should belong to
*  @test:  comparison callback
*  @set:   setup callback
*  @data:  argument to each of them
*/ 
/*查找或创建一个sb结构*/ 
struct super_block *sget(struct file_system_type *type, 
int (*test)(struct super_block *,void *), 
int (*set)(struct super_block *,void *), 
void *data) 

struct super_block *s = NULL; 
struct super_block *old; 
int err; 
 
retry: 
spin_lock(&sb_lock); 
if (test) { 
list_for_each_entry(old, &type->fs_supers, s_instances) { 
if (!test(old, data)) 
continue; 
if (!grab_super(old)) 
goto retry; 
if (s) { 
up_write(&s->s_umount); 
destroy_super(s); 

return old; 


if (!s) {/*如果找不到sb,从内存中申请一个*/ 
spin_unlock(&sb_lock); 
s = alloc_super(type); 
if (!s) 
return ERR_PTR(-ENOMEM); 
goto retry; 

 
err = set(s, data); 
if (err) { 
spin_unlock(&sb_lock); 
up_write(&s->s_umount); 
destroy_super(s); 
return ERR_PTR(err); 

/*初始化得到的sb结构*/ 
s->s_type = type; 
strlcpy(s->s_id, type->name, sizeof(s->s_id)); 
/*加入链表尾*/ 
list_add_tail(&s->s_list, &super_blocks); 
list_add(&s->s_instances, &type->fs_supers); 
spin_unlock(&sb_lock); 
get_filesystem(type); 
return s; 
}

/*
 * All superblock objects are linked together in a circular doubly linked
 * list. The head of the list is the super_blocks variable, and each
 * superblock's s_list field holds the pointers to its neighbours in the list.
 */
LIST_HEAD(super_blocks); 

/**
*  alloc_super -   create new superblock
*  @type:  filesystem type superblock should belong to
*
*  Allocates and initializes a new &struct super_block.  alloc_super()
*  returns a pointer new superblock or %NULL if allocation had failed.
*/ 
static struct super_block *alloc_super(struct file_system_type *type) 
{
/*从内存中申请sb*/ 
struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER); 
static const struct super_operations default_op; 
 
if (s) { 
if (security_sb_alloc(s)) { 
 kfree(s); 
 s = NULL; 
 goto out; 

/*初始化*/ 
INIT_LIST_HEAD(&s->s_files); 
INIT_LIST_HEAD(&s->s_instances); 
INIT_HLIST_HEAD(&s->s_anon); 
INIT_LIST_HEAD(&s->s_inodes); 
INIT_LIST_HEAD(&s->s_dentry_lru); 
init_rwsem(&s->s_umount); 
mutex_init(&s->s_lock); 
lockdep_set_class(&s->s_umount, &type->s_umount_key); 
/*
* The locking rules for s_lock are up to the
* filesystem. For example ext3fs has different
* lock ordering than usbfs:
*/ 
lockdep_set_class(&s->s_lock, &type->s_lock_key); 
/*
* sget() can have s_umount recursion.
*
* When it cannot find a suitable sb, it allocates a new
* one (this one), and tries again to find a suitable old
* one.
*
* In case that succeeds, it will acquire the s_umount
* lock of the old one. Since these are clearly distrinct
* locks, and this object isn't exposed yet, there's no
* risk of deadlocks.
*
* Annotate this by putting this lock in a different
* subclass.
*/ 
down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); 
s->s_count = S_BIAS; 
atomic_set(&s->s_active, 1); 
mutex_init(&s->s_vfs_rename_mutex); 
mutex_init(&s->s_dquot.dqio_mutex); 
mutex_init(&s->s_dquot.dqonoff_mutex); 
init_rwsem(&s->s_dquot.dqptr_sem); 
init_waitqueue_head(&s->s_wait_unfrozen); 
s->s_maxbytes = MAX_NON_LFS; 
s->dq_op = sb_dquot_ops; 
s->s_qcop = sb_quotactl_ops; 
s->s_op = &default_op; 
s->s_time_gran = 1000000000; 

out: 
return s; 
}

kill_litter_super的过程相反,这里不再写了。

构造根目录是由init_mount_tree()函数实现的,该函数在前面已经介绍过了。


2,安装实际根文件系统

关于__setup宏

__setup宏来注册关键字及相关联的处理函数,__setup宏在include/linux/init.h中定义,其原型如下:
__setup(string, _handler);
其中:string是关键字,_handler是关联处理函数。__setup只是告诉内核在启动时输入串中含有string时,内核要去
执行_handler。String必须以“=”符结束以使parse_args更方便解析。紧随“=”后的任何文本都会作为输入传给
_handler。下面的例子来自于init/do_mounts.c,其中root_dev_setup作为处理程序被注册给“root=”关键字:
 __setup("root=", root_dev_setup);

比如我们在启动项参数中有

noinitrd root=/dev/mtdblock2 console=/linuxrc

setup_arch解析时会发现root=/dev/mtdblock2,然后它就会调用root_dev_setup

/*
 * root_dev_setup - handler for the "root=" boot parameter
 * @line: the text following "root=" on the command line
 *
 * Copies @line into saved_root_name (truncating to the buffer size via
 * strlcpy()) and returns 1.
 */
static int __init root_dev_setup(char *line)
{
	strlcpy(saved_root_name, line, sizeof(saved_root_name));
	return 1;
}

__setup("root=", root_dev_setup);

start_kernel()->rest_init()->init()->prepare_namespace()

/*
 * Prepare the namespace - decide what/where to mount, load ramdisks, etc.
 *
 * Outline, as visible in this function:
 *  - optionally sleep for root_delay seconds, then wait for device probing;
 *  - if a "root=" name was saved, either mount it directly (names starting
 *    with "mtd" or "ubi") or translate it into ROOT_DEV;
 *  - if initrd_load() returns non-zero, skip straight to the final steps;
 *  - otherwise, optionally wait for the root device to appear, then call
 *    mount_root();
 *  - finally move the mount at the current directory onto "/" and chroot
 *    into it.
 */
void __init prepare_namespace(void)
{
	int is_floppy;

	if (root_delay) {
		printk(KERN_INFO "Waiting %dsec before mounting root device...\n",
		       root_delay);
		ssleep(root_delay);
	}

	/*
	 * wait for the known devices to complete their probing
	 *
	 * Note: this is a potential source of long boot delays.
	 * For example, it is not atypical to wait 5 seconds here
	 * for the touchpad of a laptop to initialize.
	 */
	wait_for_device_probe();
	/*
	 * NOTE(review): the original annotation claims this call creates
	 * /dev/ram0 for the initrd; md_run_setup() is not shown here, so
	 * that claim is unverified - confirm against its definition.
	 */
	md_run_setup();

	/* saved_root_name holds the device name taken from the "root=" boot parameter. */
	if (saved_root_name[0]) {
		root_device_name = saved_root_name;
		/* Names beginning with "mtd" or "ubi" are mounted by name directly. */
		if (!strncmp(root_device_name, "mtd", 3) ||
		    !strncmp(root_device_name, "ubi", 3)) {
			mount_block_root(root_device_name, root_mountflags);
			goto out;
		}
		/* Translate the name into a device number (major/minor). */
		ROOT_DEV = name_to_dev_t(root_device_name);
		if (strncmp(root_device_name, "/dev/", 5) == 0)
			root_device_name += 5;	/* skip the leading "/dev/" */
	}

	if (initrd_load())
		goto out;

	/* wait for any asynchronous scanning to complete */
	if ((ROOT_DEV == 0) && root_wait) {
		printk(KERN_INFO "Waiting for root device %s...\n",
			saved_root_name);
		while (driver_probe_done() != 0 ||
			(ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
			msleep(100);
		async_synchronize_full();
	}

	is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;

	if (is_floppy && rd_doload && rd_load_disk(0))
		ROOT_DEV = Root_RAM0;
	/* The actual mounting of the root filesystem. */
	mount_root();
out:
	/*
	 * NOTE(review): the original annotation describes this as dealing
	 * with /dev of the virtual root filesystem; devtmpfs_mount() is not
	 * shown here - confirm.
	 */
	devtmpfs_mount("dev");
	/*
	 * Move the mount at the current directory (do_mount_root() did
	 * sys_chdir("/root") after mounting there) onto "/".
	 */
	sys_mount(".", "/", NULL, MS_MOVE, NULL);
	/* Make the current directory, i.e. the real root fs, the process root. */
	sys_chroot(".");
}


mount_root操作

/*
 * mount_root - mount the root filesystem identified by ROOT_DEV
 *
 * Depending on the configuration this first tries NFS (falling back to the
 * floppy device on failure), then handles a floppy root (optionally loading
 * a ramdisk from it), and finally creates "/dev/root" for ROOT_DEV and
 * mounts it via mount_block_root().
 */
void __init mount_root(void)
{
#ifdef CONFIG_ROOT_NFS
	if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
		if (mount_nfs_root())
			return;

		printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
		ROOT_DEV = Root_FD0;
	}
#endif
#ifdef CONFIG_BLK_DEV_FD
	if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
		/* rd_doload is 2 for a dual initrd/ramload setup */
		if (rd_doload==2) {
			if (rd_load_disk(1)) {
				ROOT_DEV = Root_RAM1;
				root_device_name = NULL;
			}
		} else
			change_floppy("root floppy");
	}
#endif
#ifdef CONFIG_BLOCK	/* the common path */
	create_dev("/dev/root", ROOT_DEV);	/* create "/dev/root" for ROOT_DEV */
	mount_block_root("/dev/root", root_mountflags);
#endif
}

/*
 * mount_block_root - try to mount @name as root with each candidate fs type
 * @name:  device path passed on to do_mount_root()
 * @flags: mount flags; MS_RDONLY is added and the scan restarted if a
 *         mount attempt fails with -EACCES
 *
 * Walks the NUL-separated list filled in by get_fs_names() (terminated by
 * an empty string). -EINVAL moves on to the next type; any other error
 * panics immediately. If every type was tried without success, the
 * function also panics. It only returns after a successful mount.
 */
void __init mount_block_root(char *name, int flags)
{
	/* Buffer for the list of filesystem names; released at out:. */
	char *fs_names = __getname_gfp(GFP_KERNEL
		| __GFP_NOTRACK_FALSE_POSITIVE);
	char *p;
#ifdef CONFIG_BLOCK
	char b[BDEVNAME_SIZE];
#else
	const char *b = name;
#endif
	/*
	 * NOTE(review): per the original annotation, get_fs_names() yields
	 * the filesystem type given in the boot options if there is one,
	 * otherwise every type on the filesystem list; each is then tried
	 * below. get_fs_names() is not shown here - confirm.
	 */
	get_fs_names(fs_names);
retry:
	for (p = fs_names; *p; p += strlen(p)+1) {
		/*
		 * The actual mount: do_mount_root() issues the mount system
		 * call to mount onto /root; p is the filesystem type name.
		 */
		int err = do_mount_root(name, p, flags, root_mount_data);
		switch (err) {
			case 0:
				goto out;
			case -EACCES:
				flags |= MS_RDONLY;
				goto retry;
			case -EINVAL:
				continue;
		}
		/*
		 * Allow the user to distinguish between failed sys_open
		 * and bad superblock on root device.
		 * and give them a list of the available devices
		 */
#ifdef CONFIG_BLOCK
		__bdevname(ROOT_DEV, b);
#endif
		printk("VFS: Cannot open root device \"%s\" or %s\n",
				root_device_name, b);
		printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");

		printk_all_partitions();
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
		printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
		       "explicit textual name for \"root=\" boot option.\n");
#endif
		panic("VFS: Unable to mount root fs on %s", b);
	}

	printk("List of all partitions:\n");
	printk_all_partitions();
	printk("No filesystem could mount root, tried: ");
	for (p = fs_names; *p; p += strlen(p)+1)
		printk(" %s", p);
	printk("\n");
#ifdef CONFIG_BLOCK
	__bdevname(ROOT_DEV, b);
#endif
	panic("VFS: Unable to mount root fs on %s", b);
out:
	putname(fs_names);
}

/*
 * do_mount_root - mount @name on /root as filesystem type @fs
 * @name:  device to mount
 * @fs:    filesystem type name
 * @flags: mount flags
 * @data:  filesystem-specific mount data
 *
 * On success, changes the current directory to /root, records the mounted
 * superblock's device in ROOT_DEV, logs the result and returns 0.
 * Otherwise returns the error from sys_mount().
 */
static int __init do_mount_root(char *name, char *fs, int flags, void *data)
{
	/* The mount system call performs the actual mounting. */
	int err = sys_mount(name, "/root", fs, flags, data);
	if (err)
		return err;
	/* Make the freshly mounted /root the current directory. */
	sys_chdir("/root");
	ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev;
	printk("VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
	       current->fs->pwd.mnt->mnt_sb->s_type->name,
	       current->fs->pwd.mnt->mnt_sb->s_flags & MS_RDONLY ?
	       " readonly" : "", MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
	return 0;
}


到此,根文件系统的安装过程算是完成了,中间关于mount等系统调用将在后面分析。可以看出总的步骤主要有:

1,创建一个rootfs,这个是虚拟的rootfs,是内存文件系统(基于ramfs实现,其超级块由ramfs_fill_super填充),后面还会指向具体的根文件系统;

2,从系统启动参数中获取设备文件名以及设备号;

3,调用系统调用创建设备节点/dev/root,并调用mount系统调用进行实际的安装操作;

4,改变进程当前目录;

5,移动rootfs文件系统根目录上的已经安装文件系统的安装点;
rootfs特殊文件系统没有被卸载,他只是隐藏在基于磁盘的根文件系统下了。