红联Linux门户
Linux帮助

Linux虚拟文件系统(内核初始化<一>)

发布时间:2014-11-25 15:23:43来源:linux网站作者:bullbat

Linux虚拟文件系统在内核初始化的start_kernel()函数中主要调用两个函数来实现。

/*
 * Excerpt of start_kernel(): only the two VFS-related calls are shown.
 * Everything else in the function is elided.
 */
asmlinkage void __init start_kernel(void)
{
	/* ... */
	vfs_caches_init_early();	/* early stage: hash tables only */
	/* ... */
	vfs_caches_init(totalram_pages);	/* late stage: caches, mounts, device models */
	/* ... */
}
 

一、早期初始化

虚拟文件系统的早期初始化由函数vfs_caches_init_early()实现,主要负责dentry和inode的hashtable的初始化工作。

/*
 * Early file-system initialization, called from start_kernel().
 * Sets up the two hash tables (dentry and inode).
 */
void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}

1.1 dcache

/*
 * Early setup of the dentry hash table.
 * Does nothing when hashdist is set; dcache_init() performs the
 * allocation later in that case.
 */
static void __init dcache_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	/* Allocate the storage for the dentry hash table. */
	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);

	/* Initialize every bucket; 1 << d_hash_shift is used as the bucket count. */
	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}

1.2  inode

/*
 * Initialize the waitqueues and inode hash table.
 *
 * Does nothing when hashdist is set; inode_init() performs the
 * allocation later in that case.
 */
void __init inode_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	/* Allocate the storage for the inode hash table. */
	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_EARLY,
					&i_hash_shift,
					&i_hash_mask,
					0);

	/* Initialize every bucket; 1 << i_hash_shift is used as the bucket count. */
	for (loop = 0; loop < (1 << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}


二、后期初始化

这阶段对inode、dentry、mount、字符设备驱动模型以及块设备驱动模型做了相应的初始化。

/*
 * Late VFS initialization, called from start_kernel().
 *
 * @mempages: page count passed in by the caller; a reserve is subtracted
 *            from it before the value is handed to files_init().
 */
void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */

	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	/* Slab cache for path names (objects of PATH_MAX bytes). */
	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init();		/* dentry cache and related state */
	inode_init();		/* inode cache */
	files_init(mempages);	/* file cache and file limits */
	mnt_init();		/* mount infrastructure */
	bdev_cache_init();	/* block device side */
	chrdev_init();		/* character device driver model */
}

2.1 dentry初始化

/*
 * Late dentry initialization: create the dentry slab cache, register the
 * dcache shrinker and, if it was deferred, allocate the dentry hash table.
 */
static void __init dcache_init(void)
{
	int loop;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	/* Create the slab cache for struct dentry. */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);

	/* Register the dcache shrinker. */
	register_shrinker(&dcache_shrinker);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	/*
	 * Reached only when hashdist is set, i.e. when dcache_init_early()
	 * returned without allocating. Same steps as there, except that the
	 * flags argument is 0 instead of HASH_EARLY.
	 */
	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}

2.2 inode初始化

/*
 * Late inode initialization: create the inode slab cache, register the
 * icache shrinker and, if it was deferred, allocate the inode hash table.
 */
void __init inode_init(void)
{
	int loop;

	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD),
					 init_once);

	/* Register the icache shrinker. */
	register_shrinker(&icache_shrinker);

	/* Hash may have been set up in inode_init_early */
	if (!hashdist)
		return;

	/*
	 * Reached only when hashdist is set, i.e. when inode_init_early()
	 * returned without allocating. Allocate the bucket array here.
	 */
	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					0,
					&i_hash_shift,
					&i_hash_mask,
					0);

	/* Initialize every bucket. */
	for (loop = 0; loop < (1 << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}

2.3 files初始化

/*
 * Create the "filp" slab cache and derive the system-wide file limit
 * from the amount of memory.
 *
 * @mempages: number of pages to base the limit on.
 */
void __init files_init(unsigned long mempages)
{
	int limit;

	/* Slab cache for struct file objects. */
	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/*
	 * One file with associated inode and dcache is very roughly 1K.
	 * Per default don't use more than 10% of our memory for files.
	 */
	limit = (mempages * (PAGE_SIZE / 1024)) / 10;

	/* Record the limit, but never let it drop below NR_FILE. */
	files_stat.max_files = limit;
	if (files_stat.max_files < NR_FILE)
		files_stat.max_files = NR_FILE;

	/* NOTE(review): files_defer_init() is not shown here; presumably it
	 * sets up deferred handling for file descriptor tables. Confirm. */
	files_defer_init();
	percpu_counter_init(&nr_files, 0);
}

2.4 mount初始化

/*
 * Initialize the mount infrastructure: the vfsmount slab cache, the mount
 * hash table, sysfs, the "fs" kobject, rootfs and the initial mount tree.
 */
void __init mnt_init(void)
{
	unsigned u;
	int err;

	init_rwsem(&namespace_sem);

	/* Slab cache for struct vfsmount. */
	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/* The mount hash table occupies a single page. */
	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);

	if (!mount_hashtable)
		panic("Failed to allocate mount hash table\n");

	printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);

	/* Initialize every hash chain. */
	for (u = 0; u < HASH_SIZE; u++)
		INIT_LIST_HEAD(&mount_hashtable[u]);

	/* Initialize sysfs; a failure is only logged. */
	err = sysfs_init();
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, err);

	/* Create the "fs" kobject; a failure is only logged. */
	fs_kobj = kobject_create_and_add("fs", NULL);
	if (!fs_kobj)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);

	init_rootfs();		/* set up ramfs and rootfs */
	init_mount_tree();	/* build the initial mount tree */
}

/*
 * Mount rootfs, wrap it in the initial mount namespace, and make it the
 * working directory and root of the current task. Panics on failure.
 */
static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;

	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");

	/* Create a namespace whose root is the new mount. */
	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		panic("Can't allocate initial namespace");

	/* Attach the namespace to init_task. */
	init_task.nsproxy->mnt_ns = ns;
	/* Count one more user of the namespace (per the original note). */
	get_mnt_ns(ns);

	/* Describe the namespace root as a path. */
	root.mnt = ns->root;
	root.dentry = ns->root->mnt_root;

	/* Use it as both the current directory and the root of current->fs. */
	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
}

2.4.1 创建命名空间

/**
 * create_mnt_ns - creates a private namespace and adds a root filesystem
 * @mnt: pointer to the new root filesystem mountpoint
 */ 
struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) 

struct mnt_namespace *new_ns; 
 
new_ns = alloc_mnt_ns();/*分配命名空间*/ 
if (!IS_ERR(new_ns)) { 
/*下面为和mnt建立关系*/ 
mnt->mnt_ns = new_ns; 
new_ns->root = mnt; 
list_add(&new_ns->list, &new_ns->root->mnt_list); 

return new_ns; 

static struct mnt_namespace *alloc_mnt_ns(void) 

struct mnt_namespace *new_ns; 
/*从cache中分配命名空间*/
new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 
if (!new_ns) 
return ERR_PTR(-ENOMEM); 
/*下面为相关字段的初始化*/ 
atomic_set(&new_ns->count, 1); 
new_ns->root = NULL; 
INIT_LIST_HEAD(&new_ns->list); 
init_waitqueue_head(&new_ns->poll); 
new_ns->event = 0; 
return new_ns; 
}

2.4.2 创建mount

struct vfsmount * 
do_kern_mount(const char *fstype, int flags, const char *name, void *data) 

struct file_system_type *type = get_fs_type(fstype); 
struct vfsmount *mnt; 
if (!type) 
return ERR_PTR(-ENODEV); 
mnt = vfs_kern_mount(type, flags, name, data); 
if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && 
!mnt->mnt_sb->s_subtype) 
mnt = fs_set_subtype(mnt, fstype); 
put_filesystem(type); 
return mnt; 

struct vfsmount * 
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) 

struct vfsmount *mnt; 
char *secdata = NULL; 
int error; 
 
if (!type) 
return ERR_PTR(-ENODEV); 
 
error = -ENOMEM; 
/*从slab中分配一个mnt*/ 
mnt = alloc_vfsmnt(name); 
if (!mnt) 
goto out; 
 
if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { 
secdata = alloc_secdata(); 
if (!secdata) 
goto out_mnt; 
 
error = security_sb_copy_data(data, secdata); 
if (error) 
goto out_free_secdata; 

/*调用文件系统控制结构体的get_sb()*/ 
error = type->get_sb(type, flags, name, data, mnt); 
if (error < 0) 
goto out_free_secdata; 
BUG_ON(!mnt->mnt_sb); 
 
error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); 
if (error) 
goto out_sb; 
 
/*
 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
 * but s_maxbytes was an unsigned long long for many releases. Throw
 * this warning for a little while to try and catch filesystems that
 * violate this rule. This warning should be either removed or
 * converted to a BUG() in 2.6.34.
 */ 
WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " 
"negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes); 
/*初始化mnt相关字段*/ 
mnt->mnt_mountpoint = mnt->mnt_root; 
mnt->mnt_parent = mnt; 
up_write(&mnt->mnt_sb->s_umount); 
free_secdata(secdata); 
return mnt; 
out_sb: 
dput(mnt->mnt_root); 
deactivate_locked_super(mnt->mnt_sb); 
out_free_secdata: 
free_secdata(secdata); 
out_mnt: 
free_vfsmnt(mnt); 
out: 
return ERR_PTR(error); 

struct vfsmount *alloc_vfsmnt(const char *name) 

/*从slab中获得mnt*/ 
struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 
/*下面进行对mnt的初始化*/ 
if (mnt) { 
int err; 
 
err = mnt_alloc_id(mnt); 
if (err) 
goto out_free_cache; 
 
if (name) { 
mnt->mnt_devname = kstrdup(name, GFP_KERNEL); 
if (!mnt->mnt_devname) 
goto out_free_id; 

 
atomic_set(&mnt->mnt_count, 1); 
INIT_LIST_HEAD(&mnt->mnt_hash); 
INIT_LIST_HEAD(&mnt->mnt_child); 
INIT_LIST_HEAD(&mnt->mnt_mounts); 
INIT_LIST_HEAD(&mnt->mnt_list); 
INIT_LIST_HEAD(&mnt->mnt_expire); 
INIT_LIST_HEAD(&mnt->mnt_share); 
INIT_LIST_HEAD(&mnt->mnt_slave_list); 
INIT_LIST_HEAD(&mnt->mnt_slave); 
#ifdef CONFIG_SMP  
mnt->mnt_writers = alloc_percpu(int); 
if (!mnt->mnt_writers) 
goto out_free_devname; 
#else  
mnt->mnt_writers = 0; 
#endif  

return mnt; 
 
#ifdef CONFIG_SMP  
out_free_devname: 
kfree(mnt->mnt_devname); 
#endif  
out_free_id: 
mnt_free_id(mnt); 
out_free_cache: 
kmem_cache_free(mnt_cache, mnt); 
return NULL; 

2.5 块设备驱动模型初始化

/*
 * Set up the block device side: create the bdev_inode slab cache, register
 * and mount the bdev pseudo filesystem, and record its superblock.
 * Panics if registration or the mount fails.
 */
void __init bdev_cache_init(void)
{
	int err;
	struct vfsmount *bd_mnt;

	/* Slab cache for struct bdev_inode. */
	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_PANIC),
			init_once);

	/* Register the bdev filesystem type. */
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");

	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	/*
	 * This vfsmount structure is only used to obtain the
	 * blockdev_superblock, so tell kmemleak not to report it.
	 */
	kmemleak_not_leak(bd_mnt);
	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}

2.6 字符设备驱动模型初始化

/*
 * Initialize the character device driver model: create cdev_map and
 * initialize directly_mappable_cdev_bdi.
 */
void __init chrdev_init(void)
{
	cdev_map = kobj_map_init(base_probe, &chrdevs_lock);
	bdi_init(&directly_mappable_cdev_bdi);
}


这里对linux虚拟文件系统的初始化工作做了整体的梳理,后面将对涉及到的细节做补充,包括inode和dentry cache shrinker的注册、sysfs的初始化等。