Linux虚拟文件系统的初始化在内核启动函数start_kernel()中主要通过调用下面两个函数来完成。
/*
 * Excerpt of start_kernel(): only the two VFS-related calls are shown;
 * the "……" lines stand for code omitted from this excerpt.
 */
asmlinkage void __init start_kernel(void)
{
……
/* Early stage: runs dcache_init_early() and inode_init_early(). */
vfs_caches_init_early();
……
/* Later stage: the argument is received by vfs_caches_init() as mempages. */
vfs_caches_init(totalram_pages);
……
}
一、早期初始化
虚拟文件系统的早期初始化由函数vfs_caches_init_early()实现,主要负责dentry和inode的hashtable的初始化工作。
/*
 * vfs_caches_init_early - early VFS setup, invoked from start_kernel().
 *
 * Runs the early initializers for the two VFS hash tables: the dentry
 * hash first, then the inode hash.
 */
void __init vfs_caches_init_early(void)
{
	dcache_init_early();	/* dentry hash table */
	inode_init_early();	/* inode hash table */
}
1.1 dcache
/*
 * dcache_init_early - allocate and initialize the dentry hash table early.
 *
 * Does nothing when hashdist is set; in that case the table is allocated
 * later by dcache_init().
 */
static void __init dcache_init_early(void)
{
	unsigned int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	/* Allocate the bucket array; shift and mask are returned by reference. */
	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);

	/*
	 * Initialize every bucket head.  The counter is unsigned and the
	 * bound is built from 1U: "1 << d_hash_shift" is a signed int
	 * expression that overflows (undefined behaviour) once the shift
	 * reaches 31, and a signed counter could not cover that many
	 * buckets.
	 */
	for (loop = 0; loop < (1U << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
1.2 inode
/*
 * inode_init_early - allocate and initialize the inode hash table early.
 *
 * Does nothing when hashdist is set; in that case the table is allocated
 * later by inode_init().
 */
void __init inode_init_early(void)
{
	unsigned int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	/* Allocate the bucket array; shift and mask are returned by reference. */
	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_EARLY,
					&i_hash_shift,
					&i_hash_mask,
					0);

	/*
	 * Initialize every bucket head.  The counter is unsigned and the
	 * bound is built from 1U: "1 << i_hash_shift" is a signed int
	 * expression that overflows (undefined behaviour) once the shift
	 * reaches 31, and a signed counter could not cover that many
	 * buckets.
	 */
	for (loop = 0; loop < (1U << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
二、后期初始化
这一阶段对dentry、inode、files、mount、块设备驱动模型以及字符设备驱动模型做了相应的初始化。
/*
 * vfs_caches_init - second-stage VFS initialization, called from
 * start_kernel().
 * @mempages: page count the sizing below starts from
 *
 * Subtracts a reserve from @mempages, creates the path-name slab cache and
 * then runs the dentry, inode, files, mount, block-device and char-device
 * initializers in that order.
 */
void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long in_use;
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */
	in_use = mempages - nr_free_pages();
	reserve = in_use * 3 / 2;
	/* Cap the reserve at mempages - 1. */
	if (reserve > mempages - 1)
		reserve = mempages - 1;
	mempages -= reserve;

	/* Slab cache for path-name buffers of PATH_MAX bytes. */
	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init();		/* dentry cache */
	inode_init();		/* inode cache */
	files_init(mempages);	/* struct file cache and max_files limit */
	mnt_init();		/* mount infrastructure */
	bdev_cache_init();	/* block device cache and pseudo-fs */
	chrdev_init();		/* character device map */
}
2.1 dentry初始化
/*
 * dcache_init - second-stage dentry cache setup.
 *
 * Creates the dentry slab cache and registers dcache_shrinker.  The dentry
 * hash table is allocated here only when hashdist is set, i.e. when
 * dcache_init_early() returned without allocating it.
 */
static void __init dcache_init(void)
{
	unsigned int loop;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);

	register_shrinker(&dcache_shrinker);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	/*
	 * hashdist is set, so the early path skipped this allocation; do it
	 * now.  The call matches the early one except that HASH_EARLY is not
	 * passed.
	 */
	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0);

	/*
	 * Initialize every bucket head.  The counter is unsigned and the
	 * bound is built from 1U: "1 << d_hash_shift" is a signed int
	 * expression that overflows (undefined behaviour) once the shift
	 * reaches 31, and a signed counter could not cover that many
	 * buckets.
	 */
	for (loop = 0; loop < (1U << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
2.2 inode初始化
/*
 * inode_init - second-stage inode cache setup.
 *
 * Creates the inode slab cache (constructor: init_once) and registers
 * icache_shrinker.  The inode hash table is allocated here only when
 * hashdist is set, i.e. when inode_init_early() returned without
 * allocating it.
 */
void __init inode_init(void)
{
	unsigned int loop;

	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD),
					 init_once);

	register_shrinker(&icache_shrinker);

	/* Hash may have been set up in inode_init_early */
	if (!hashdist)
		return;

	/*
	 * hashdist is set, so the early path skipped this allocation; do it
	 * now.  The call matches the early one except that HASH_EARLY is not
	 * passed.
	 */
	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					0,
					&i_hash_shift,
					&i_hash_mask,
					0);

	/*
	 * Initialize every bucket head.  The counter is unsigned and the
	 * bound is built from 1U: "1 << i_hash_shift" is a signed int
	 * expression that overflows (undefined behaviour) once the shift
	 * reaches 31, and a signed counter could not cover that many
	 * buckets.
	 */
	for (loop = 0; loop < (1U << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
2.3 files初始化
/*
 * files_init - create the "filp" slab cache and set the open-file limit.
 * @mempages: page count the limit is derived from
 *
 * files_stat.max_files is set to one tenth of @mempages expressed in KiB,
 * but never less than NR_FILE.
 */
void __init files_init(unsigned long mempages)
{
	int limit;

	/* Slab cache for struct file objects. */
	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/*
	 * One file with associated inode and dcache is very roughly 1K.
	 * Per default don't use more than 10% of our memory for files.
	 */
	limit = (mempages * (PAGE_SIZE / 1024)) / 10;
	files_stat.max_files = limit;

	/* Enforce the NR_FILE floor. */
	if (files_stat.max_files < NR_FILE)
		files_stat.max_files = NR_FILE;

	/*
	 * NOTE(review): files_defer_init() is defined elsewhere; presumably
	 * it prepares deferred freeing of fd tables - confirm.
	 */
	files_defer_init();

	/* Start the nr_files per-cpu counter at zero. */
	percpu_counter_init(&nr_files, 0);
}
2.4 mount初始化
/*
 * mnt_init - set up the mount infrastructure.
 *
 * Initializes namespace_sem, creates the vfsmount slab cache, allocates
 * and initializes the one-page mount hash table, initializes sysfs,
 * creates the "fs" kobject, and finally sets up rootfs and the initial
 * mount tree.  sysfs and kobject failures are only logged; a failed hash
 * table allocation panics.
 */
void __init mnt_init(void)
{
	unsigned idx;
	int rc;

	init_rwsem(&namespace_sem);

	/* Slab cache for struct vfsmount. */
	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/* The hash table occupies a single page. */
	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
	if (mount_hashtable == NULL)
		panic("Failed to allocate mount hash table\n");

	printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);

	/* Make every bucket an empty list. */
	idx = 0;
	while (idx < HASH_SIZE) {
		INIT_LIST_HEAD(&mount_hashtable[idx]);
		idx++;
	}

	rc = sysfs_init();
	if (rc != 0)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, rc);

	fs_kobj = kobject_create_and_add("fs", NULL);
	if (fs_kobj == NULL)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);

	init_rootfs();
	init_mount_tree();
}
/*
 * init_mount_tree - mount rootfs and make it the root of the initial
 * mount namespace.
 *
 * Panics if rootfs cannot be mounted or the namespace cannot be created.
 * On success the namespace is installed in init_task and the current
 * task's pwd and root are both pointed at the rootfs root.
 */
static void __init init_mount_tree(void)
{
	struct vfsmount *rootfs_mnt;
	struct mnt_namespace *init_ns;
	struct path root;

	rootfs_mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
	if (IS_ERR(rootfs_mnt))
		panic("Can't create rootfs");

	init_ns = create_mnt_ns(rootfs_mnt);
	if (IS_ERR(init_ns))
		panic("Can't allocate initial namespace");

	/* Install the namespace in init_task, then call get_mnt_ns() on it. */
	init_task.nsproxy->mnt_ns = init_ns;
	get_mnt_ns(init_ns);

	/* Build a path describing the root of the namespace's root mount. */
	root.mnt = init_ns->root;
	root.dentry = root.mnt->mnt_root;

	/* Use it as both working directory and root of the current task. */
	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
}
2.4.1 创建命名空间
/**
 * create_mnt_ns - creates a private namespace and adds a root filesystem
 * @mnt: pointer to the new root filesystem mountpoint
 *
 * Returns the new namespace, or the ERR_PTR() value produced by
 * alloc_mnt_ns() when allocation fails (in which case @mnt is untouched).
 */
struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
{
	struct mnt_namespace *ns = alloc_mnt_ns();

	if (IS_ERR(ns))
		return ns;

	/* Link the mount and the namespace to each other. */
	mnt->mnt_ns = ns;
	ns->root = mnt;
	list_add(&ns->list, &ns->root->mnt_list);

	return ns;
}
/*
 * alloc_mnt_ns - allocate an empty mount namespace.
 *
 * The namespace is obtained with kmalloc() and comes back with a count of
 * one, no root mount, an empty list, an initialized poll waitqueue and an
 * event value of zero.  Returns ERR_PTR(-ENOMEM) if the allocation fails.
 */
static struct mnt_namespace *alloc_mnt_ns(void)
{
	struct mnt_namespace *ns = kmalloc(sizeof(*ns), GFP_KERNEL);

	if (ns == NULL)
		return ERR_PTR(-ENOMEM);

	ns->root = NULL;
	ns->event = 0;
	atomic_set(&ns->count, 1);
	INIT_LIST_HEAD(&ns->list);
	init_waitqueue_head(&ns->poll);

	return ns;
}
2.4.2 创建mount
struct vfsmount *
do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
struct file_system_type *type = get_fs_type(fstype);
struct vfsmount *mnt;
if (!type)
return ERR_PTR(-ENODEV);
mnt = vfs_kern_mount(type, flags, name, data);
if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
!mnt->mnt_sb->s_subtype)
mnt = fs_set_subtype(mnt, fstype);
put_filesystem(type);
return mnt;
}
/*
 * vfs_kern_mount - allocate a vfsmount and let the filesystem fill it in.
 * @type:  filesystem type whose get_sb() is invoked
 * @flags: passed to get_sb() and security_sb_kern_mount()
 * @name:  passed to alloc_vfsmnt() and get_sb()
 * @data:  mount data; may be NULL
 *
 * Returns the new mount, whose mountpoint is its own root and whose parent
 * is itself, or an ERR_PTR() value on failure.
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
struct vfsmount *mnt;
char *secdata = NULL;
int error;
if (!type)
return ERR_PTR(-ENODEV);
/* Default error for the allocation failures below. */
error = -ENOMEM;
/* Allocate the vfsmount. */
mnt = alloc_vfsmnt(name);
if (!mnt)
goto out;
/*
 * Unless the filesystem flags its mount data as binary, pass @data and a
 * freshly allocated secdata buffer to security_sb_copy_data().
 */
if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
secdata = alloc_secdata();
if (!secdata)
goto out_mnt;
error = security_sb_copy_data(data, secdata);
if (error)
goto out_free_secdata;
}
/* Call the filesystem type's get_sb() method. */
error = type->get_sb(type, flags, name, data, mnt);
if (error < 0)
goto out_free_secdata;
/* A successful get_sb() must have attached a superblock. */
BUG_ON(!mnt->mnt_sb);
error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
if (error)
goto out_sb;
/*
 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
 * but s_maxbytes was an unsigned long long for many releases. Throw
 * this warning for a little while to try and catch filesystems that
 * violate this rule. This warning should be either removed or
 * converted to a BUG() in 2.6.34.
 */
WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
"negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
/* The new mount is its own mountpoint and its own parent. */
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
/*
 * NOTE(review): releasing s_umount here implies get_sb() returned with it
 * held for writing - confirm against the get_sb() implementations.
 */
up_write(&mnt->mnt_sb->s_umount);
free_secdata(secdata);
return mnt;
/* Error unwinding, in reverse order of acquisition. */
out_sb:
dput(mnt->mnt_root);
deactivate_locked_super(mnt->mnt_sb);
out_free_secdata:
free_secdata(secdata);
out_mnt:
free_vfsmnt(mnt);
out:
return ERR_PTR(error);
}
/*
 * alloc_vfsmnt - allocate and initialize a vfsmount.
 * @name: device name to duplicate into mnt_devname; may be NULL
 *
 * The object comes zeroed from mnt_cache, gets a mount id, a reference
 * count of one and empty list heads.  On SMP builds a per-cpu writer
 * counter is allocated as well.  Returns NULL if any step fails, after
 * releasing whatever had been acquired.
 */
struct vfsmount *alloc_vfsmnt(const char *name)
{
	struct vfsmount *mnt;

	mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (!mnt)
		return NULL;

	if (mnt_alloc_id(mnt))
		goto out_free_cache;

	if (name) {
		mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
		if (!mnt->mnt_devname)
			goto out_free_id;
	}

	atomic_set(&mnt->mnt_count, 1);

	/* All list linkage starts out empty. */
	INIT_LIST_HEAD(&mnt->mnt_hash);
	INIT_LIST_HEAD(&mnt->mnt_child);
	INIT_LIST_HEAD(&mnt->mnt_mounts);
	INIT_LIST_HEAD(&mnt->mnt_list);
	INIT_LIST_HEAD(&mnt->mnt_expire);
	INIT_LIST_HEAD(&mnt->mnt_share);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
	INIT_LIST_HEAD(&mnt->mnt_slave);

#ifdef CONFIG_SMP
	mnt->mnt_writers = alloc_percpu(int);
	if (!mnt->mnt_writers)
		goto out_free_devname;
#else
	mnt->mnt_writers = 0;
#endif

	return mnt;

	/* Failure paths: undo in reverse order of acquisition. */
#ifdef CONFIG_SMP
out_free_devname:
	kfree(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}
2.5 块设备驱动模型初始化
/*
 * bdev_cache_init - set up the block-device inode cache and pseudo-fs.
 *
 * Creates the bdev_inode slab cache, registers the bd_type filesystem,
 * mounts it in-kernel and records its superblock in blockdev_superblock.
 * Panics if registration or the mount fails.
 */
void __init bdev_cache_init(void)
{
	struct vfsmount *pseudo_mnt;

	/* Slab cache for struct bdev_inode, constructed by init_once. */
	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_PANIC),
			init_once);

	/* Register the bdev pseudo filesystem type. */
	if (register_filesystem(&bd_type) != 0)
		panic("Cannot register bdev pseudo-fs");

	pseudo_mnt = kern_mount(&bd_type);
	if (IS_ERR(pseudo_mnt))
		panic("Cannot create bdev pseudo-fs");

	/*
	 * This vfsmount structure is only used to obtain the
	 * blockdev_superblock, so tell kmemleak not to report it.
	 */
	kmemleak_not_leak(pseudo_mnt);
	blockdev_superblock = pseudo_mnt->mnt_sb;	/* For writeback */
}
2.6 字符设备驱动模型初始化
/*
 * chrdev_init - set up the character-device map.
 *
 * Creates cdev_map with base_probe as its probe routine and chrdevs_lock
 * as its lock, then initializes directly_mappable_cdev_bdi via bdi_init().
 */
void __init chrdev_init(void)
{
	/* Device-number map for character devices. */
	cdev_map = kobj_map_init(base_probe, &chrdevs_lock);

	/* Initialize directly_mappable_cdev_bdi. */
	bdi_init(&directly_mappable_cdev_bdi);
}
这里对Linux虚拟文件系统的初始化工作做了整体的梳理,后面将对涉及到的细节做补充,包括inode和dentry cache shrinker的注册、sysfs的初始化等。