Omfs文件目录的结构是通过对name做hash来实现的:
定义inode_operations结构体,
/*
 * Inode operations table for OMFS directories: each slot is bound to the
 * corresponding omfs_* handler.
 */
const struct inode_operations omfs_dir_inops = {
.lookup = omfs_lookup,
.mkdir = omfs_mkdir,
.rename = omfs_rename,
.create = omfs_create,
.unlink = omfs_unlink,
.rmdir = omfs_rmdir,
};
系统调用mkdir dir1
Breakpoint 2, omfs_mkdir (dir=0xdee25d88, dentry=0xdedc8660, mode=493)
at fs/omfs/dir.c:298
298 return omfs_add_node(dir, dentry, mode | S_IFDIR);
(gdb) bt
#0 omfs_mkdir (dir=0xdee25d88, dentry=0xdedc8660, mode=493)
at fs/omfs/dir.c:298
#1 0xc029fdb6 in vfs_mkdir (dir=0xdee25d88, dentry=0xdedc8660, mode=493)
at fs/namei.c:2086
#2 0xc029fe8c in sys_mkdirat (dfd=-100, pathname=0xbf8de932 "dir1", mode=493)
at fs/namei.c:2116
#3 0xc029fef3 in sys_mkdir (pathname=0xbf8de932 "dir1", mode=511)
at fs/namei.c:2131
#4 0xc0104657 in ?? () at arch/x86/kernel/entry_32.S:457
可以看到系统调用的过程:
sys_mkdir → sys_mkdirat → vfs_mkdir → omfs_mkdir
dir=0xdee25d88
dentry=0xdedc8660
p *dentry
$7 = {d_count = {counter = 1}, d_flags = 0, d_lock = {{rlock = {raw_lock = {
slock = 257}}}}, d_mounted = 0, d_inode = 0x0, d_hash = {next = 0x0,
pprev = 0xc144096c}, d_parent = 0xdf4ea198, d_name = {hash = 25987720,
len = 4, name = 0xdedc86bc "dir1"}, d_lru = {next = 0xdedc868c,
prev = 0xdedc868c}, d_u = {d_child = {next = 0xdf4ea1d4,
prev = 0xdf4ea1d4}, d_rcu = {next = 0xdf4ea1d4, func = 0xdf4ea1d4}},
d_subdirs = {next = 0xdedc869c, prev = 0xdedc869c}, d_alias = {
next = 0xdedc86a4, prev = 0xdedc86a4}, d_time = 1701080941, d_op = 0x0,
d_sb = 0xdd91e400, d_fsdata = 0x0,
d_iname = "dir1\000ne\000_larval_drop\000__ticket_spin_unloc"}
p *(struct dentry *)0xdf4ea198
$8 = {d_count = {counter = 6}, d_flags = 16, d_lock = {{rlock = {raw_lock = {
slock = 1028}}}}, d_mounted = 0, d_inode = 0xdee25d88, d_hash = {
next = 0x0, pprev = 0x0}, d_parent = 0xdf4ea198, d_name = {hash = 0,
len = 1, name = 0xdf4ea1f4 "/"}, d_lru = {next = 0xdf4ea1c4,
prev = 0xdf4ea1c4}, d_u = {d_child = {next = 0xdf4ea1cc,
prev = 0xdf4ea1cc}, d_rcu = {next = 0xdf4ea1cc, func = 0xdf4ea1cc}},
d_subdirs = {next = 0xdedc8694, prev = 0xdedc8694}, d_alias = {
next = 0xdee25da0, prev = 0xdee25da0}, d_time = 0, d_op = 0x0,
d_sb = 0xdd91e400, d_fsdata = 0x0,
d_iname = "/\000v\000-linux-gnu\000\337`M|\300\340L|\300\000d\200\337\000\000\000\000\030\242N\337h\241N", <incomplete sequence \337>}
p *dir
$6 = {i_hash = {next = 0x0, pprev = 0xc1463a90}, i_list = {next = 0xdf597480,
prev = 0xd2335648}, i_sb_list = {next = 0xdd91e474, prev = 0xdd91e474},
i_dentry = {next = 0xdf4ea1dc, prev = 0xdf4ea1dc}, i_ino = 3, i_count = {
counter = 1}, i_nlink = 2, i_uid = 0, i_gid = 0, i_rdev = 0,
i_blkbits = 11, i_version = 0, i_size = 2048, i_size_seqcount = {
sequence = 0}, i_atime = {tv_sec = 1328683425, tv_nsec = 0}, i_mtime = {
tv_sec = 1328683648, tv_nsec = 915000}, i_ctime = {tv_sec = 1328683648,
tv_nsec = 915000}, i_blocks = 0, i_bytes = 0, i_mode = 16877, i_lock = {{
rlock = {raw_lock = {slock = 514}}}}, i_mutex = {count = {counter = 0},
wait_lock = {{rlock = {raw_lock = {slock = 0}}}}, wait_list = {
next = 0xdee25e08, prev = 0xdee25e08}, owner = 0xddb0c000},
i_alloc_sem = {count = 0, wait_lock = {{rlock = {raw_lock = {slock = 0}}}},
wait_list = {next = 0xdee25e1c, prev = 0xdee25e1c}}, i_op = 0xe27ccb60,
i_fop = 0xe27ccbc0, i_sb = 0xdd91e400, i_flock = 0x0,
i_mapping = 0xdee25e38, i_data = {host = 0xdee25d88, page_tree = {
height = 0, gfp_mask = 32, rnode = 0x0}, tree_lock = {{rlock = {
raw_lock = {slock = 0}}}}, i_mmap_writable = 0, i_mmap = {
prio_tree_node = 0x0, index_bits = 1, raw = 1}, i_mmap_nonlinear = {
next = 0xdee25e58, prev = 0xdee25e58}, i_mmap_lock = {{rlock = {
raw_lock = {slock = 0}}}}, truncate_count = 0, nrpages = 0,
writeback_index = 0, a_ops = 0xe27ccd20, flags = 131290,
backing_dev_info = 0xd233559c, private_lock = {{rlock = {raw_lock = {
slock = 0}}}}, private_list = {next = 0xdee25e80,
---Type <return> to continue, or q <return> to quit---
prev = 0xdee25e80}, assoc_mapping = 0x0}, i_dquot = {0x0, 0x0},
i_devices = {next = 0xdee25e94, prev = 0xdee25e94}, {i_pipe = 0x0,
i_bdev = 0x0, i_cdev = 0x0}, i_generation = 0, i_fsnotify_mask = 0,
i_fsnotify_mark_entries = {first = 0x0}, inotify_watches = {
next = 0xdee25eac, prev = 0xdee25eac}, inotify_mutex = {count = {
counter = 1}, wait_lock = {{rlock = {raw_lock = {slock = 0}}}},
wait_list = {next = 0xdee25ebc, prev = 0xdee25ebc}, owner = 0x0},
i_state = 1, dirtied_when = 4294956485, i_flags = 0, i_writecount = {
counter = 0}, i_security = 0x0, i_acl = 0xffffffff,
i_default_acl = 0xffffffff, i_private = 0x0}
inode *dir 是新挂载omfs文件系统的根目录的inode,dentry *dentry是dir1。
/*
 * mkdir handler: delegates to omfs_add_node() with S_IFDIR OR-ed into the
 * requested mode and returns that call's result unchanged.
 */
static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
return omfs_add_node(dir, dentry, mode | S_IFDIR);
}
/*
 * Create a new node for @dentry under directory @dir with the given @mode.
 *
 * The numbered step comments match the numbered sections of the walkthrough
 * that follows this listing:
 *   (1) omfs_new_inode()  - obtain a new inode
 *   (2) omfs_make_empty() - initialise it
 *   (3) omfs_add_link()   - link it into the parent directory
 *   (4) d_instantiate()   - attach the inode to the dentry
 *
 * Returns 0 on success. If omfs_new_inode() fails, its error is returned
 * directly; if step (2) or (3) fails, the new inode is released with iput()
 * and that step's error is returned.
 */
static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode)
{
	int err;
	struct inode *inode = omfs_new_inode(dir, mode);	/* (1) */

	if (IS_ERR(inode))
		return PTR_ERR(inode);

	err = omfs_make_empty(inode, dir->i_sb);		/* (2) */
	if (err)
		goto out_free_inode;

	err = omfs_add_link(dentry, inode);			/* (3) */
	if (err)
		goto out_free_inode;

	d_instantiate(dentry, inode);				/* (4) */
	return 0;

out_free_inode:
	iput(inode);
	return err;
}
1、 omfs_new_inode (dir=0xdee25d88, mode=16877) at fs/omfs/inode.c:29
分配新的inode
A、
struct omfs_sb_info *sbi = OMFS_SB(dir->i_sb);
inode = new_inode(dir->i_sb);
B、为新分配的inode确定索引节点号
err = omfs_allocate_range(dir->i_sb, sbi->s_mirrors, sbi->s_mirrors,
&new_block, &len);
omfs_allocate_range (sb=0xdd91e400, min_request=2, max_request=2,
return_block=0xddb0deb8, return_size=0xddb0decc) at fs/omfs/bitmap.c:135
/*
 * Find a run of free bits in the allocation bitmap and claim it.
 *
 * Each of the sbi->s_imap_size bitmap entries is scanned for a zero bit.
 * From each zero bit, count_run() measures the run (max_request is passed
 * as its limit). The first run of at least min_request bits is accepted.
 *
 * On success *return_block receives the run's starting bit index, counted
 * across all entries, and *return_size its length; set_run() is then called
 * with a final argument of 1 (presumably marking the run as in use).
 *
 * Returns the result of set_run() when a run is found, or -ENOSPC when no
 * run qualifies. The whole search and update runs under s_bitmap_lock.
 */
int omfs_allocate_range(struct super_block *sb,
			int min_request,
			int max_request,
			u64 *return_block,
			int *return_size)
{
	struct omfs_sb_info *sbi = OMFS_SB(sb);
	int bits_per_entry = 8 * sb->s_blocksize;	/* traced run: 8 * 2048 = 16384 */
	int ret = 0;
	int i, run, bit;

	mutex_lock(&sbi->s_bitmap_lock);
	for (i = 0; i < sbi->s_imap_size; i++) {	/* traced run: s_imap_size = 1 */
		bit = 0;
		while (bit < bits_per_entry) {
			bit = find_next_zero_bit(sbi->s_imap[i], bits_per_entry,
				bit);
			/* no zero bit left in this entry: move to the next one */
			if (bit == bits_per_entry)
				break;
			run = count_run(&sbi->s_imap[i], bits_per_entry,
				sbi->s_imap_size-i, bit, max_request);
			if (run >= min_request)
				goto found;
			/* run too short: resume the search just past it */
			bit += run;
		}
	}
	ret = -ENOSPC;
	goto out;

found:
	*return_block = i * bits_per_entry + bit;
	*return_size = run;
	ret = set_run(sb, i, bits_per_entry, bit, run, 1);

out:
	mutex_unlock(&sbi->s_bitmap_lock);
	return ret;
}
(gdb) p *sbi->s_imap[0]
$19 = 63
63 = 0011 1111
bit是第一个为0的位的下标,这里是6
150 run = count_run(&sbi->s_imap[i], bits_per_entry,
(gdb) s
count_run (addr=0xde0f8268, nbits=16384, addrlen=1, bit=6, max=2)
at fs/omfs/bitmap.c:28
到
found:
*return_block = i * bits_per_entry + bit;
*return_size = run;
ret = set_run(sb, i, bits_per_entry, bit, run, 1);
set_run之后,位图变为:
p *sbi->s_imap[0]
$29 = 127
位图127 = 0111 1111 新的block表示被占用了
填充inode结构的体的各个关键项:
inode->i_ino = new_block;---à6
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_mapping->a_ops = &omfs_aops;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
因为创建的是目录:
case S_IFDIR:
inode->i_op = &omfs_dir_inops;
inode->i_fop = &omfs_dir_operations;
inode->i_size = sbi->s_sys_blocksize;
inc_nlink(inode);
2、 omfs_make_empty (inode=0xdee25ab8, sb=0xdd91e400) at fs/omfs/dir.c:88
读取inode为6的block,地址为0xc000
因为inode是目录,所以将OMFS_DIR_START = 0x1b8开始到0x800都设为0xff。
将
oi->i_head.h_self = cpu_to_be64(inode->i_ino); 设为6
oi->i_sibling = ~cpu_to_be64(0ULL); 设为ffff ffff ffff ffff
3、 omfs_add_link (dentry=0xdedc8660, inode=0xdee25ab8) at fs/omfs/dir.c:116
利用hash建立上级目录和目录包含文件之间的联系
static int omfs_add_link(struct dentry *dentry, struct inode *inode)
{
struct inode *dir = dentry->d_parent->d_inode; dir就是dir1目录上级/目录的inode
const char *name = dentry->d_name.name; “dir1”
int namelen = dentry->d_name.len; 4
struct omfs_inode *oi;
struct buffer_head *bh;
u64 block;
__be64 *entry;
int ofs;
/* just prepend to head of queue in proper bucket */
bh = omfs_get_bucket(dir, name, namelen, &ofs);--------------
if (!bh)
goto out;
entry = (__be64 *) &bh->b_data[ofs];
block = be64_to_cpu(*entry);
*entry = cpu_to_be64(inode->i_ino);
mark_buffer_dirty(bh);
brelse(bh);
/* now set the sibling and parent pointers on the new inode */
bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino));
if (!bh)
goto out;
oi = (struct omfs_inode *) bh->b_data;
memcpy(oi->i_name, name, namelen);
memset(oi->i_name + namelen, 0, OMFS_NAMELEN - namelen);
oi->i_sibling = cpu_to_be64(block);
oi->i_parent = cpu_to_be64(dir->i_ino);
mark_buffer_dirty(bh);
brelse(bh);
dir->i_ctime = CURRENT_TIME_SEC;
/* mark affected inodes dirty to rebuild checksums */
mark_inode_dirty(dir);
mark_inode_dirty(inode);
return 0;
out:
return -ENOMEM;
}
bh = omfs_get_bucket(dir, name, namelen, &ofs);
/*
 * Locate the hash bucket slot for @name inside directory @dir.
 *
 * The bucket table starts at byte OMFS_DIR_START of the directory's block
 * and holds 8-byte slots. The slot index comes from omfs_hash() over the
 * name. *ofs receives the byte offset of the slot within the block.
 *
 * Returns the result of sb_bread() on the directory's block.
 */
static struct buffer_head *omfs_get_bucket(struct inode *dir,
		const char *name, int namelen, int *ofs)
{
	int nbuckets = (dir->i_size - OMFS_DIR_START)/8;	/* traced run: (2048 - 0x1b8) / 8 = 201 */
	int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino);	/* traced run: dir->i_ino = 3, so block = 12 */
	int bucket = omfs_hash(name, namelen, nbuckets);	/* traced run: hash of the name = 45 */

	*ofs = OMFS_DIR_START + bucket * 8;	/* traced run: 0x1b8 + 45 * 8 = 0x320 */
	return sb_bread(dir->i_sb, block);
}
entry = (__be64 *) &bh->b_data[ofs];
block = be64_to_cpu(*entry);
*entry = cpu_to_be64(inode->i_ino);
这样将新建dir1的inode的i_ino=6放在了0x320的偏移地址上。
读dir1所在的block,设置omfs_inode的一些值:
oi = (struct omfs_inode *) bh->b_data;
memcpy(oi->i_name, name, namelen);“dir1”
memset(oi->i_name + namelen, 0, OMFS_NAMELEN - namelen);
oi->i_sibling = cpu_to_be64(block);
oi->i_parent = cpu_to_be64(dir->i_ino);3
4、 d_instantiate(dentry, inode);
建立dentry和inode的联系
将dentry->d_inode = inode;
这样,一个文件夹就建立了,现在再看看硬盘布局的改变:
在0x6320的位置是:0000 0000 0000 0006 这就是dir1的inode号
再看看0x2000×6 = 0xc000地址
其实经过了上面的过程,实际的数据并没有完全写入硬盘,等一段时间后才会写入。
因为是块设备,不会在一有读写的时候就立即写入硬盘,而是通过一定的策略去写硬盘,这样会大大提高读写的效率。每一次写入都要寻址,这个磁盘寻址是整个计算机最慢的操作之一。为了优化寻址操作,内核既不会简单的按请求接受次序,也不会立即将其提交给磁盘。相反,它会在提交前,先执行名为合并与排序的预操作,这种预操作可以极大地提高系统的整体性能。在内核中负责提交I/O请求的子系统称为I/O调度程序。
我们这里先不讨论I/O调度程序的策略算法,我们先看看实际的写入操作。
在Breakpoint 2, omfs_write_inode (inode=0xdde13c20, wbc=0xde455ebc) 下断点
mkdir dir1后,过一段时间会到这个断点,看看函数堆栈:
#0 omfs_write_inode (inode=0xdde13c20, wbc=0xde455ebc) at fs/omfs/inode.c:169
#1 0xc02b749c in write_inode (inode=0xdde13c20, wbc=0xde455ebc)
at fs/fs-writeback.c:388
#2 0xc02b76fb in writeback_single_inode (inode=0xdde13c20, wbc=0xde455ebc)
at fs/fs-writeback.c:477
#3 0xc02b79f1 in writeback_sb_inodes (sb=0xde72e600, wb=0xd2339630,
wbc=0xde455ebc) at fs/fs-writeback.c:640
#4 0xc02b7b58 in writeback_inodes_wb (wb=0xd2339630, wbc=0xde455ebc)
at fs/fs-writeback.c:691
#5 0xc02b7d94 in wb_writeback (wb=0xd2339630, args=0xde455f14)
at fs/fs-writeback.c:786
#6 0xc02b7fd6 in wb_check_old_data_flush (wb=0xd2339630)
at fs/fs-writeback.c:879
#7 0xc02b8086 in wb_do_writeback (wb=0xd2339630, force_wait=0)
at fs/fs-writeback.c:923
#8 0xc02b80bd in bdi_writeback_task (wb=0xd2339630) at fs/fs-writeback.c:939
#9 0xc024ec76 in bdi_start_fn (ptr=0xd2339630) at mm/backing-dev.c:316
#10 0xc019ce95 in kthread (_create=0xdf923f28) at kernel/kthread.c:78
#11 0xc0104c06 in ?? () at arch/x86/kernel/entry_32.S:1051
可以看到我们writeback的是”/”目录的inode。
这个断点会再停一次,因为我们会回写”dir1”对应的inode:
接下来,看看目录的删除:
系统调用rmdir dir1
#0 omfs_rmdir (dir=0xdee25d88, dentry=0xdedc8660) at fs/omfs/dir.c:261
#1 0xc029fff3 in vfs_rmdir (dir=0xdee25d88, dentry=0xdedc8660)
at fs/namei.c:2178
#2 0xc02a012a in do_rmdir (dfd=-100, pathname=0xbfe69932 "dir1")
at fs/namei.c:2230
#3 0xc02a0182 in sys_rmdir (pathname=0xbfe69932 "dir1") at fs/namei.c:2245
#4 0xc0104657 in ?? () at arch/x86/kernel/entry_32.S:457
Backtrace stopped: previous frame inner to this frame (corrupt stack?)
sys_rmdir → do_rmdir → vfs_rmdir → omfs_rmdir
Dir是根目录的inode,dentry是dir1的目录项。
/*
 * rmdir handler: remove the directory named by @dentry from @dir.
 *
 * Returns -ENOTEMPTY unless omfs_dir_is_empty() accepts the directory;
 * otherwise returns the result of omfs_unlink().
 */
static int omfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int err = -ENOTEMPTY;
struct inode *inode = dentry->d_inode;
if (omfs_dir_is_empty(inode)) {
err = omfs_unlink(dir, dentry);
if (!err)
/*
 * omfs_unlink() has already dropped one link; drop a second one
 * here. NOTE(review): presumably this balances the extra
 * inc_nlink() done for directories at creation - confirm.
 */
inode_dec_link_count(inode);
}
return err;
}
omfs_dir_is_empty(inode)判断dir1目录是不是空目录
判断方法很简单:
查找dir1对应的block中的数据,从OMFS_DIR_START到0x800查找,看每一个64位的数据是否都等于0xffff ffff ffff ffff,如果有一个不等,那么目录不为空。
若目录不为空,omfs_rmdir返回ENOTEMPTY错误号,
若目录为空,则
omfs_unlink(dir, dentry);
/*
 * unlink handler: remove the directory entry for @dentry.
 *
 * Calls omfs_delete_entry(); on success decrements the link count of the
 * dentry's inode and marks @dir dirty.
 *
 * Returns 0 on success, or the error from omfs_delete_entry().
 */
static int omfs_unlink(struct inode *dir, struct dentry *dentry)
{
int ret;
struct inode *inode = dentry->d_inode;
ret = omfs_delete_entry(dentry);
if (ret)
goto end_unlink;
inode_dec_link_count(inode);
mark_inode_dirty(dir);
end_unlink:
return ret;
}
可以看出,在删除文件夹的时候,或者说在删除文件的时候,我们删除了entry,并没有动文件的内容,其实可以理解,entry删除了,那些内容也不会用到。
omfs_delete_inode:当entry删除后,这个函数会被调用,需要清除bitmap中的相应位。
调用关系:
Breakpoint 4, omfs_delete_inode (inode=0xdee3c708) at fs/omfs/inode.c:183
183 truncate_inode_pages(&inode->i_data, 0);
(gdb) bt
#0 omfs_delete_inode (inode=0xdee3c708) at fs/omfs/inode.c:183
#1 0xc02ab9fb in generic_delete_inode (inode=0xdee3c708) at fs/inode.c:1216
#2 0xc02abcb2 in generic_drop_inode (inode=0xdee3c708) at fs/inode.c:1290
#3 0xc02abd03 in iput_final (inode=0xdee3c708) at fs/inode.c:1314
#4 0xc02abd4f in iput (inode=0xdee3c708) at fs/inode.c:1332
#5 0xc02a710e in dentry_iput (dentry=0xdef364c8) at fs/dcache.c:118
#6 0xc02a7287 in d_kill (dentry=0xdef364c8) at fs/dcache.c:177
#7 0xc02a73b4 in dput (dentry=0xdef364c8) at fs/dcache.c:256
#8 0xc02a0143 in do_rmdir (dfd=-100, pathname=0xbfdcd932 "dir1")
at fs/namei.c:2234
#9 0xc02a0182 in sys_rmdir (pathname=0xbfdcd932 "dir1") at fs/namei.c:2245
#10 0xc0104657 in ?? () at arch/x86/kernel/entry_32.S:457
Backtrace stopped: previous frame inner to this frame (corrupt stack?)
对于删除文件的inode,omfs_clear_range(inode->i_sb, inode->i_ino, 2);
omfs_clear_range (sb=0xdd91e400, block=6, count=2) at fs/omfs/bitmap.c:176
这个函数就会删除bitmap相应的位。