红联Linux门户
Linux帮助

Linux文件系统omfs的目录创建和删除

发布时间:2014-11-24 21:48:14来源:linux网站作者:dndxhej

Omfs文件目录的结构是通过对name做hash来实现的:

定义inode_operations结构体,

const struct inode_operations omfs_dir_inops = {

.lookup = omfs_lookup,

.mkdir = omfs_mkdir,

.rename = omfs_rename,

.create = omfs_create,

.unlink = omfs_unlink,

.rmdir = omfs_rmdir,

};

 

系统调用mkdir dir1

 

Breakpoint 2, omfs_mkdir (dir=0xdee25d88, dentry=0xdedc8660, mode=493)

at fs/omfs/dir.c:298

298   return omfs_add_node(dir, dentry, mode | S_IFDIR);

(gdb) bt

#0  omfs_mkdir (dir=0xdee25d88, dentry=0xdedc8660, mode=493)

at fs/omfs/dir.c:298

#1  0xc029fdb6 in vfs_mkdir (dir=0xdee25d88, dentry=0xdedc8660, mode=493)

at fs/namei.c:2086

#2  0xc029fe8c in sys_mkdirat (dfd=-100, pathname=0xbf8de932 "dir1", mode=493)

at fs/namei.c:2116

#3  0xc029fef3 in sys_mkdir (pathname=0xbf8de932 "dir1", mode=511)

at fs/namei.c:2131

#4  0xc0104657 in ?? () at arch/x86/kernel/entry_32.S:457

 

可以看到系统调用的过程:

sys_mkdir → sys_mkdirat → vfs_mkdir → omfs_mkdir

dir=0xdee25d88

dentry=0xdedc8660

 

p *dentry

$7 = {d_count = {counter = 1}, d_flags = 0, d_lock = {{rlock = {raw_lock = {

 slock = 257}}}}, d_mounted = 0, d_inode = 0x0, d_hash = {next = 0x0,

pprev = 0xc144096c}, d_parent = 0xdf4ea198, d_name = {hash = 25987720,

len = 4, name = 0xdedc86bc "dir1"}, d_lru = {next = 0xdedc868c,

prev = 0xdedc868c}, d_u = {d_child = {next = 0xdf4ea1d4,

  prev = 0xdf4ea1d4}, d_rcu = {next = 0xdf4ea1d4, func = 0xdf4ea1d4}},

  d_subdirs = {next = 0xdedc869c, prev = 0xdedc869c}, d_alias = {

next = 0xdedc86a4, prev = 0xdedc86a4}, d_time = 1701080941, d_op = 0x0,

  d_sb = 0xdd91e400, d_fsdata = 0x0,

  d_iname = "dir1\000ne\000_larval_drop\000__ticket_spin_unloc"}

 

p *(struct dentry *)0xdf4ea198

$8 = {d_count = {counter = 6}, d_flags = 16, d_lock = {{rlock = {raw_lock = {

 slock = 1028}}}}, d_mounted = 0, d_inode = 0xdee25d88, d_hash = {

next = 0x0, pprev = 0x0}, d_parent = 0xdf4ea198, d_name = {hash = 0,

len = 1, name = 0xdf4ea1f4 "/"}, d_lru = {next = 0xdf4ea1c4,

prev = 0xdf4ea1c4}, d_u = {d_child = {next = 0xdf4ea1cc,

  prev = 0xdf4ea1cc}, d_rcu = {next = 0xdf4ea1cc, func = 0xdf4ea1cc}},

  d_subdirs = {next = 0xdedc8694, prev = 0xdedc8694}, d_alias = {

next = 0xdee25da0, prev = 0xdee25da0}, d_time = 0, d_op = 0x0,

  d_sb = 0xdd91e400, d_fsdata = 0x0,

  d_iname = "/\000v\000-linux-gnu\000\337`M|\300\340L|\300\000d\200\337\000\000\000\000\030\242N\337h\241N", <incomplete sequence \337>}

 

p *dir

$6 = {i_hash = {next = 0x0, pprev = 0xc1463a90}, i_list = {next = 0xdf597480,

prev = 0xd2335648}, i_sb_list = {next = 0xdd91e474, prev = 0xdd91e474},

  i_dentry = {next = 0xdf4ea1dc, prev = 0xdf4ea1dc}, i_ino = 3, i_count = {

counter = 1}, i_nlink = 2, i_uid = 0, i_gid = 0, i_rdev = 0,

  i_blkbits = 11, i_version = 0, i_size = 2048, i_size_seqcount = {

sequence = 0}, i_atime = {tv_sec = 1328683425, tv_nsec = 0}, i_mtime = {

tv_sec = 1328683648, tv_nsec = 915000}, i_ctime = {tv_sec = 1328683648,

tv_nsec = 915000}, i_blocks = 0, i_bytes = 0, i_mode = 16877, i_lock = {{

  rlock = {raw_lock = {slock = 514}}}}, i_mutex = {count = {counter = 0},

wait_lock = {{rlock = {raw_lock = {slock = 0}}}}, wait_list = {

  next = 0xdee25e08, prev = 0xdee25e08}, owner = 0xddb0c000},

  i_alloc_sem = {count = 0, wait_lock = {{rlock = {raw_lock = {slock = 0}}}},

wait_list = {next = 0xdee25e1c, prev = 0xdee25e1c}}, i_op = 0xe27ccb60,

  i_fop = 0xe27ccbc0, i_sb = 0xdd91e400, i_flock = 0x0,

  i_mapping = 0xdee25e38, i_data = {host = 0xdee25d88, page_tree = {

  height = 0, gfp_mask = 32, rnode = 0x0}, tree_lock = {{rlock = {

 raw_lock = {slock = 0}}}}, i_mmap_writable = 0, i_mmap = {

  prio_tree_node = 0x0, index_bits = 1, raw = 1}, i_mmap_nonlinear = {

  next = 0xdee25e58, prev = 0xdee25e58}, i_mmap_lock = {{rlock = {

 raw_lock = {slock = 0}}}}, truncate_count = 0, nrpages = 0,

writeback_index = 0, a_ops = 0xe27ccd20, flags = 131290,

backing_dev_info = 0xd233559c, private_lock = {{rlock = {raw_lock = {

   slock = 0}}}}, private_list = {next = 0xdee25e80,

---Type <return> to continue, or q <return> to quit---

  prev = 0xdee25e80}, assoc_mapping = 0x0}, i_dquot = {0x0, 0x0},

  i_devices = {next = 0xdee25e94, prev = 0xdee25e94}, {i_pipe = 0x0,

i_bdev = 0x0, i_cdev = 0x0}, i_generation = 0, i_fsnotify_mask = 0,

  i_fsnotify_mark_entries = {first = 0x0}, inotify_watches = {

next = 0xdee25eac, prev = 0xdee25eac}, inotify_mutex = {count = {

  counter = 1}, wait_lock = {{rlock = {raw_lock = {slock = 0}}}},

wait_list = {next = 0xdee25ebc, prev = 0xdee25ebc}, owner = 0x0},

  i_state = 1, dirtied_when = 4294956485, i_flags = 0, i_writecount = {

counter = 0}, i_security = 0x0, i_acl = 0xffffffff,

  i_default_acl = 0xffffffff, i_private = 0x0}

inode *dir 是新挂载omfs文件系统的根目录的inode,dentry *dentry是dir1。

 

static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)

{

return omfs_add_node(dir, dentry, mode | S_IFDIR);

}

 

static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode)

{

int err;

struct inode *inode = omfs_new_inode(dir, mode);------------------------→1

 

if (IS_ERR(inode))

return PTR_ERR(inode);

 

err = omfs_make_empty(inode, dir->i_sb);------------------------------→2

if (err)

goto out_free_inode;

 

err = omfs_add_link(dentry, inode);-------------------------------------→3

if (err)

goto out_free_inode;

 

d_instantiate(dentry, inode);---------------------------------------------→4

return 0;

 

out_free_inode:

iput(inode);

return err;

}

 

1、  omfs_new_inode (dir=0xdee25d88, mode=16877) at fs/omfs/inode.c:29

分配新的inode

A、 

struct omfs_sb_info *sbi = OMFS_SB(dir->i_sb);

 

inode = new_inode(dir->i_sb);

B、为新分配的inode确定索引节点号

err = omfs_allocate_range(dir->i_sb, sbi->s_mirrors, sbi->s_mirrors,

&new_block, &len);

omfs_allocate_range (sb=0xdd91e400, min_request=2, max_request=2,

return_block=0xddb0deb8, return_size=0xddb0decc) at fs/omfs/bitmap.c:135

 

int omfs_allocate_range(struct super_block *sb,

int min_request,

int max_request,

u64 *return_block,

int *return_size)

{

 struct omfs_sb_info *sbi = OMFS_SB(sb);

 int bits_per_entry = 8 * sb->s_blocksize; 16384=8×2048

 int ret = 0;

 int i, run, bit;

 

 mutex_lock(&sbi->s_bitmap_lock);

 for (i = 0; i < sbi->s_imap_size; i++) {// s_imap_size = 1

bit = 0;

while (bit < bits_per_entry) {

bit = find_next_zero_bit(sbi->s_imap[i], bits_per_entry,

bit);

 

if (bit == bits_per_entry)

break;

 

run = count_run(&sbi->s_imap[i], bits_per_entry,

sbi->s_imap_size-i, bit, max_request);

 

if (run >= min_request)

goto found;

bit += run;

}

 }

 ret = -ENOSPC;

 goto out;

 

found:

 *return_block = i * bits_per_entry + bit;

 *return_size = run;

 ret = set_run(sb, i, bits_per_entry, bit, run, 1);

 

out:

 mutex_unlock(&sbi->s_bitmap_lock);

 return ret;

}

 

(gdb) p *sbi->s_imap[0]

$19 = 63

63 = 0011 1111

bit是位图中第一个值为0的位的下标(从0开始计数),这里是6

 

150   run = count_run(&sbi->s_imap[i], bits_per_entry,

(gdb) s

count_run (addr=0xde0f8268, nbits=16384, addrlen=1, bit=6, max=2)

at fs/omfs/bitmap.c:28

 

found:

 *return_block = i * bits_per_entry + bit;

 *return_size = run;

 ret = set_run(sb, i, bits_per_entry, bit, run, 1);

p *sbi->s_imap[0]

$29 = 127

位图127 = 0111 1111 新的block表示被占用了

 

填充inode结构体的各个关键项:

 inode->i_ino = new_block;---→6

 inode->i_mode = mode;

 inode->i_uid = current_fsuid();

 inode->i_gid = current_fsgid();

 inode->i_mapping->a_ops = &omfs_aops;

 

 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

因为创建的是目录(mode=16877,即S_IFDIR|0755):

 case S_IFDIR:

inode->i_op = &omfs_dir_inops;

inode->i_fop = &omfs_dir_operations;

inode->i_size = sbi->s_sys_blocksize;

inc_nlink(inode);

2、  omfs_make_empty (inode=0xdee25ab8, sb=0xdd91e400) at fs/omfs/dir.c:88

读取inode为6的block,地址为0xc000

因为inode是目录,所以将OMFS_DIR_START = 0x1b8开始到0x800都设为0xff。

oi->i_head.h_self = cpu_to_be64(inode->i_ino);   设为6

oi->i_sibling = ~cpu_to_be64(0ULL); 设为ffff ffff ffff ffff

 

3、  omfs_add_link (dentry=0xdedc8660, inode=0xdee25ab8) at fs/omfs/dir.c:116

利用hash建立上级目录和目录包含文件之间的联系

static int omfs_add_link(struct dentry *dentry, struct inode *inode)

{

struct inode *dir = dentry->d_parent->d_inode;  dir就是dir1目录上级/目录的inode

const char *name = dentry->d_name.name;  “dir1”

int namelen = dentry->d_name.len;   4

struct omfs_inode *oi;

struct buffer_head *bh;

u64 block;

__be64 *entry;

int ofs;

 

/* just prepend to head of queue in proper bucket */

bh = omfs_get_bucket(dir, name, namelen, &ofs);--------------

if (!bh)

  goto out;

 

entry = (__be64 *) &bh->b_data[ofs];

block = be64_to_cpu(*entry);

*entry = cpu_to_be64(inode->i_ino);

mark_buffer_dirty(bh);

brelse(bh);

 

/* now set the sibling and parent pointers on the new inode */

bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino));

if (!bh)

  goto out;

 

oi = (struct omfs_inode *) bh->b_data;

memcpy(oi->i_name, name, namelen);

memset(oi->i_name + namelen, 0, OMFS_NAMELEN - namelen);

oi->i_sibling = cpu_to_be64(block);

oi->i_parent = cpu_to_be64(dir->i_ino);

mark_buffer_dirty(bh);

brelse(bh);

 

dir->i_ctime = CURRENT_TIME_SEC;

 

/* mark affected inodes dirty to rebuild checksums */

mark_inode_dirty(dir);

mark_inode_dirty(inode);

return 0;

out:

return -ENOMEM;

}

 

bh = omfs_get_bucket(dir, name, namelen, &ofs);

static struct buffer_head *omfs_get_bucket(struct inode *dir,

  const char *name, int namelen, int *ofs)

{

int nbuckets = (dir->i_size - OMFS_DIR_START)/8;  //(2048-0x1b8)/8=201

int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino);  dir->i_ino=3 所以 block=12

int bucket = omfs_hash(name, namelen, nbuckets);  将name做hash值:45

 

*ofs = OMFS_DIR_START + bucket * 8;   0x1b8+45*8=0x320

return sb_bread(dir->i_sb, block);

}

 

entry = (__be64 *) &bh->b_data[ofs];

block = be64_to_cpu(*entry);

*entry = cpu_to_be64(inode->i_ino);

 

这样将新建dir1的inode的i_ino=6放在了0x320的偏移地址上。

 

读dir1所在的block,设置omfs_inode的一些值:

oi = (struct omfs_inode *) bh->b_data;

memcpy(oi->i_name, name, namelen);“dir1”

memset(oi->i_name + namelen, 0, OMFS_NAMELEN - namelen);

oi->i_sibling = cpu_to_be64(block); 

oi->i_parent = cpu_to_be64(dir->i_ino);3

4、  d_instantiate(dentry, inode);

建立dentry和inode的联系

将dentry->d_inode = inode;

这样,一个文件夹就建立了,现在再看看硬盘布局的改变:

在0x6320的位置是:0000 0000 0000 0006  这就是dir1的inode号

再看看0x2000×6 = 0xc000地址

其实经过了上面的过程,实际的数据并没有完全写入硬盘,等一段时间后才会写入。

因为是块设备,不会在一有读写的时候就立即写入硬盘,而是通过一定的策略去写硬盘,这样会大大提高读写的效率。每一次写入都要寻址,这个磁盘寻址是整个计算机最慢的操作之一。为了优化寻址操作,内核既不会简单的按请求接受次序,也不会立即将其提交给磁盘。相反,它会在提交前,先执行名为合并与排序的预操作,这种预操作可以极大地提高系统的整体性能。在内核中负责提交I/O请求的子系统称为I/O调度程序。

我们这里先不讨论I/O调度程序的策略算法,我们先看看实际的写入操作。

 

在omfs_write_inode处下断点:Breakpoint 2, omfs_write_inode (inode=0xdde13c20, wbc=0xde455ebc)

Mkdir dir1后,过一段时间会到这个断点,看看函数堆栈:

#0  omfs_write_inode (inode=0xdde13c20, wbc=0xde455ebc) at fs/omfs/inode.c:169

#1  0xc02b749c in write_inode (inode=0xdde13c20, wbc=0xde455ebc)

at fs/fs-writeback.c:388

#2  0xc02b76fb in writeback_single_inode (inode=0xdde13c20, wbc=0xde455ebc)

at fs/fs-writeback.c:477

#3  0xc02b79f1 in writeback_sb_inodes (sb=0xde72e600, wb=0xd2339630,

wbc=0xde455ebc) at fs/fs-writeback.c:640

#4  0xc02b7b58 in writeback_inodes_wb (wb=0xd2339630, wbc=0xde455ebc)

at fs/fs-writeback.c:691

#5  0xc02b7d94 in wb_writeback (wb=0xd2339630, args=0xde455f14)

at fs/fs-writeback.c:786

#6  0xc02b7fd6 in wb_check_old_data_flush (wb=0xd2339630)

at fs/fs-writeback.c:879

#7  0xc02b8086 in wb_do_writeback (wb=0xd2339630, force_wait=0)

at fs/fs-writeback.c:923

#8  0xc02b80bd in bdi_writeback_task (wb=0xd2339630) at fs/fs-writeback.c:939

#9  0xc024ec76 in bdi_start_fn (ptr=0xd2339630) at mm/backing-dev.c:316

#10 0xc019ce95 in kthread (_create=0xdf923f28) at kernel/kthread.c:78

#11 0xc0104c06 in ?? () at arch/x86/kernel/entry_32.S:1051

可以看到我们writeback的是“/”目录的inode。

 

这个断点会再停一次,因为我们会回写“dir1”对应的inode:

 

 

接下来,看看目录的删除:

 

 

系统调用rmdir dir1

 

#0  omfs_rmdir (dir=0xdee25d88, dentry=0xdedc8660) at fs/omfs/dir.c:261

#1  0xc029fff3 in vfs_rmdir (dir=0xdee25d88, dentry=0xdedc8660)

at fs/namei.c:2178

#2  0xc02a012a in do_rmdir (dfd=-100, pathname=0xbfe69932 "dir1")

at fs/namei.c:2230

#3  0xc02a0182 in sys_rmdir (pathname=0xbfe69932 "dir1") at fs/namei.c:2245

#4  0xc0104657 in ?? () at arch/x86/kernel/entry_32.S:457

Backtrace stopped: previous frame inner to this frame (corrupt stack?)

 

sys_rmdir → do_rmdir → vfs_rmdir → omfs_rmdir

 

Dir是根目录的inode,dentry是dir1的目录项。

 

static int omfs_rmdir(struct inode *dir, struct dentry *dentry)

{

int err = -ENOTEMPTY;

struct inode *inode = dentry->d_inode;

 

if (omfs_dir_is_empty(inode)) {

err = omfs_unlink(dir, dentry);

if (!err)

inode_dec_link_count(inode);

}

return err;

}

omfs_dir_is_empty(inode)判断dir1目录是不是空目录

判断方法很简单:

查找dir1对应的block中的数据,从OMFS_DIR_START到0x800逐项检查,看每一个64位的数据是否都等于0xffff ffff ffff ffff,只要有一个不等,目录就不为空。

若目录不为空,omfs_rmdir返回错误码-ENOTEMPTY,

若目录为空,则

omfs_unlink(dir, dentry);

static int omfs_unlink(struct inode *dir, struct dentry *dentry)

{

int ret;

struct inode *inode = dentry->d_inode;

 

ret = omfs_delete_entry(dentry);

if (ret)

goto end_unlink;

 

inode_dec_link_count(inode);

mark_inode_dirty(dir);

 

end_unlink:

return ret;

}

 

可以看出,在删除文件夹的时候,或者说在删除文件的时候,我们删除了entry,并没有动文件的内容,其实可以理解,entry删除了,那些内容也不会用到。

omfs_delete_inode // 当entry被删除、随后dput/iput释放inode的最后一个引用时,这个函数会被调用,它需要清除bitmap中的相应位。

 调用关系:

Breakpoint 4, omfs_delete_inode (inode=0xdee3c708) at fs/omfs/inode.c:183

183   truncate_inode_pages(&inode->i_data, 0);

(gdb) bt

#0  omfs_delete_inode (inode=0xdee3c708) at fs/omfs/inode.c:183

#1  0xc02ab9fb in generic_delete_inode (inode=0xdee3c708) at fs/inode.c:1216

#2  0xc02abcb2 in generic_drop_inode (inode=0xdee3c708) at fs/inode.c:1290

#3  0xc02abd03 in iput_final (inode=0xdee3c708) at fs/inode.c:1314

#4  0xc02abd4f in iput (inode=0xdee3c708) at fs/inode.c:1332

#5  0xc02a710e in dentry_iput (dentry=0xdef364c8) at fs/dcache.c:118

#6  0xc02a7287 in d_kill (dentry=0xdef364c8) at fs/dcache.c:177

#7  0xc02a73b4 in dput (dentry=0xdef364c8) at fs/dcache.c:256

#8  0xc02a0143 in do_rmdir (dfd=-100, pathname=0xbfdcd932 "dir1")

at fs/namei.c:2234

#9  0xc02a0182 in sys_rmdir (pathname=0xbfdcd932 "dir1") at fs/namei.c:2245

#10 0xc0104657 in ?? () at arch/x86/kernel/entry_32.S:457

Backtrace stopped: previous frame inner to this frame (corrupt stack?)

 

对于删除文件的inode,omfs_clear_range(inode->i_sb, inode->i_ino, 2);

omfs_clear_range (sb=0xdd91e400, block=6, count=2) at fs/omfs/bitmap.c:176

这个函数就会删除bitmap相应的位。