红联Linux门户
Linux帮助

Linux下omfs文件系统的硬盘布局

发布时间:2014-11-24 22:00:17来源:linux网站作者:dndxhej

文件系统是linux内核的重要组成部分,涉及到vfs、块IO层的调度机制,块设备驱动以及具体文件系统所采用的数据结构。所使用linux内核版本是2.6.34.1。

以fs/omfs为例,主要学习vfs的实现、omfs的硬盘布局、它所采用的数据结构为何能够针对MPEG文件做优化,以及omfs文件系统的具体文件读写是如何实现的。

OMFS:Optimized MPEG Filesystem

OMFS是由SonicBlue公司创建的用于ReplayTV DVR和MP3 player的文件系统。该文件系统是基于extent的(现代很多文件系统都采用extent替代block来管理磁盘;extent就是一段连续的block,可以有效减少元数据开销),可用的block大小在2k到8k之间,目录结构是基于hash的。

该文件系统在特定的流媒体设备中性能很好,但对于一般的应用,linux主流的文件系统应该性能更优。Omfs是如何针对MPEG做性能优化,这点还在探索中。


硬盘布局格式:

Omfs区分sysblocks和一般的数据blocks。Sysblock group由superblock信息、文件的元数据(metadata)、目录结构和extents构成。每一个sysblock都有一个包含CRC校验的头,而且可以在硬盘上连续的block中做镜像备份。Sysblock的大小可以比一个数据block小,但由于两者都用同样的64位块号寻址,较小的sysblock所在block中剩余的空间不会被使用。

Sysblock 头信息:

struct omfs_header { __be64 h_self;  /* FS block where this is located */ __be32 h_body_size;     /* size of useful data after header */ __be16 h_crc;   /* crc-ccitt of body_size bytes */

char h_fill1[2];

u8 h_version;   /* version, always 1 */

char h_type;    /* OMFS_INODE_X */

u8 h_magic;     /* OMFS_IMAGIC */

u8 h_check_xor; /* XOR of header bytes before this */ __be32 h_fill2; };


文件和目录都由omfs_inode表示:

struct omfs_inode { struct omfs_header i_head;      /* header */

__be64 i_parent;/* parent containing this inode */ __be64 i_sibling;       /* next inode in hash bucket */

__be64 i_ctime; /* ctime, in milliseconds */ char i_fill1[35];

char i_type;    /* OMFS_[DIR,FILE] */

__be32 i_fill2;

char i_fill3[64];

char i_name[OMFS_NAMELEN];      /* filename */

__be64 i_size;  /* size of file, in bytes */ };


OMFS中的目录是一个大的hash表。文件名经过hash计算,然后放到以OMFS_DIR_START开始的桶中。查找的时候需要hash文件名,然后通过i_sibling指针查找到匹配的i_name。

文件以omfs_inode结构体开头,后面跟着在OMFS_EXTENT_START开始的extent table。

struct omfs_extent_entry {

__be64 e_cluster;       /* start location of a set of blocks */

__be64 e_blocks;/* number of blocks after e_cluster */

};

struct omfs_extent {

__be64 e_next;  /* next extent table location */

__be32 e_extent_count;  /* total # extents in this table */

__be32 e_fill;

struct omfs_extent_entry e_entry;       /* start of extent entries */

};


通过mkomfs.c可以了解omfs的基本布局:

在这里我们不使用实际的硬盘,用loop设备模拟一下:

生成一个文件:dd if=/dev/zero of=file.img bs=512 count=10000

10000+0 records in

10000+0 records out

5120000 bytes (5.1 MB) copied, 0.102142 s, 50.1 MB/s


用losetup /dev/loop0 file.img将loop设备和file.img关联上。

再用mkomfs /dev/loop0来布局:

mkomfs的默认配置参数是:

fs_config_t config = {

.block_size = 8192,

.cluster_size = 8,

.clear_dev = 0

};

获取设备的大小:size=bs×count=512×10000=5120000

在create_fs(fp, size/512, &config);中:

block_size = 8192

blocks_per_sector = block_size / SECTOR_SIZE = 8192/512 = 16

blocks = sectors / blocks_per_sector = 10000/16 =625=0x271


初始化omfs_super_block结构体:

struct omfs_super_block {

char s_fill1[192];

char s_name[OMFS_SUPER_NAMELEN];    :"omfs"

__be64 s_root_block;       /* block number of omfs_root_block */    :ROOT_BLK=1

__be64 s_num_blocks;     /* total number of FS blocks */  :blocks=625=0x271

__be32 s_magic;/* OMFS_MAGIC */   : OMFS_MAGIC 0xC2993D87

__be32 s_blocksize;  /* size of a block */   :block_size=8192=0x2000

__be32 s_mirrors;    /* # of mirrors of system blocks */    :2

__be32 s_sys_blocksize;  /* size of non-data blocks */     :2048=0x800

};      288个字节

hexdump -C -s 0 -n 512 /dev/loop0

00000000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

000000c0  6f 6d 66 73 00 00 00 00  00 00 00 00 00 00 00 00  |omfs............|

000000d0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00000100  00 00 00 00 00 00 00 01  00 00 00 00 00 00 02 71  |...............q|

00000110  c2 99 3d 87 00 00 20 00  00 00 00 02 00 00 08 00  |..=... .........|

00000120  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00000200


将superblock的288个字节写在设备的最开头(偏移0处)。

初始化omfs_root_block结构体:

struct omfs_root_block {

struct omfs_header r_head;   /* header */

__be64 r_fill1;

__be64 r_num_blocks;     /* total number of FS blocks */     :blocks=625

__be64 r_root_dir;   /* block # of root directory */  : ROOT_DIR_BLK=3

__be64 r_bitmap;     /* block # of free space bitmap */      :BITMAP_BLK 5

__be32 r_blocksize;   /* size of a block */   :block_size=8192

__be32 r_clustersize;       /* size allocated for data blocks */   : cluster_size = 8

__be64 r_mirrors;    /* # of mirrors of system blocks */      :2

char r_name[OMFS_NAMELEN];     /* partition label */  :"omfs"

__be64 r_fill2;

};     336个字节

struct omfs_header {

__be64 h_self;    /* FS block where this is located */   :ROOT_BLK=1

__be32 h_body_size;/* size of useful data after header */  :336-24=312=0x138

__be16 h_crc;    /* crc-ccitt of body_size bytes */

char h_fill1[2];

u8 h_version;     /* version, always 1 */:1

char h_type;       /* OMFS_INODE_X */: OMFS_INODE_SYSTEM='s'

u8 h_magic;/* OMFS_IMAGIC */  : OMFS_IMAGIC 0xD2

u8 h_check_xor; /* XOR of header bytes before this */

__be32 h_fill2;

};    24个字节


然后移动到第8192字节(0x2000,即block 1 = ROOT_BLK)处,写入rootblock;再移动到0x4000(block 2)处,把rootblock再写一遍作为镜像。

hexdump -C -s 0x2000 -n 512 /dev/loop0

00002000  00 00 00 00 00 00 00 01  00 00 01 38 00 00 00 00  |...........8....|

00002010  01 73 d2 00 00 00 00 00  00 00 00 00 00 00 00 00  |.s..............|

00002020  00 00 00 00 00 00 02 71  00 00 00 00 00 00 00 03  |.......q........|

00002030  00 00 00 00 00 00 00 05  00 00 20 00 00 00 00 08  |.......... .....|

00002040  00 00 00 00 02 00 00 00  6f 6d 66 73 00 00 00 00  |........omfs....|

00002050  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00002200

hexdump -C -s 0x4000 -n 512 /dev/loop0

00004000  00 00 00 00 00 00 00 01  00 00 01 38 00 00 00 00  |...........8....|

00004010  01 73 d2 00 00 00 00 00  00 00 00 00 00 00 00 00  |.s..............|

00004020  00 00 00 00 00 00 02 71  00 00 00 00 00 00 00 03  |.......q........|

00004030  00 00 00 00 00 00 00 05  00 00 20 00 00 00 00 08  |.......... .....|

00004040  00 00 00 00 02 00 00 00  6f 6d 66 73 00 00 00 00  |........omfs....|

00004050  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00004200

Root directory inode信息:

struct omfs_inode {

struct omfs_header i_head;    /* header */

__be64 i_parent;      /* parent containing this inode */    :~0

__be64 i_sibling;       /* next inode in hash bucket */      :~0

__be64 i_ctime;  /* ctime, in milliseconds */

char i_fill1[35];

char i_type;/* OMFS_[DIR,FILE] */     :'D'

__be32 i_fill2;:1

char i_fill3[64];

char i_name[OMFS_NAMELEN];     /* filename */

__be64 i_size;    /* size of file, in bytes */    :2048 

};

struct omfs_header {

__be64 h_self;    /* FS block where this is located */   : ROOT_DIR_BLK 3

__be32 h_body_size;/* size of useful data after header */  :2048-24=2024

__be16 h_crc;    /* crc-ccitt of body_size bytes */

char h_fill1[2];

u8 h_version;     /* version, always 1 */     :1

char h_type;       /* OMFS_INODE_X */   : OMFS_INODE_NORMAL 'e'

u8 h_magic;/* OMFS_IMAGIC */:OMFS_IMAGIC 0xD2

u8 h_check_xor; /* XOR of header bytes before this */

__be32 h_fill2;

};


申请一块大小为2048字节的内存,前面416=0x1a0个字节放root inode,

从OMFS_DIR_START 0x1b8到2048=0x800全部设置为0xff。

将这2048个字节写到0x6000位置(block 3 = ROOT_DIR_BLK),再把同样的2048个字节写到0x8000位置(block 4)作为镜像。

hexdump -C -s 0x6000 -n 512 /dev/loop0

00006000  00 00 00 00 00 00 00 03  00 00 07 e8 1c 5c 00 00  |.............\..|

00006010  01 65 d2 1a 00 00 00 00  ff ff ff ff ff ff ff ff  |.e..............|

00006020  ff ff ff ff ff ff ff ff  00 00 01 35 5a 8e 42 bb  |...........5Z.B.|

00006030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00006050  00 00 00 44 00 00 00 01  00 00 00 00 00 00 00 00  |...D............|

00006060  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00006190  00 00 00 00 00 00 00 00  00 00 00 00 00 00 08 00  |................|

000061a0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

000061b0  00 00 00 00 00 00 00 00  ff ff ff ff ff ff ff ff  |................|

000061c0  ff ff ff ff ff ff ff ff  ff ff ff ff ff ff ff ff  |................|

*

00006200

hexdump -C -s 0x8000 -n 512 /dev/loop0

00008000  00 00 00 00 00 00 00 03  00 00 07 e8 1c 5c 00 00  |.............\..|

00008010  01 65 d2 1a 00 00 00 00  ff ff ff ff ff ff ff ff  |.e..............|

00008020  ff ff ff ff ff ff ff ff  00 00 01 35 5a 8e 42 bb  |...........5Z.B.|

00008030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00008050  00 00 00 44 00 00 00 01  00 00 00 00 00 00 00 00  |...D............|

00008060  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

00008190  00 00 00 00 00 00 00 00  00 00 00 00 00 00 08 00  |................|

000081a0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

000081b0  00 00 00 00 00 00 00 00  ff ff ff ff ff ff ff ff  |................|

000081c0  ff ff ff ff ff ff ff ff  ff ff ff ff ff ff ff ff  |................|

*

00008200


最后,就是free space bitmap了。

bitmap_size = (swap_be64(super.s_num_blocks) + 7)/8;  =(625+7)/8=79

dirty_size = (bitmap_size + 7)/8;    =(79+7)/8=10

first_blk = BITMAP_BLK + (bitmap_size +

swap_be32(super.s_blocksize)-1) / swap_be32(super.s_blocksize);  =5+(79+8192-1)/8192=6

因为bitmap的每一位代表一个block,所以可以计算出bitmap的字节数。

bitmap.bmap申请bitmap_size=79个字节的内存。

for (i=0; i<first_blk; i++)

{

bitmap.bmap[i/8] |= 1<<(i & 7);

}

bitmap.bmap[0] = 二进制 0011 1111 = 0x3f

因为blocks 0-5被用了,所以bitmap的相应位都置上1。

bitmap.dirty申请dirty_size=10个字节的内存,每个字节都置为0xff。

将bitmap写入0xa000(即block 5 = BITMAP_BLK;block从0开始编号,block 0放的是super block,所以这是第6个block)。

hexdump -C -s 0xa000 -n 512 /dev/loop0

0000a000  3f 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |?...............|

0000a010  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

0000a050  00 00 00 00 11 00 00 00  ff ff ff ff ff ff ff ff  |................|

0000a060  ff ff 00 00 c9 0c 02 00  00 00 00 00 00 00 00 00  |................|

0000a070  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

*

0000a190  00 00 00 00 00 00 00 00  00 00 00 00 00 00 08 00  |................|

0000a1a0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|

0000a1b0  00 00 00 00 00 00 00 00  ff ff ff ff ff ff ff ff  |................|

0000a1c0  ff ff ff ff ff ff ff ff  ff ff ff ff ff ff ff ff  |................|

*

0000a200


到此,omfs文件系统格式化ok了,mount之后就可以进行常见的文件操作了。