目录
EXT2文件系统结构概览
1.1 EXT2文件系统结构框图
如果文件比较小,其数据块不超过12个,其数据块索引就直接放在inode的i_block数组中;如果文件比较大,超过12个数据块,就需要分配间接块来保存数据块索引
1.2 EXT2重要数据结构
super_block是VFS中的标准结构,通过成员s_fs_info与特定文件系统相连
struct super_block | |
struct list_head s_list | 用于将超级块挂到全局链表super_blocks中 |
dev_t s_dev | 文件系统所在设备的设备号 |
unsigned long s_blocksize | 文件系统块大小 |
struct file_system_type *s_type | 文件系统类型,比如ext2_fs_type |
const struct super_operations *s_op | 负责inode的分配、inode元数据的同步等等 |
struct dentry *s_root | 文件系统根目录的dentry |
struct block_device *s_bdev | 文件系统所在块设备对应的block_device |
struct hlist_node s_instances | 用于挂到链表file_system_type ->fs_supers |
void *s_fs_info | 指向保存特定文件系统的结构,比如ext2_sb_info |
struct list_head s_inodes | 文件系统所有打开文件的inode链表 |
…… | …… |
结构体ext2_sb_info包含特定文件系统的所有信息,包含超级块,组描述符等等:
struct ext2_sb_info | |
unsigned long s_inodes_per_block | 每个block中可以存放多少个inode描述符 |
unsigned long s_blocks_per_group | 每个块组中包含的数据块数 |
unsigned long s_inodes_per_group | 每个块组中包含的inode数 |
unsigned long s_itb_per_group | 一个块组中用于存放inode的块数 |
unsigned long s_gdb_count | 用于存放组描述符的块数 |
unsigned long s_desc_per_block | 一个块中可以存放的组描述符数量 |
unsigned long s_groups_count | 块组的数量(每个块组对应一个组描述符) |
struct buffer_head * s_sbh | 指向存放原始超级块的缓存 |
struct ext2_super_block * s_es | 指向 s_sbh中的超级块结构 |
struct buffer_head ** s_group_desc | 读取超级块的时候也会将组描述符读入内存 |
int s_first_ino | 文件系统中第一个非保留的inode号 |
struct rb_root s_rsv_window_root | 预留窗口的红黑树 |
struct ext2_reserve_window_node s_rsv_window_head | 红黑树的第一个节点 |
…… | …… |
ext2_super_block是保存在磁盘上的原始超级块
struct ext2_super_block | |
__le32 s_inodes_count | 文件系统中Inode的数量 |
__le32 s_blocks_count | 文件系统中块数 |
__le32 s_r_blocks_count | 保留的块数 |
__le32 s_free_blocks_count | 空闲的块数 |
__le32 s_free_inodes_count | 空闲的inode数 |
__le32 s_first_data_block | 第一个数据块号 |
__le32 s_log_block_size | 块大小的移位值,实际块大小 = BLOCK_SIZE << s_log_block_size |
__le32 s_blocks_per_group | 每个块组的块数 |
__le32 s_inodes_per_group | 每个块组的inode数 |
__le32 s_first_ino | 第一个没有保留的inode |
__le16 s_inode_size | Inode结构体的大小 |
__le16 s_block_group_nr | 当前 ext2_super_block所在块组编号,超级块在磁盘中每个块组中都有备份 |
…… | …… |
结构体ext2_inode_info链接VFS inode和原始inode
struct ext2_inode_info | |
__le32 i_data[15]; | Inode的数据块索引数组(包含直接块索引和间接块索引) |
__u32 i_block_group; | Inode所属的块组 |
struct inode vfs_inode; | VFS inode |
…… | …… |
结构体ext2_inode是存放于磁盘上的原始inode
struct ext2_inode | |
__le16 i_mode | 文件模式, |
__le32 i_size | 文件大小(bytes) |
__le32 i_blocks | 文件大小(块) |
__le32 i_block[EXT2_N_BLOCKS] | 数据块索引数组(包含直接块索引和间接块索引) |
…… | …… |
结构体ext2_dir_entry_2也是磁盘上的一个结构,它表示目录下面的一个目录项,也就是目录的内容(子目录或者文件)。目录也有一个inode,也有数据块,其数据块上存放的每一项都用ext2_dir_entry_2来表示,例如:
chenying@chenying:~/workspace/kernel_4.12/linux-4.12.3/mm$ ls ~/workspace/
1496324869gf_common.h androidJ6 aosp dumpe2fs.txt gf_common.h kernel_4.12 log readme
struct ext2_dir_entry_2 | |
__le32 inode | 这个目录项对应的inode编号 |
__le16 rec_len | 本目录项的长度,即从本目录项起始位置到下一个目录项起始位置的偏移,方便在数据块上查找下一个目录项 |
__u8 name_len | 目录项名的长度 |
__u8 file_type | 文件类型,目录、普通文件、管道、链接等等 |
char name[] | 目录项名字 |
…… | …… |
struct ext2_group_desc | |
__le32 bg_block_bitmap | 数据块位图的块号 |
__le32 bg_inode_bitmap | Inode位图的块号 |
__le32 bg_inode_table | Inode表的块号 |
__le16 bg_free_blocks_count | 块组中空闲块的数量 |
__le16 bg_free_inodes_count | 块组中空闲inode的数量 |
__le16 bg_used_dirs_count | 块组中目录的数量 |
…… | …… |
用命令dumpe2fs可以dump出文件系统的信息:
chenying@chenying:~/workspace/kernel_4.12/linux-4.12.3/fs$ sudo dumpe2fs -h /dev/sda1
dumpe2fs 1.43.3 (04-Sep-2016)
Filesystem volume name: <none>
Last mounted on: /
Filesystem UUID: 22af4caf-a05b-4d8f-8004-30d531867b55
Filesystem magic number: 0xEF53
Filesystem revision #: 1 (dynamic)
Filesystem features: has_journal ext_attr resize_inode dir_index filetype needs_recovery extent 64bit flex_bg sparse_super large_file huge_file dir_nlink extra_isize metadata_csum
Filesystem flags: signed_directory_hash
Default mount options: user_xattr acl
Filesystem state: clean
Errors behavior: Continue
Filesystem OS type: Linux
Inode count: 16252928
Block count: 65011456
Reserved block count: 3250572
Free blocks: 23809796
Free inodes: 14213383
First block: 0
Block size: 4096
Fragment size: 4096
Group descriptor size: 64
Reserved GDT blocks: 1024
Blocks per group: 32768
Fragments per group: 32768
Inodes per group: 8192
Inode blocks per group: 512
Flex block group size: 16
Filesystem created: Fri Feb 17 21:00:51 2017
Last mount time: Sat Aug 12 10:26:31 2017
Last write time: Sat Aug 12 10:26:26 2017
Mount count: 32
块缓存
在深入ext2实现逻辑之前我们先插一节块缓存,下面先看块缓存结构:
struct buffer_head | |
unsigned long b_state | 缓存状态位图,例如 BH_Mapped关联到磁盘块; BH_Dirty:脏块; BH_Uptodate:块中数据可用等 |
struct buffer_head *b_this_page | 缓冲区环形链表 |
struct page *b_page | 缓冲区映射到的页 |
sector_t b_blocknr | 对应到磁盘上的块号 |
size_t b_size | 缓存大小 |
char *b_data | 缓存起始地址 |
struct block_device *b_bdev | 块设备,指定了数据的来源 |
…… | …… |
块缓存主要用在两个地方,页缓存和块设备原始数据读取(独立块缓存),例如超级块,组描述符块等等。在页缓存中块缓存依附于页,页释放之后块缓存就释放。独立块缓存由一个lru缓存来管理,这个时候页依附于块缓存,块缓存释放页就释放。
-
struct bh_lru {
-
struct buffer_head *bhs[BH_LRU_SIZE];
-
};
这两种块缓存都是用下面的函数创建的,它们的不同在于管理的视角不同。
-
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
-
int retry)
EXT2文件系统挂载
3.1 注册ext2文件系统类型
静态定义EXT2文件系统类型ext2_fs_type,并通过register_filesystem将其添加到全局链表file_systems上
-
static
struct file_system_type ext2_fs_type = {
-
.owner = THIS_MODULE,
-
.name =
"ext2",
-
.mount = ext2_mount,
-
.kill_sb = kill_block_super,
-
.fs_flags = FS_REQUIRES_DEV,
-
};
-
MODULE_ALIAS_FS(
"ext2");
-
-
-
static
int __
init init_ext2_fs(void)
-
{
-
int err;
-
-
-
err = init_inodecache();
-
if (err)
-
return err;
-
err = register_filesystem(&ext2_fs_type);
//将ext2_fs_type挂到全局链表file_systems上
-
if (err)
-
goto out;
-
return
0;
-
out:
-
destroy_inodecache();
-
return err;
-
}
3.2 ext2文件系统挂载
sget:分配super_block并且将super_block添加到全局链表super_blocks和file_system_type ->fs_supers
fill_super:函数指针,这里指向ext2_fill_super,用于从文件系统中读取super_block,下面细讲:
-
static int ext2_fill_super(struct super_block *sb, void *data, int silent)
-
{
-
struct buffer_head * bh;
-
struct ext2_sb_info * sbi;
-
struct ext2_super_block * es;
-
struct inode *root;
-
unsigned
long sb_block = get_sb_block(&data);
-
unsigned
long logic_sb_block;
-
unsigned
long offset =
0;
-
int blocksize = BLOCK_SIZE;
-
int db_count;
-
-
-
sbi = kzalloc(
sizeof(*sbi), GFP_KERNEL);
//分配ext2_sb_info结构
-
if (!sbi)
-
goto failed;
-
-
-
sb->s_fs_info = sbi;
//VFS中的super_block通过sb->s_fs_info与ext2_sb_info相连接
-
sbi->s_sb_block = sb_block;
-
-
-
blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
-
......
-
if (!(bh = sb_bread(sb, logic_sb_block))) {
//从磁盘中读取原始的超级块结构ext2_super_block
-
ext2_msg(sb, KERN_ERR,
"error: unable to read superblock");
-
goto failed_sbi;
-
}
-
es = (struct ext2_super_block *) (((
char *)bh->b_data) + offset);
-
sbi->s_es = es;
-
......
-
sb->s_magic = le16_to_cpu(es->s_magic);
-
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
-
-
-
/*如果超级块的实际块大小与假设的大小不一致就重新读取超级块,因为超级块占用一个块大小,函数sb_bread也是从指定块号读取一个块大小,如果实际块与假设的块大小不一致就重新读取一个准确的块
-
大小*/
-
if (sb->s_blocksize != blocksize) {
-
brelse(bh);
-
-
-
if (!sb_set_blocksize(sb, blocksize)) {
-
ext2_msg(sb, KERN_ERR,
-
"error: bad blocksize %d", blocksize);
-
goto failed_sbi;
-
}
-
logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
-
offset = (sb_block*BLOCK_SIZE) % blocksize;
-
bh = sb_bread(sb, logic_sb_block);
-
es = (struct ext2_super_block *) (((
char *)bh->b_data) + offset);
-
sbi->s_es = es;
-
}
-
......
-
sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;
-
-
-
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
-
sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
-
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
-
-
-
sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
-
sbi->s_itb_per_group = sbi->s_inodes_per_group /
-
sbi->s_inodes_per_block;
-
sbi->s_desc_per_block = sb->s_blocksize /
-
sizeof (struct ext2_group_desc);
-
sbi->s_sbh = bh;
//让s_sbh指向原始超级块数据
-
sbi->s_mount_state = le16_to_cpu(es->s_state);
-
sbi->s_addr_per_block_bits =
-
ilog2 (EXT2_ADDR_PER_BLOCK(sb));
-
sbi->s_desc_per_block_bits =
-
ilog2 (EXT2_DESC_PER_BLOCK(sb));
-
......
-
sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
-
le32_to_cpu(es->s_first_data_block) -
1)
-
/ EXT2_BLOCKS_PER_GROUP(sb)) +
1;
-
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) -
1) /
-
EXT2_DESC_PER_BLOCK(sb);
-
sbi->s_group_desc = kmalloc (db_count *
sizeof (struct buffer_head *), GFP_KERNEL);
-
......
-
for (i =
0; i < db_count; i++) {
//读出所有组描述符
-
block = descriptor_loc(sb, logic_sb_block, i);
-
sbi->s_group_desc[i] = sb_bread(sb, block);
-
if (!sbi->s_group_desc[i]) {
-
for (j =
0; j < i; j++)
-
brelse (sbi->s_group_desc[j]);
-
ext2_msg(sb, KERN_ERR,
-
"error: unable to read group descriptors");
-
goto failed_mount_group_desc;
-
}
-
}
-
sbi->s_gdb_count = db_count;
//设置组描述符所占用的块数
-
......
-
/*初始化预分配窗口*/
-
sbi->s_rsv_window_head.rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
-
sbi->s_rsv_window_head.rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
-
sbi->s_rsv_window_head.rsv_alloc_hit =
0;
-
sbi->s_rsv_window_head.rsv_goal_size =
0;
-
ext2_rsv_window_add(sb, &sbi->s_rsv_window_head);
-
......
-
sb->s_op = &ext2_sops;
//设置super_operations
-
......
-
root = ext2_iget(sb, EXT2_ROOT_INO);
-
if (IS_ERR(root)) {
-
ret = PTR_ERR(root);
-
goto failed_mount3;
-
}
-
-
-
sb->s_root = d_make_root(root);
//创建根目录的dentry
-
-
-
......
-
ext2_write_super(sb);
-
......
3.3文件系统操作
inode包含了文件操作的全部信息,文件打开时file结构的初始化信息也都来源于inode,下面是inode创建时的主要逻辑:
-
struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
-
{
-
struct ext2_inode_info *ei;
-
struct buffer_head * bh;
-
struct ext2_inode *raw_inode;
-
struct inode *inode;
-
-
inode = iget_locked(sb, ino);
//创建VFS inode和ext2_inode_info
-
-
ei = EXT2_I(inode);
-
ei->i_block_alloc_info =
NULL;
-
-
raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
//到inode块表中去读取原始inode
-
-
-
......
-
-
-
if (S_ISREG(inode->i_mode)) {
-
inode->i_op = &ext2_file_inode_operations;
-
if (test_opt(inode->i_sb, NOBH)) {
-
inode->i_mapping->a_ops = &ext2_nobh_aops;
-
inode->i_fop = &ext2_file_operations;
-
}
else {
-
inode->i_mapping->a_ops = &ext2_aops;
//页缓存操作函数集
-
inode->i_fop = &ext2_file_operations;
//设置file_operations
-
}
-
}
else
if (S_ISDIR(inode->i_mode)) {
-
inode->i_op = &ext2_dir_inode_operations;
-
......
-
}
文件数据读取
-
int mpage_readpages(struct address_space *mapping, struct list_head *pages,
-
unsigned nr_pages,
get_block_t get_block)
-
{
-
……
-
for (page_idx =
0; page_idx < nr_pages; page_idx++) {
-
struct page *page = lru_to_page(pages);
-
-
prefetchw(&page->flags);
-
list_del(&page->lru);
-
if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
//循环映射多个页到文件系统数据块
-
bio = do_mpage_readpage(bio, page,
-
nr_pages - page_idx,
-
&last_block_in_bio, &map_bh,
-
&first_logical_block,
-
get_block, gfp);
-
}
-
put_page(page);
-
}
-
if (bio)
-
mpage_bio_submit(REQ_OP_READ,
0, bio);
//提交数据读请求给块设备
-
return
0;
-
}
文件数据块分散在磁盘上,要对数据进行读写操作就先要找到文件数据块的块号,函数do_mpage_readpage的工作就是根据文件数据位置偏移找到对应的数据块块号。map_bh用于读取inode的映射块。
-
static struct bio *
-
do_mpage_readpage
(struct bio *bio, struct page *page, unsigned nr_pages,
-
sector_t *last_block_in_bio, struct buffer_head *map_bh,
-
unsigned
long *first_logical_block,
get_block_t get_block,
-
gfp_t gfp)
-
{
-
struct inode *inode = page->mapping->host;
-
const
unsigned blkbits = inode->i_blkbits;
-
const
unsigned blocks_per_page = PAGE_SIZE >> blkbits;
-
const
unsigned blocksize =
1 << blkbits;
-
unsigned first_hole = blocks_per_page;
-
......
-
block_in_file = (
sector_t)page->index << (PAGE_SHIFT - blkbits);
-
last_block = block_in_file + nr_pages * blocks_per_page;
-
last_block_in_file = (i_size_read(inode) + blocksize -
1) >> blkbits;
-
if (last_block > last_block_in_file)
-
last_block = last_block_in_file;
-
page_block =
0;
-
-
nblocks = map_bh->b_size >> blkbits;
-
if (buffer_mapped(map_bh) && block_in_file > *first_logical_block &&
-
block_in_file < (*first_logical_block + nblocks)) {
//如果前一次循环已经读取了映射块,就在其中查找映射关系
-
unsigned map_offset = block_in_file - *first_logical_block;
-
unsigned last = nblocks - map_offset;
-
-
for (relative_block =
0; ; relative_block++) {
-
if (relative_block == last) {
-
clear_buffer_mapped(map_bh);
-
break;
-
}
-
if (page_block == blocks_per_page)
-
break;
-
blocks[page_block] = map_bh->b_blocknr + map_offset +
-
relative_block;
-
page_block++;
-
block_in_file++;
-
}
-
bdev = map_bh->b_bdev;
-
}
-
map_bh->b_page = page;
-
while (page_block < blocks_per_page) {
//调用函数get_block读取映射块,这个函数后面详解
-
map_bh->b_state =
0;
-
map_bh->b_size =
0;
-
-
if (block_in_file < last_block) {
-
map_bh->b_size = (last_block-block_in_file) << blkbits;
-
if (get_block(inode, block_in_file, map_bh,
0))
-
goto confused;
-
*first_logical_block = block_in_file;
-
}
-
......
-
nblocks = map_bh->b_size >> blkbits;
-
for (relative_block =
0; ; relative_block++) {
-
if (relative_block == nblocks) {
-
clear_buffer_mapped(map_bh);
-
break;
-
}
else
if (page_block == blocks_per_page)
-
break;
-
blocks[page_block] = map_bh->b_blocknr+relative_block;
-
page_block++;
-
block_in_file++;
-
}
-
bdev = map_bh->b_bdev;
-
}
-
......
-
alloc_new:
-
if (bio ==
NULL) {
-
if (first_hole == blocks_per_page) {
-
if (!bdev_read_page(bdev, blocks[
0] << (blkbits -
9),
-
page))
-
goto out;
-
}
-
bio = mpage_alloc(bdev, blocks[
0] << (blkbits -
9),
-
min_t(
int, nr_pages, BIO_MAX_PAGES), gfp);
//根据前面找到的数据块编号初始化bio
-
if (bio ==
NULL)
-
goto confused;
-
}
-
-
length = first_hole << blkbits;
-
if (bio_add_page(bio, page, length,
0) < length) {
//将内存页添加到bio中
-
bio = mpage_bio_submit(REQ_OP_READ,
0, bio);
-
goto alloc_new;
-
}
-
-
......
-
out:
-
return bio;
-
......
块映射
前面提到的函数指针get_block指向函数ext2_get_block,其实现逻辑如下:
-
struct ext2_sb_info {
-
.......
-
spinlock_t s_rsv_window_lock;
-
struct rb_root s_rsv_window_root;
//预分配窗口的红黑树
-
struct ext2_reserve_window_node s_rsv_window_head;
//红黑树的根节点,空窗口
-
.......
-
};
-
-
-
struct ext2_inode_info {
-
......
-
struct ext2_block_alloc_info *i_block_alloc_info;
//预分配信息结构
-
......
-
};
struct ext2_block_alloc_info | |
struct ext2_reserve_window_node rsv_window_node; | 预留窗口信息 |
__u32 last_alloc_logical_block; | 上一次分配的逻辑块,即相对文件偏移的块 |
ext2_fsblk_t last_alloc_physical_block; | 上一次分配的物理块,即磁盘上的块号 |
struct ext2_reserve_window_node | |
struct rb_node rsv_node | 用于添加到ext2_sb_info的红黑树中 |
__u32 rsv_goal_size | 预分配的大小 |
struct ext2_reserve_window rsv_window | struct ext2_reserve_window { ext2_fsblk_t _rsv_start; //预分配的起始位置 ext2_fsblk_t _rsv_end;//预分配的结束位置 }; |
inode预分配窗口的初始化:
-
void ext2_init_block_alloc_info(struct inode *inode)
-
{
-
struct ext2_inode_info *ei = EXT2_I(inode);
-
struct ext2_block_alloc_info *block_i;
-
struct super_block *sb = inode->i_sb;
-
-
-
block_i = kmalloc(
sizeof(*block_i), GFP_NOFS);
-
if (block_i) {
-
struct ext2_reserve_window_node *rsv = &block_i->rsv_window_node;
-
-
-
rsv->rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
-
rsv->rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED;
//标识预分配窗口为空
-
-
-
if (!test_opt(sb, RESERVATION))
-
rsv->rsv_goal_size =
0;
-
else
-
rsv->rsv_goal_size = EXT2_DEFAULT_RESERVE_BLOCKS;
//默认预分配窗口大小为8
-
rsv->rsv_alloc_hit =
0;
-
block_i->last_alloc_logical_block =
0;
-
block_i->last_alloc_physical_block =
0;
-
}
-
ei->i_block_alloc_info = block_i;
-
}
下面正式讲解块的分配:
-
ext2_fsblk_t ext2_new_blocks(struct inode *inode,
ext2_fsblk_t goal,
-
unsigned
long *count,
int *errp)
-
{
-
......
-
struct ext2_super_block *es;
-
struct ext2_sb_info *sbi;
-
struct ext2_reserve_window_node *my_rsv = NULL;
-
struct ext2_block_alloc_info *block_i;
-
unsigned
short windowsz =
0;
-
unsigned
long ngroups;
-
unsigned
long num = *count;
-
-
-
sb = inode->i_sb;
-
-
-
-
-
sbi = EXT2_SB(sb);
-
-
-
block_i = EXT2_I(inode)->i_block_alloc_info;
-
if (block_i) {
-
windowsz = block_i->rsv_window_node.rsv_goal_size;
-
if (windowsz >
0)
-
my_rsv = &block_i->rsv_window_node;
-
}
-
-
-
group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
-
EXT2_BLOCKS_PER_GROUP(sb);
// 计算goal所在的块组
-
goal_group = group_no;
-
retry_alloc:
-
gdp = ext2_get_group_desc(sb, group_no, &gdp_bh);
//获取组描述符
-
if (!gdp)
-
goto io_error;
-
-
-
free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
-
-
-
if (free_blocks >
0) {
-
grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
-
EXT2_BLOCKS_PER_GROUP(sb));
-
bitmap_bh = read_block_bitmap(sb, group_no);
//读取块组数据块位图
-
if (!bitmap_bh)
-
goto io_error;
-
grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no,
-
bitmap_bh, grp_target_blk,
-
my_rsv, &num);
//分配数据块,并实现数据块预分配
-
if (grp_alloc_blk >=
0)
-
goto allocated;
-
}
-
-
-
ngroups = EXT2_SB(sb)->s_groups_count;
-
-
-
for (bgi =
0; bgi < ngroups; bgi++) {
//如果在goal所在的块组中没有分配到就从第一个块组开始尝试分配
-
......
-
}
-
if (my_rsv) {
-
my_rsv =
NULL;
-
windowsz =
0;
-
group_no = goal_group;
-
goto retry_alloc;
-
}
-
-
-
allocated:
-
ret_block = grp_alloc_blk + ext2_group_first_block_no(sb, group_no);
-
......
-
return ret_block;
//返回分配到的块的块号(组内块号加上该块组第一个块的块号)
-
......
下面函数是块分配的核心函数,首先查找一个可以容纳预分配窗口大小的空闲空间,然后将数据块位图上对应的位置设置为1,表示已分配。
-
static ext2_grpblk_t
-
ext2_try_to_allocate_with_rsv
(struct super_block *sb, unsigned int group,
-
struct buffer_head *bitmap_bh, ext2_grpblk_t grp_goal,
-
struct ext2_reserve_window_node * my_rsv,
-
unsigned long *count)
-
{
-
ext2_fsblk_t group_first_block, group_last_block;
-
ext2_grpblk_t ret =
0;
-
unsigned
long num = *count;
-
-
if (my_rsv ==
NULL) {
//直接分配数据块不做预分配
-
return ext2_try_to_allocate(sb, group, bitmap_bh,
-
grp_goal, count,
NULL);
-
}
-
-
group_first_block = ext2_group_first_block_no(sb, group);
-
group_last_block = group_first_block + (EXT2_BLOCKS_PER_GROUP(sb) -
1);
-
-
while (
1) {
-
if (rsv_is_empty(&my_rsv->rsv_window) || (ret <
0) ||
-
!goal_in_my_reservation(&my_rsv->rsv_window,
-
grp_goal, group, sb)) {
//预分配窗口为空或者目标块不在my_rsv中
-
if (my_rsv->rsv_goal_size < *count)
// my_rsv->rsv_goal_size初始值为8
-
my_rsv->rsv_goal_size = *count;
-
ret = alloc_new_reservation(my_rsv, grp_goal, sb,
-
group, bitmap_bh);
//查找一个可以容纳预分配窗口大小的空闲空间
-
if (ret <
0)
-
break;
/* failed */
-
-
if (!goal_in_my_reservation(&my_rsv->rsv_window,
-
grp_goal, group, sb))
-
grp_goal =
-1;
-
}
else
if (grp_goal >=
0) {
-
int curr = my_rsv->rsv_end -
-
(grp_goal + group_first_block) +
1;
-
-
if (curr < *count)
-
try_to_extend_reservation(my_rsv, sb,
-
*count - curr);
-
}
-
-
if ((my_rsv->rsv_start > group_last_block) ||
-
(my_rsv->rsv_end < group_first_block)) {
-
rsv_window_dump(&EXT2_SB(sb)->s_rsv_window_root,
1);
-
BUG();
-
}
-
ret = ext2_try_to_allocate(sb, group, bitmap_bh, grp_goal,
-
&num, &my_rsv->rsv_window);
// 将预分配窗口中的数据块在位图上对应的位置设置为1
-
if (ret >=
0) {
-
my_rsv->rsv_alloc_hit += num;
//统计预分配命中数
-
*count = num;
-
break;
/* succeed */
-
}
-
num = *count;
//本次分配失败,将num恢复为请求的块数后重试
-
}
-
return ret;
-
}
下面函数是预分配的核心:
-
static int alloc_new_reservation(struct ext2_reserve_window_node *my_rsv,
-
ext2_grpblk_t grp_goal, struct super_block *sb,
-
unsigned
int group, struct buffer_head *bitmap_bh)
-
{
-
struct ext2_reserve_window_node *search_head;
-
ext2_fsblk_t group_first_block, group_end_block, start_block;
-
ext2_grpblk_t first_free_block;
-
struct rb_root *fs_rsv_root = &EXT2_SB(sb)->s_rsv_window_root;
-
unsigned
long size;
-
int ret;
-
spinlock_t *rsv_lock = &EXT2_SB(sb)->s_rsv_window_lock;
-
-
group_first_block = ext2_group_first_block_no(sb, group);
-
group_end_block = group_first_block + (EXT2_BLOCKS_PER_GROUP(sb) -
1);
-
-
start_block = grp_goal + group_first_block;
//搜索空间区间的起始位置
-
-
size = my_rsv->rsv_goal_size;
-
-
search_head = search_reserve_window(fs_rsv_root, start_block);
//查找离start_block最近的预留窗口
-
-
retry:
-
ret = find_next_reservable_window(search_head, my_rsv, sb,
-
start_block, group_end_block);
//以search_head为起点查找一个可以容纳my_rsv且不与其他预留窗口重叠的空闲区间
-
......
-
first_free_block = bitmap_search_next_usable_block(
-
my_rsv->rsv_start - group_first_block,
-
bitmap_bh, group_end_block - group_first_block +
1);
//在位图中查找块组中rsv_start往后第一个空闲块,因为不是所有块分配都是通过预分配,所以有些块可能已经分配了但是在预留窗口中找不到
-
-
......
-
start_block = first_free_block + group_first_block;
-
-
if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
//空闲块是否在my_rsv中
-
return
0;
/* success */
-
search_head = my_rsv;
//如果my_rsv区间中的块都被分配出去了就以my_rsv为起点重新搜索
-
goto retry;
-
}