不论是文件,还是目录,在文件系统中都由inode表示,本篇主要看一下创建一个目录f2fs都做了哪些事情,创建了哪些结构,什么样的格式,怎样最终存储到存储中。
/*
 * mkdirat(2) entry point: resolve the parent path, allocate a negative
 * dentry for the new directory in RAM, then hand off to vfs_mkdir2().
 */
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
{
struct dentry *dentry;
struct path path;
int error;
unsigned int lookup_flags = LOOKUP_DIRECTORY;
retry:
/* Resolve the parent and create a dentry for the last path component. */
dentry = user_path_create(dfd, pathname, &path, lookup_flags);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
/* Without POSIX ACLs the umask is applied here rather than by the fs. */
if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
error = security_path_mkdir(&path, dentry, mode);
if (!error)
error = vfs_mkdir2(path.mnt, path.dentry->d_inode, dentry, mode);
done_path_create(&path, dentry);
/* Retry with LOOKUP_REVAL if the cached lookup turned out to be stale. */
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
return error;
}
系统调用mkdirat供应用层创建一个目录,此函数先在ram中创建目录的dentry结构,然后调用vfs_mkdir2进入真正的创建流程。vfs_mkdir2函数有四个参数:
1) path.mnt: 创建目录的上一层目录的vfsmount信息
2) struct inode *dir: 上级目录的inode结构
3) struct dentry *dentry: 要创建目录的dentry
4) umode_t mode: 要创建目录的权限模式
vfs_mkdir2这些参数,最终会传给f2fs_mkdir,f2fs_mkdir函数实现f2fs文件系统创建目录相关结构。
/*
 * f2fs_mkdir(): f2fs implementation of the mkdir inode operation.
 * (Fragment — the rest of the function body is quoted further below.)
 */
static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
int err;
/* Refuse to create anything if a checkpoint error has been recorded. */
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
/* Make sure the parent directory's quota info is initialized. */
err = dquot_initialize(dir);
if (err)
return err;
/* Allocate and initialize a new in-memory inode for the directory. */
inode = f2fs_new_inode(dir, S_IFDIR | mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
f2fs_mkdir函数,
首先判断f2fs check point是否有错,初始化quot info,之后会调用f2fs_new_inode,新建f2fs inode, 表示新建的目录:
/*
 * f2fs_new_inode(): allocate a new in-memory inode under @dir with the
 * given @mode, assign it a fresh f2fs nid, and initialize its f2fs state
 * (quota, encryption, inline flags, inherited attributes).
 */
static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
nid_t ino;
struct inode *inode;
bool nid_free = false;
int xattr_size = 0;
int err;
/* Allocate a VFS inode from the f2fs inode cache and initialize it. */
inode = new_inode(dir->i_sb);
if (!inode)
return ERR_PTR(-ENOMEM);
f2fs_lock_op(sbi);
/* Allocate a free node id (nid) from the node manager. */
if (!alloc_nid(sbi, &ino)) {
f2fs_unlock_op(sbi);
err = -ENOSPC;
goto fail;
}
f2fs_unlock_op(sbi);
nid_free = true;
/* Initialize uid/gid/mode, inheriting from the parent directory. */
inode_init_owner(inode, dir, mode);
/* The freshly allocated f2fs nid becomes the inode number. */
inode->i_ino = ino;
inode->i_blocks = 0; /* Initialize timestamps and related fields. */
inode->i_mtime = inode->i_atime = inode->i_ctime =
F2FS_I(inode)->i_crtime = current_time(inode);
inode->i_generation = sbi->s_next_generation++;
err = insert_inode_locked(inode);
if (err) {
err = -EINVAL;
goto fail;
}
/* Inherit the project id when project quota + inherit flag are set. */
if (f2fs_sb_has_project_quota(sbi->sb) &&
(F2FS_I(dir)->i_flags & FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
F2FS_DEF_PROJID);
err = dquot_initialize(inode);
if (err)
goto fail_drop;
err = dquot_alloc_inode(inode);
if (err)
goto fail_drop;
/* Mark the inode as newly created (FI_NEW_INODE). */
set_inode_flag(inode, FI_NEW_INODE);
/* If the directory is encrypted, then we should encrypt the inode. */
if ((f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
if (f2fs_sb_has_extra_attr(sbi->sb)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
/* Enable inline xattr/data/dentry depending on mount options. */
if (test_opt(sbi, INLINE_XATTR))
set_inode_flag(inode, FI_INLINE_XATTR);
if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
set_inode_flag(inode, FI_INLINE_DATA);
if (f2fs_may_inline_dentry(inode))
set_inode_flag(inode, FI_INLINE_DENTRY);
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
if (f2fs_has_inline_xattr(inode))
xattr_size = F2FS_OPTION(sbi).inline_xattr_size;
/* Otherwise, will be 0 */
} else if (f2fs_has_inline_xattr(inode) ||
f2fs_has_inline_dentry(inode)) {
xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
}
F2FS_I(inode)->i_inline_xattr_size = xattr_size;
f2fs_init_extent_tree(inode, NULL);
stat_inc_inline_xattr(inode);
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
/* Inherit inheritable flags from the parent, masked by the mode. */
F2FS_I(inode)->i_flags =
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
if (S_ISDIR(inode->i_mode))
F2FS_I(inode)->i_flags |= FS_INDEX_FL;
if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
trace_f2fs_new_inode(inode, 0);
/* All inode attributes are set; hand the inode back to the caller. */
return inode;
fail:
trace_f2fs_new_inode(inode, err);
make_bad_inode(inode);
/* Make sure the allocated nid is released when the inode is dropped. */
if (nid_free)
set_inode_flag(inode, FI_FREE_NID);
iput(inode);
return ERR_PTR(err);
fail_drop:
trace_f2fs_new_inode(inode, err);
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
if (nid_free)
set_inode_flag(inode, FI_FREE_NID);
clear_nlink(inode);
unlock_new_inode(inode);
iput(inode);
return ERR_PTR(err);
}
f2fs_new_inode的作用,
一是在f2fs inode cache新申请一个inode结构,
二是在f2fs node manager free node id中新申请一个nid,
初始化inode结构的各个成员,包括将inode->i_ino指向nid。f2fs_new_inode后返回f2fs_mkdir函数:
/* (Continuation of f2fs_mkdir after f2fs_new_inode() returns.) */
/* Install the directory inode/file/address-space operation tables. */
inode->i_op = &f2fs_dir_inode_operations;
inode->i_fop = &f2fs_dir_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
inode_nohighmem(inode);
set_inode_flag(inode, FI_INC_LINK);
f2fs_lock_op(sbi);
/* Insert a dentry for the new directory into the parent directory. */
err = f2fs_add_link(dentry, inode);
if (err)
goto out_fail;
f2fs_unlock_op(sbi);
/* Commit the nid allocation now that the link exists. */
alloc_nid_done(sbi, inode->i_ino);
d_instantiate_new(dentry, inode);
/* Honor the parent's DIRSYNC semantics with a synchronous sync. */
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
f2fs_balance_fs(sbi, true);
return 0;
最后这一段代码主要就是设置inode属性,如操作inode的函数,操作mapping函数等,最后会执行f2fs_balance_fs。
有一个疑惑的地方,就是inode信息是如何保存到f2fs物理磁盘中的呢?上述的f2fs_mkdir过程,只是在f2fs inode cache中新申请了一个f2fs inode 结构并初始化,此inode还是保存在f2fs inode cache中,其数据怎样最终存储到f2fs main area中?
在inode.c中有一个函数,f2fs_write_inode(struct inode* inode, struct writeback_control *wbc), 看这个函数有点像将inode写入到磁盘的函数,看看这个函数的调用过程:
wb_workfn() -> wb_do_writeback() -> wb_writeback() -> writeback_sb_inodes() -> __writeback_single_inode() -> write_inode() -> f2fs_write_inode()
可以看到,此调用过程就是将f2fs super block中所有的inode写回的一个过程,这里主要看f2fs_write_inode()具体怎样写回。
/*
 * f2fs_write_inode(): writeback entry point that flushes a dirty in-core
 * inode into its on-disk node page via update_inode_page().
 */
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
/* The internal node/meta inodes are never written through this path. */
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
return 0;
/* Nothing to do unless the inode itself is marked dirty. */
if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
return 0;
/*
 * We need to balance fs here to prevent from producing dirty node pages
 * during the urgent cleaning time when running out of free sections.
 */
update_inode_page(inode);
if (wbc && wbc->nr_to_write)
f2fs_balance_fs(sbi, true);
return 0;
}
f2fs_write_inode()函数写回的功能主要由update_inode_page()函数实现:
/*
 * update_inode_page(): fetch the node page that backs @inode and copy the
 * in-core inode state into it, so later node writeback persists it.
 */
void update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *node_page;
retry:
node_page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page)) {
int err = PTR_ERR(node_page);
/* Transient allocation failure: yield and retry indefinitely. */
if (err == -ENOMEM) {
cond_resched();
goto retry;
} else if (err != -ENOENT) {
/* Any other error is fatal: stop checkpointing. */
f2fs_stop_checkpoint(sbi, false);
}
return;
}
update_inode(inode, node_page);
f2fs_put_page(node_page, 1);
}
update_inode_page()的参数是要写回的inode, 即之前new inode时在f2fs inode cache中新申请的inode,通过get_node_page()函数,得到与此inode相对应的node page, 通过update_inode,将inode信息更新到node page中,最后将node page中的信息写回,通过get_node_page()函数获得与此inode相关的node page:
/*
 * __get_node_page(): return the (uptodate, locked) node page for @nid,
 * reading it from disk if it is not already cached in the NODE mapping.
 * @parent/@start, when given, trigger readahead of sibling node pages.
 */
static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
struct page *parent, int start)
{
struct page *page;
int err;
if (!nid)
return ERR_PTR(-ENOENT);
f2fs_bug_on(sbi, check_nid_range(sbi, nid));
repeat:
/*
 * Look up the node page in the NODE address-space page cache; if it is
 * not cached yet, allocate a new page and insert it into the mapping.
 */
page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
if (!page)
return ERR_PTR(-ENOMEM);
/* Fill the page from disk (no-op if it is already uptodate). */
err = read_node_page(page, 0);
if (err < 0) {
f2fs_put_page(page, 1);
return ERR_PTR(err);
} else if (err == LOCKED_PAGE) {
err = 0;
goto page_hit;
}
if (parent)
ra_node_pages(parent, start + 1, MAX_RA_NODE);
lock_page(page);
/* The page may have been truncated/migrated while unlocked; retry. */
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto repeat;
}
if (unlikely(!PageUptodate(page))) {
err = -EIO;
goto out_err;
}
if (!f2fs_inode_chksum_verify(sbi, page)) {
err = -EBADMSG;
goto out_err;
}
page_hit:
/* Sanity-check that the block we read really belongs to @nid. */
if(unlikely(nid != nid_of_node(page))) {
f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
"nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
nid, nid_of_node(page), ino_of_node(page),
ofs_of_node(page), cpver_of_node(page),
next_blkaddr_of_node(page));
err = -EINVAL;
out_err:
ClearPageUptodate(page);
f2fs_put_page(page, 1);
return ERR_PTR(err);
}
return page;
}
f2fs_grab_cache_page(node-inode mapping, nid) -> grab_cache_page(mapping, nid) -> find_or_create_page(mapping, nid) -> pagecache_getpage() 经过这一系列的调用路径后,先在node-inode cache中查找,是否存在与此inode对应的page, 如果存在则返回,否则,创建一个新的page, 并将其加入到node-inode mapping中。得到page后,继续调用read_node_page(page,0)更新此node page对应的数据,即从物理磁盘上读取对应的数据。
/*
 * read_node_page(): submit a read bio for @page's node block.
 *
 * Caller should do after getting the following values.
 * 0: f2fs_put_page(page, 0)
 * LOCKED_PAGE or error: f2fs_put_page(page, 1)
 */
static int read_node_page(struct page *page, int op_flags)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
struct node_info ni;
/* Describe the I/O: a NODE-type read of this page. */
struct f2fs_io_info fio = {
.sbi = sbi,
.type = NODE,
.op = REQ_OP_READ,
.op_flags = op_flags,
.page = page,
.encrypted_page = NULL,
};
if (PageUptodate(page))
return LOCKED_PAGE;
/* Translate the nid (page->index) to its block address via the NAT. */
get_node_info(sbi, page->index, &ni);
if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page);
return -ENOENT;
}
fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
return f2fs_submit_page_bio(&fio);
}
read_node_page()首先准备f2fs_io_info,设置f2fs_io_info的superblock, type为NODE, op为REQ_OP_READ读操作,page为得到的node-inode page,然后调用get_node_info(sbi, page->index, &ni),先得到inode对应的node info结构,此node info存在于nat area:
/* In-core NAT entry: maps a node id to its on-disk block address. */
struct node_info {
nid_t nid; /* node id */
nid_t ino; /* inode number of the node's owner */
block_t blk_addr; /* block address of the node */
unsigned char version; /* version of the node */
unsigned char flag; /* for node information bits */
};
可以看到,node info结构体将node与blk_addr对应起来,这样就得到了inode对应的在main area存储的地址,最后read_node_page()函数通过f2fs_submit_page_bio(&fio)从main area中读到inode对应的数据信息。
通过下面的图,可以看到inode, nat, main node page的对应关系: