sysfs文件系统的注册是由sysfs_init()函数完成的。
/*
 * sysfs_init - bring up sysfs during kernel initialisation.
 *
 * Steps, in order:
 *   1. create the slab cache used for struct sysfs_dirent objects;
 *   2. run sysfs_inode_init();
 *   3. register sysfs_fs_type with register_filesystem();
 *   4. mount it internally with kern_mount() and keep the result in
 *      sysfs_mount.
 *
 * Returns 0 on success or a negative errno.  On any failure after the cache
 * was created, the cache is destroyed and sysfs_dir_cachep is reset to NULL;
 * if the mount fails the filesystem type is unregistered as well.
 */
int __init sysfs_init(void)
{
	int err = -ENOMEM;

	sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache",
					      sizeof(struct sysfs_dirent),
					      0, 0, NULL);
	if (!sysfs_dir_cachep)
		goto out;

	err = sysfs_inode_init();
	if (err)
		goto out_err;

	/* Debug trace: the argument is a pointer, so it is printed with %p. */
	printk(KERN_WARNING "sysfs_fs_type 0x%p\n", &sysfs_fs_type);

	err = register_filesystem(&sysfs_fs_type);
	if (!err) {
		sysfs_mount = kern_mount(&sysfs_fs_type);
		if (IS_ERR(sysfs_mount)) {
			printk(KERN_ERR "sysfs: could not mount!\n");
			err = PTR_ERR(sysfs_mount);
			sysfs_mount = NULL;
			/* Undo the registration performed just above. */
			unregister_filesystem(&sysfs_fs_type);
			goto out_err;
		}
	} else
		goto out_err;
out:
	return err;
out_err:
	kmem_cache_destroy(sysfs_dir_cachep);
	sysfs_dir_cachep = NULL;
	goto out;
}
1.kmem_cache_create() 用于创建Slab缓存 ,和内核内存管理有关,先不用管它。
2.sysfs_inode_init()调用bdi_init()函数,应该是和数据同步和回写机制有关,也不用管它。
3.register_filesystem(&sysfs_fs_type);这是内核第一次注册文件系统。
原理(见转载的文章<<解析linux中的VFS文件系统机制>>):
在linux源代码中,每个实际的文件系统用以下数据结构表示,
/*
 * Describes one filesystem type.  Instances are chained through ->next into
 * the list headed by the global file_systems pointer (see
 * register_filesystem() and find_filesystem()).
 */
struct file_system_type {
/* Name of the type; find_filesystem() matches on it, and
 * register_filesystem() BUG()s if it contains a '.'. */
const char *name;
/* Flag bits; vfs_kern_mount() tests FS_BINARY_MOUNTDATA here. */
int fs_flags;
/* Mount-time callback invoked by vfs_kern_mount() as
 * get_sb(type, flags, name, data, mnt); a negative return is an error. */
int (*get_sb) (struct file_system_type *, int,
const char *, void *, struct vfsmount *);
void (*kill_sb) (struct super_block *);
struct module *owner;
/* Next registered type; register_filesystem() refuses a type whose
 * ->next is already non-NULL. */
struct file_system_type * next;
/* List head initialised by register_filesystem(); sget() adds each
 * superblock of this type to it through super_block.s_instances. */
struct list_head fs_supers;
/* lockdep class keys.  inode_init_always() applies i_lock_key,
 * i_mutex_key and i_alloc_sem_key to the matching inode locks. */
struct lock_class_key s_lock_key;
struct lock_class_key s_umount_key;
struct lock_class_key i_lock_key;
struct lock_class_key i_mutex_key;
struct lock_class_key i_mutex_dir_key;
struct lock_class_key i_alloc_sem_key;
};
sysfs文件系统的定义在fs/sysfs/mount.c中:
/*
 * The filesystem type object for sysfs.  Only the name and the two
 * superblock callbacks are set explicitly; all remaining members (including
 * ->next) start out zeroed because the object has static storage duration.
 */
static struct file_system_type sysfs_fs_type = {
.name = "sysfs",
/* Called from vfs_kern_mount() through type->get_sb. */
.get_sb = sysfs_get_sb,
.kill_sb = kill_anon_super,
};
注册过程实际上是将表示各个实际文件系统的 struct file_system_type 数据结构实例化,并把这些实例链接成一个单向链表;内核用一个名为 file_systems 的全局变量指向该链表的表头。
全局变量 file_systems 的定义在fs/filesystems.c中,
static struct file_system_type *file_systems;
该指针初始值是空。
按上述原理,可以提出假设
“执行完register_filesystem(&sysfs_fs_type);后,
该指针应该指向代表sysfs文件系统的数据结构sysfs_fs_type。”
下面就分析注册过程,验证上面的假设是否正确。
int register_filesystem(struct file_system_type * fs)
{
int res = 0;
struct file_system_type ** p;
BUG_ON(strchr(fs->name, '.'));
if (fs->next)
return -EBUSY;
INIT_LIST_HEAD(&fs->fs_supers);
write_lock(&file_systems_lock);
p = find_filesystem(fs->name, strlen(fs->name));
if (*p)
res = -EBUSY;
else
*p = fs;
write_unlock(&file_systems_lock);
return res;
}
3.1.INIT_LIST_HEAD(&fs->fs_supers);
初始化sysfs_fs_type中的fs_supers变量,将其next、prev指针都指向自己。
fs_supers是个双向链表,以后读取到超级块super_block后,会和super_block中的
s_instances链接起来。具体链接代码在fs/super.c中的sget()函数中的语句
list_add(&s->s_instances, &type->fs_supers);
3.2
/*
 * find_filesystem - locate the list link for a filesystem name.
 * @name: name to look for (only the first @len bytes are compared)
 * @len:  length of @name
 *
 * Walks the list headed by file_systems.  Returns the address of the link
 * that points at the matching entry, or the address of the terminating NULL
 * link when no entry matches (which is &file_systems for an empty list).
 */
static struct file_system_type **find_filesystem(const char *name, unsigned len)
{
	struct file_system_type **link = &file_systems;

	while (*link) {
		const char *cur = (*link)->name;

		if (strlen(cur) == len && strncmp(cur, name, len) == 0)
			break;
		link = &(*link)->next;
	}
	return link;
}
执行该函数时,file_systems为空,所以直接返回file_systems的地址。
3.3
if (*p)
res = -EBUSY;
else
*p = fs;
将 sysfs_fs_type 的地址赋给 file_systems ,即 file_systems 指向 &sysfs_fs_type 。
观察fs/namespace.c中的mnt_init()函数,可以发现,rootfs是第二个注册的文件系统,
分析3.2中的find_filesystem()函数,可以断定此时该函数返回的是sysfs_fs_type.next指针的地址(即&sysfs_fs_type.next),随后register_filesystem()通过 *p = fs 将
sysfs_fs_type.next指向rootfs_fs_type。两次文件系统注册后,可以得到下图:
图一
综上所述,文件系统的注册涉及一个数据结构struct file_system_type,和几个全局变量
static struct file_system_type *file_systems;
struct file_system_type sysfs_fs_type,
struct file_system_type rootfs_fs_type。
4.文件系统注册后,会调用kern_mount进行挂载。
挂载过程主要是几个数据结构如vfsmount,super_block,inode,dentry的获取,
初始化,以及彼此之间联系的建立,此外建立与几个全局变量的联系,如
static struct vfsmount *sysfs_mount;
struct super_block * sysfs_sb;
struct sysfs_dirent sysfs_root;
extern struct list_head super_blocks;
struct super_operations sysfs_ops;等等。
至于为什么要做这些事,可以阅读参考文献。
kern_mount()最终会调用vfs_kern_mount(),下面分析vfs_kern_mount()的代码。
/*
 * vfs_kern_mount - create a vfsmount for a filesystem type.
 * @type:  filesystem type to mount; NULL yields ERR_PTR(-ENODEV)
 * @flags: mount flags, handed to type->get_sb()
 * @name:  device name stored in the new vfsmount by alloc_vfsmnt()
 * @data:  mount data; copied through the security hooks unless the type
 *         sets FS_BINARY_MOUNTDATA
 *
 * Returns the new vfsmount on success.  On failure everything acquired so
 * far is released and an ERR_PTR() carrying the error code is returned.
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct vfsmount *mnt;
	char *secdata = NULL;
	int error;

	if (!type)
		return ERR_PTR(-ENODEV);

	error = -ENOMEM;
	/* Allocate and initialise the vfsmount structure. */
	mnt = alloc_vfsmnt(name);
	if (!mnt)
		goto out;
	/*
	 * Debug trace.  It dereferences mnt, so it has to come after the
	 * NULL check above.
	 */
	printk(KERN_WARNING "alloc vfsmnt name %s, mnt_id 0x%x\n",
	       mnt->mnt_devname, mnt->mnt_id);

	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
		secdata = alloc_secdata();
		if (!secdata)
			goto out_mnt;

		error = security_sb_copy_data(data, secdata);
		if (error)
			goto out_free_secdata;
	}

	/*
	 * Call the type's get_sb callback (sysfs_get_sb() for sysfs_fs_type)
	 * to obtain and initialise the super_block and related structures.
	 */
	error = type->get_sb(type, flags, name, data, mnt);
	if (error < 0)
		goto out_free_secdata;
	BUG_ON(!mnt->mnt_sb);

	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
	if (error)
		goto out_sb;

	/*
	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
	 * but s_maxbytes was an unsigned long long for many releases. Throw
	 * this warning for a little while to try and catch filesystems that
	 * violate this rule. This warning should be either removed or
	 * converted to a BUG() in 2.6.34.
	 */
	WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
		"negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);

	/* The mount point is the mount's own root dentry (see Figure 2). */
	mnt->mnt_mountpoint = mnt->mnt_root;
	/* The mount is its own parent (see Figure 2). */
	mnt->mnt_parent = mnt;
	up_write(&mnt->mnt_sb->s_umount);
	free_secdata(secdata);
	return mnt;
out_sb:
	dput(mnt->mnt_root);
	deactivate_locked_super(mnt->mnt_sb);
out_free_secdata:
	free_secdata(secdata);
out_mnt:
	free_vfsmnt(mnt);
out:
	return ERR_PTR(error);
}
4.1vfsmount结构的获取和初始化
/*
 * alloc_vfsmnt - allocate and initialise a vfsmount.
 * @name: optional device name; when non-NULL a copy is stored in
 *        mnt->mnt_devname
 *
 * Returns the new vfsmount with mnt_count set to 1 and all of its list
 * heads initialised, or NULL when any allocation step fails (everything
 * obtained up to that point is released again).
 */
struct vfsmount *alloc_vfsmnt(const char *name)
{
	struct vfsmount *mnt;

	/* Zeroed allocation from the mount slab cache. */
	mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (!mnt)
		return NULL;

	/* Assign mnt->mnt_id. */
	if (mnt_alloc_id(mnt))
		goto out_free_cache;

	/*
	 * Remember the device name.  NOTE(review): for the sysfs mount this
	 * is presumably the type name "sysfs" - confirm against kern_mount().
	 */
	if (name) {
		mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
		if (!mnt->mnt_devname)
			goto out_free_id;
	}

	/* The caller receives the initial reference. */
	atomic_set(&mnt->mnt_count, 1);

	INIT_LIST_HEAD(&mnt->mnt_hash);
	INIT_LIST_HEAD(&mnt->mnt_child);
	INIT_LIST_HEAD(&mnt->mnt_mounts);
	INIT_LIST_HEAD(&mnt->mnt_list);
	INIT_LIST_HEAD(&mnt->mnt_expire);
	INIT_LIST_HEAD(&mnt->mnt_share);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
	INIT_LIST_HEAD(&mnt->mnt_slave);

#ifdef CONFIG_SMP
	mnt->mnt_writers = alloc_percpu(int);
	if (!mnt->mnt_writers)
		goto out_free_devname;
#else
	mnt->mnt_writers = 0;
#endif
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}
4.1.1
分配mnt id,并将其赋值给mnt->mnt_id。
mnt id 从 mnt_id_start 开始分配(其定义为 static int mnt_id_start = 0;),
所以第一次分配到的值是0,然后是1,以此类推。
挂载rootfs时也会调用该函数,到时分配到的mnt id就是1。
/*
 * mnt_alloc_id - assign an id to a vfsmount.
 * @mnt: mount whose mnt_id is filled in
 *
 * Requests an id at or above mnt_id_start from mnt_id_ida and, on success,
 * advances mnt_id_start to one past the id just handed out.  The attempt is
 * repeated for as long as the IDA reports -EAGAIN.
 *
 * Returns 0 on success or the error from ida_get_new_above().
 *
 * allocation is serialized by namespace_sem
 */
static int mnt_alloc_id(struct vfsmount *mnt)
{
	int ret;

	do {
		ida_pre_get(&mnt_id_ida, GFP_KERNEL);

		spin_lock(&vfsmount_lock);
		ret = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
		if (ret == 0)
			mnt_id_start = mnt->mnt_id + 1;
		spin_unlock(&vfsmount_lock);
	} while (ret == -EAGAIN);

	return ret;
}
4.2
super_block、inode、dentry结构体的获取和初始化
type->get_sb是个函数指针,指向sysfs_get_sb()
/*
 * get_sb callback of sysfs_fs_type.
 *
 * Delegates to get_sb_single(), supplying sysfs_fill_super() as the routine
 * that fills in the superblock.  dev_name is not used here.  Returns whatever
 * get_sb_single() returns.
 */
static int sysfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
}
sysfs_get_sb() 调用 get_sb_single()
/*
 * get_sb_single - obtain the superblock of a filesystem type and attach it
 * to a mount.
 * @fs_type:    filesystem type
 * @flags:      mount flags
 * @data:       mount data passed on to @fill_super or do_remount_sb()
 * @fill_super: callback that completes a superblock that has no root yet
 * @mnt:        mount that receives the superblock and its root dentry
 *
 * Returns 0 on success, the error encoded in sget()'s result, or the error
 * returned by @fill_super (after the superblock has been deactivated).
 */
int get_sb_single(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int),
	struct vfsmount *mnt)
{
	struct super_block *sb;

	/* Find or create a superblock. */
	sb = sget(fs_type, compare_single, set_anon_super, NULL);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (sb->s_root) {
		/* The superblock already has a root: only remount it. */
		do_remount_sb(sb, flags, data, 0);
	} else {
		int silent = (flags & MS_SILENT) ? 1 : 0;
		int err;

		sb->s_flags = flags;
		/*
		 * Finish initialising the superblock; for sysfs this also
		 * obtains the root inode and the root dentry.
		 */
		err = fill_super(sb, data, silent);
		if (err) {
			deactivate_locked_super(sb);
			return err;
		}
		sb->s_flags |= MS_ACTIVE;
	}

	simple_set_mnt(mnt, sb);
	return 0;
}
4.2.1.获取和初始化超级块super_block
/*
 * sget - find or create a superblock for a filesystem type.
 * @type: filesystem type whose fs_supers list is searched
 * @test: optional comparison callback; a non-zero return marks a match
 * @set:  callback that finishes setting up a newly allocated superblock
 * @data: opaque argument handed to both @test and @set
 *
 * Returns an existing superblock accepted by @test (once grab_super() has
 * succeeded on it), a newly allocated superblock that has been linked onto
 * super_blocks and @type->fs_supers, or an ERR_PTR() on failure.
 */
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			void *data)
{
	struct super_block *s = NULL;
	struct super_block *old;
	int err;

retry:
	spin_lock(&sb_lock);
	if (test) {
		/*
		 * type->fs_supers is a doubly linked list threaded through
		 * super_block.s_instances; list_for_each_entry() recovers the
		 * enclosing super_block from each node (container_of()).
		 * Walk it looking for a superblock that @test accepts.
		 */
		list_for_each_entry(old, &type->fs_supers, s_instances) {
			printk(KERN_WARNING "fs->fs_supers\n");
			if (!test(old, data))
				continue;
			if (!grab_super(old))
				goto retry;
			/* Drop the spare superblock allocated on an earlier pass. */
			if (s) {
				up_write(&s->s_umount);
				destroy_super(s);
			}
			return old;
		}
	}
	/* Debug trace: s is a pointer, so it is printed with %p. */
	printk(KERN_WARNING "sget1 s 0x%p\n", s);
	if (!s) {
		spin_unlock(&sb_lock);
		/*
		 * Allocate and initialise a new super_block, then repeat the
		 * search above, since sb_lock was dropped in between.
		 */
		s = alloc_super(type);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}
	printk(KERN_WARNING "sget2 s 0x%p\n", s);

	/*
	 * Let the caller's callback finish the new superblock.
	 * NOTE(review): get_sb_single() passes set_anon_super() here, which
	 * presumably assigns sb->s_dev - confirm.
	 */
	err = set(s, data);
	if (err) {
		spin_unlock(&sb_lock);
		up_write(&s->s_umount);
		destroy_super(s);
		return ERR_PTR(err);
	}
	/* Tie the superblock to its filesystem type (see Figure 2). */
	s->s_type = type;
	strlcpy(s->s_id, type->name, sizeof(s->s_id));
	/* Append s_list to the global super_blocks list (see Figure 2). */
	list_add_tail(&s->s_list, &super_blocks);
	/* Link s_instances onto the type's fs_supers list (see Figure 2). */
	list_add(&s->s_instances, &type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(type);
	return s;
}
4.2.2
调用回调函数fill_super()继续初始化super_block,并初始化根索引节点inode和根目录项dentry。
static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
struct dentry *root;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = SYSFS_MAGIC;
sb->s_op = &sysfs_ops;
sb->s_time_gran = 1;
/*sysfs_sb是全局变量,见图二*/
sysfs_sb = sb;
/* get root inode, initialize and unlock it */
mutex_lock(&sysfs_mutex);
inode = sysfs_get_inode(&sysfs_root);
mutex_unlock(&sysfs_mutex);
if (!inode) {
pr_debug("sysfs: could not get root inode\n");
return -ENOMEM;
}
/* instantiate and link root dentry */
root = d_alloc_root(inode);
if (!root) {
pr_debug("%s: could not get root dentry!\n",__func__);
iput(inode);
return -ENOMEM;
}
/*sysfs_root是全局变量,见图二*/
root->d_fsdata = &sysfs_root;
/*超级块中的目录项指向根目录项dentry见图二*/
sb->s_root = root;
return 0;
}
4.2.2.1
调用sysfs_get_inode(&sysfs_root);建立根索引节点inode
</pre><pre name="code" class="objc">struct sysfs_dirent sysfs_root = {
.s_name = "",
.s_count = ATOMIC_INIT(1),
.s_flags = SYSFS_DIR,
.s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
.s_ino = 1,
};
struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
{
struct inode *inode;
/*sysfs_root中的s_ino等于1
*由sysfs_sb和 sd->s_ino组成hash值,在hash表inode_hashtable中
*查找inode是否已经存在。若不存在,分配并初始化一个新的inode
/
inode = iget_locked(sysfs_sb, sd->s_ino);
/*若是新建的inode,则调用sysfs_init_inode继续初始化inode*/
if (inode && (inode->i_state & I_NEW))
sysfs_init_inode(sd, inode);
return inode;
}
4.2.2.1.1
/*
 * iget_locked - look up an inode by number, creating it when absent.
 * @sb:  superblock the inode belongs to
 * @ino: inode number
 *
 * Returns the inode found by ifind_fast() in the hash bucket for (@sb, @ino),
 * or otherwise the result of get_new_inode_fast().
 */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *bucket = inode_hashtable + hash(sb, ino);
	struct inode *found = ifind_fast(sb, bucket, ino);

	/*
	 * get_new_inode_fast() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	if (!found)
		found = get_new_inode_fast(sb, bucket, ino);

	return found;
}
4.2.2.1.1.1
由sysfs_sb和 sd->s_ino组成hash值,在hash表inode_hashtable中
查找inode是否已经存在
/*
 * ifind_fast - search one hash bucket for an inode.
 * @sb:   superblock the inode belongs to
 * @head: hash bucket to search
 * @ino:  inode number
 *
 * The search runs under inode_lock.  A hit is pinned with __iget() before
 * the lock is dropped and is then waited on with wait_on_inode().
 * Returns the inode, or NULL when the bucket has no match.
 */
static struct inode *ifind_fast(struct super_block *sb,
		struct hlist_head *head, unsigned long ino)
{
	struct inode *hit;

	spin_lock(&inode_lock);
	hit = find_inode_fast(sb, head, ino);
	if (!hit) {
		spin_unlock(&inode_lock);
		return NULL;
	}

	__iget(hit);
	spin_unlock(&inode_lock);
	wait_on_inode(hit);
	return hit;
}
4.2.2.1.1.2
若不存在,则重新分配并初始化一个新的inode。
/*
 * get_new_inode_fast - allocate an inode and insert it into the hash.
 * @sb:   superblock the inode belongs to
 * @head: hash bucket for (@sb, @ino)
 * @ino:  inode number
 *
 * Returns NULL when allocation fails.  Otherwise returns either the new
 * inode, marked I_LOCK|I_NEW for the caller to fill in, or an inode with the
 * same number that was already present in the bucket (the fresh allocation
 * is destroyed in that case).
 */
static struct inode *get_new_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct inode *inode;

	/* Allocate a new inode. */
	inode = alloc_inode(sb);
	/* Debug trace: inode is a pointer, so it is printed with %p. */
	printk(KERN_WARNING "inode 0x%p\n", inode);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_lock);
		/* Search the hash bucket again. */
		/* We released the lock, so.. */
		old = find_inode_fast(sb, head, ino);
		printk(KERN_WARNING "old 0x%p\n", old);
		if (!old) {
			/* Set the inode number. */
			inode->i_ino = ino;
			/*
			 * Put the new inode on the bookkeeping lists for
			 * (sb, head).  Per the original notes: the hash
			 * bucket, the global inode_in_use list (via i_list)
			 * and sb->s_inodes (via i_sb_list); see Figure 2.
			 */
			__inode_add_to_lists(sb, head, inode);
			/*
			 * Callers check I_NEW to decide whether the inode
			 * still needs to be initialised.
			 */
			inode->i_state = I_LOCK|I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}
/*
 * alloc_inode - allocate an inode for a superblock and run the common
 * initialisation on it.
 * @sb: superblock the inode will belong to
 *
 * Uses the superblock's own alloc_inode operation when it provides one and
 * the generic inode slab cache otherwise.  Returns the inode, or NULL when
 * either the allocation or inode_init_always() fails; in the latter case the
 * inode is released through the matching destroy path.
 */
static struct inode *alloc_inode(struct super_block *sb)
{
	struct inode *inode;
	int err;

	/* Allocate the inode. */
	if (!sb->s_op->alloc_inode)
		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
	else
		inode = sb->s_op->alloc_inode(sb);

	if (!inode)
		return NULL;

	/* Common initialisation. */
	err = inode_init_always(sb, inode);
	if (unlikely(err)) {
		if (inode->i_sb->s_op->destroy_inode)
			inode->i_sb->s_op->destroy_inode(inode);
		else
			kmem_cache_free(inode_cachep, inode);
		return NULL;
	}

	return inode;
}
/*
 * inode_init_always - initialise the fields every new inode needs.
 * @sb:    superblock the inode belongs to
 * @inode: inode to initialise
 *
 * Resets the generic inode fields to their defaults, installs empty
 * operation tables, allocates the security and integrity state, initialises
 * the inode's locks and sets up its embedded address_space (i_data).
 *
 * Returns 0 on success or -ENOMEM when security_inode_alloc() or
 * ima_inode_alloc() fails.
 */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct address_space_operations empty_aops;
	static const struct inode_operations empty_iops;
	static const struct file_operations empty_fops;
	struct address_space *const mapping = &inode->i_data;

	/*
	 * Debug trace.  The conversion needs a matching argument; print the
	 * inode being initialised.
	 */
	printk(KERN_WARNING "inode_init_always 0x%p\n", inode);

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &empty_fops;
	inode->i_nlink = 1;
	inode->i_uid = 0;
	inode->i_gid = 0;
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
#ifdef CONFIG_QUOTA
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
	inode->i_cdev = NULL;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

	if (security_inode_alloc(inode))
		goto out;

	/* allocate and initialize an i_integrity */
	if (ima_inode_alloc(inode))
		goto out_free_security;

	/* Each lock gets the lockdep class key of the filesystem type. */
	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	mutex_init(&inode->i_mutex);
	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);

	init_rwsem(&inode->i_alloc_sem);
	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);

	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->assoc_mapping = NULL;
	mapping->backing_dev_info = &default_backing_dev_info;
	mapping->writeback_index = 0;

	/*
	 * If the block_device provides a backing_dev_info for client
	 * inodes then use that. Otherwise the inode share the bdev's
	 * backing_dev_info.
	 */
	if (sb->s_bdev) {
		struct backing_dev_info *bdi;

		bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
		mapping->backing_dev_info = bdi;
	}
	inode->i_private = NULL;
	inode->i_mapping = mapping;
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif

	return 0;

out_free_security:
	security_inode_free(inode);
out:
	return -ENOMEM;
}
4.2.2.1.2
/*
 * sysfs_init_inode - finish a new inode on behalf of a sysfs_dirent.
 * @sd:    sysfs_dirent the inode represents
 * @inode: new inode to complete
 *
 * Attaches @sd to the inode as i_private, installs the sysfs address-space
 * and inode operations, applies either the attributes stored in @sd or the
 * defaults derived from sd->s_mode, selects the operations that match the
 * dirent type and finally calls unlock_new_inode().  An unknown dirent type
 * hits BUG().
 */
static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
{
	struct sysfs_inode_attrs *attrs;

	inode->i_private = sysfs_get(sd);
	inode->i_mapping->a_ops = &sysfs_aops;
	inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
	inode->i_op = &sysfs_inode_operations;
	inode->i_ino = sd->s_ino;
	lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);

	attrs = sd->s_iattr;
	if (!attrs) {
		/* No stored attributes: derive defaults from s_mode. */
		set_default_inode_attr(inode, sd->s_mode);
	} else {
		/* sysfs_dirent has non-default attributes
		 * get them for the new inode from persistent copy
		 * in sysfs_dirent
		 */
		set_inode_attr(inode, &attrs->ia_iattr);
		if (attrs->ia_secdata)
			security_inode_notifysecctx(inode,
						attrs->ia_secdata,
						attrs->ia_secdata_len);
	}

	/*
	 * Initialize the inode according to its type.  sysfs_root is declared
	 * with s_flags = SYSFS_DIR and S_IFDIR in s_mode.
	 */
	switch (sysfs_type(sd)) {
	case SYSFS_DIR:
		/* Directory operations (see Figure 2). */
		inode->i_op = &sysfs_dir_inode_operations;
		inode->i_fop = &sysfs_dir_operations;
		inode->i_nlink = sysfs_count_nlink(sd);
		break;
	case SYSFS_KOBJ_ATTR:
		inode->i_size = PAGE_SIZE;
		inode->i_fop = &sysfs_file_operations;
		break;
	case SYSFS_KOBJ_BIN_ATTR:
		inode->i_size = sd->s_bin_attr.bin_attr->size;
		inode->i_fop = &bin_fops;
		break;
	case SYSFS_KOBJ_LINK:
		inode->i_op = &sysfs_symlink_inode_operations;
		break;
	default:
		BUG();
	}

	unlock_new_inode(inode);
}
4.2.2.2
调用d_alloc_root(inode);建立根目录项
/*
 * d_alloc_root - allocate the root dentry for an inode.
 * @root_inode: inode that the root dentry will refer to
 *
 * Returns a dentry named "/" that is its own parent, shares the inode's
 * superblock and is instantiated with @root_inode.  Returns NULL when
 * @root_inode is NULL or when d_alloc() fails.
 */
struct dentry * d_alloc_root(struct inode * root_inode)
{
	static const struct qstr name = { .name = "/", .len = 1 };
	struct dentry *root;

	if (!root_inode)
		return NULL;

	/* Allocate and initialise the dentry; no parent is passed in. */
	root = d_alloc(NULL, &name);
	if (!root)
		return NULL;

	/* Dentry and inode point at the same superblock (see Figure 2). */
	root->d_sb = root_inode->i_sb;
	root->d_parent = root;
	d_instantiate(root, root_inode);

	return root;
}
/*
 * Attach an inode to a dentry.
 *
 * The dentry must not yet be on any alias list (enforced with BUG_ON).  The
 * actual linking is done by __d_instantiate() while dcache_lock is held; the
 * security hook runs after the lock has been released.
 */
void d_instantiate(struct dentry *entry, struct inode * inode)
{
BUG_ON(!list_empty(&entry->d_alias));
spin_lock(&dcache_lock);
__d_instantiate(entry, inode);
spin_unlock(&dcache_lock);
/* Notify the security layer once the dentry/inode link exists. */
security_d_instantiate(entry, inode);
}
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
/*inode的i_dentry和dentry的d_alias组成双向链表*/
if (inode)
list_add(&dentry->d_alias, &inode->i_dentry);
/*建立目录项和inode之间的联系*/
dentry->d_inode = inode;
fsnotify_d_instantiate(dentry, inode);
}
4.2.3
建立vfsmount结构体mnt和super_block结构体sb之间的联系,见图二
/*
 * simple_set_mnt - connect a mount to a superblock.
 * @mnt: mount to fill in
 * @sb:  superblock the mount refers to
 *
 * Stores @sb in mnt->mnt_sb and stores the result of dget() on the
 * superblock's root dentry in mnt->mnt_root (see Figure 2).
 */
void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
{
	struct dentry *root = dget(sb->s_root);

	mnt->mnt_sb = sb;
	mnt->mnt_root = root;
}
图二