sysfs文件系统和Kobject一起,可以将Kernel的数据结构导出到用户空间,以文件目录结构的形式,提供对这些数据结构(以及数据结构的属性)的访问支持。除了Kobject外,一个比较重要的结构就是attribute。在sysfs中,为什么会有attribute的概念呢?其实它是对应kobject而言的,指的是kobject的“属性”。我们知道,sysfs中的目录描述了kobject,而kobject是特定数据类型变量(如struct device)的体现。因此kobject的属性,就是这些变量的属性。它可以是任何东西,名称、一个内部变量、一个字符串等等。而attribute,在sysfs文件系统中是以文件的形式提供的,即:kobject的所有属性,都在它对应的sysfs目录下以文件的形式呈现。这些文件一般是可读、写的,而kernel中定义了这些属性的模块,会根据用户空间的读写操作,记录和返回这些attribute的值。
总结一下:所谓的attribute,就是内核空间和用户空间进行信息交互的一种方法。例如某个driver定义了一个变量,却希望用户空间程序可以修改该变量,以控制driver的运行行为,那么就可以将该变量以sysfs attribute的形式开放出来。
Linux内核中,attribute分为普通的attribute和二进制attribute,如下:
/*
 * Generic sysfs attribute descriptor. The surrounding article describes
 * each attribute as one file in the sysfs directory of its kobject.
 */
struct attribute {
const char *name; /* attribute name; sysfs_add_file_mode() names the sysfs entry after it */
umode_t mode; /* mode bits; sysfs_add_file_mode() masks them with S_IALLUGO */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* The fields below exist only when CONFIG_DEBUG_LOCK_ALLOC is defined. */
bool ignore_lockdep:1;
struct lock_class_key *key;
struct lock_class_key skey;
#endif
};
/*
 * Binary sysfs attribute: embeds a plain struct attribute and adds a
 * size, a private pointer and its own read/write/mmap callbacks.
 */
struct bin_attribute {
struct attribute attr; /* embedded generic attribute (name, mode) */
size_t size; /* sysfs_init_inode() copies this into inode->i_size */
void *private; /* NOTE(review): presumably owner-defined data -- confirm */
/*
 * read/write receive the file, the kobject, this attribute, a char
 * buffer, a loff_t and a size_t (presumably offset and byte count --
 * the parameters are unnamed here, confirm against the header).
 */
ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *,
char *, loff_t, size_t);
ssize_t (*write)(struct file *,struct kobject *, struct bin_attribute *,
char *, loff_t, size_t);
/* mmap receives the vm_area_struct describing the requested mapping. */
int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr,
struct vm_area_struct *vma);
};
struct attribute为普通的attribute,使用该attribute生成的sysfs文件,只能用字符串的形式读写(后面会说为什么)。而struct bin_attribute在struct attribute的基础上,增加了read、write等函数,因此它所生成的sysfs文件可以用任何方式读写。
说完基本概念,我们要问两个问题:
Kernel怎么把attribute变成sysfs中的文件呢?
用户空间对sysfs的文件进行的读写操作,怎么传递给Kernel呢?
我们通过内核的一个具体例子,来分析该过程。该例子的源码位于内核源码树的samples/kobject/kobject-example.c中。
1 kobject-example.c 实例
该sample使用kobj_attribute,对attribute又封装了一层:
/*
 * Three kobj_attribute instances built with __ATTR(name, mode, show, store).
 * "foo" has dedicated handlers; "baz" and "bar" share b_show/b_store, which
 * tell the two apart by attribute name.
 * NOTE(review): mode 0666 makes the files world-writable; newer kernels are
 * believed to refuse such modes at build time -- confirm before reusing.
 */
static struct kobj_attribute foo_attribute =
__ATTR(foo, 0666, foo_show, foo_store);
static struct kobj_attribute baz_attribute =
__ATTR(baz, 0666, b_show, b_store);
static struct kobj_attribute bar_attribute =
__ATTR(bar, 0666, b_show, b_store);
/*
 * Brace initializer for an attribute wrapper structure: fills the embedded
 * .attr with the stringified _name and _mode, and stores the _show/_store
 * callbacks in the wrapper's .show/.store members.
 */
#define __ATTR(_name,_mode,_show,_store) { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
}
定义了三个kobj_attribute,最终会在sysfs中生成foo,baz,bar三个文件,看一下这些属性文件的具体处理函数:
static int foo;
static int baz;
static int bar;
/*
 * show() handler of the "foo" attribute file.
 *
 * Formats the current value of the static variable foo into buf as a
 * decimal number followed by a newline and returns what sprintf() returned
 * (the number of characters written).
 */
static ssize_t foo_show(struct kobject *kobj, struct kobj_attribute *attr,
			char *buf)
{
	ssize_t written = sprintf(buf, "%d\n", foo);

	return written;
}
/*
 * store() handler of the "foo" attribute file.
 *
 * Parses a decimal integer from buf into the static variable foo.
 *
 * Returns count when a value was parsed and stored, or -EINVAL when buf
 * does not start with a decimal integer (foo is left untouched then).
 */
static ssize_t foo_store(struct kobject *kobj, struct kobj_attribute *attr,
			 const char *buf, size_t count)
{
	int value;

	/*
	 * Plain "%d": the former "%du" asked for a literal 'u' after the
	 * number, and the conversion count was never checked, so garbage
	 * input was reported to the writer as a successful store.
	 */
	if (sscanf(buf, "%d", &value) != 1)
		return -EINVAL;

	foo = value;
	return count;
}
static ssize_t b_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
int var;
if (strcmp(attr->attr.name, "baz") == 0)
var = baz;
else
var = bar;
return sprintf(buf, "%d\n", var);
}
/*
 * Shared store() handler of the "baz" and "bar" attribute files.
 *
 * Parses a decimal integer from buf and stores it into baz when the
 * attribute is named "baz", otherwise into bar.
 *
 * Returns count on success, or -EINVAL when buf does not start with a
 * decimal integer; in that case neither variable is modified.
 */
static ssize_t b_store(struct kobject *kobj, struct kobj_attribute *attr,
		       const char *buf, size_t count)
{
	int var;

	/*
	 * Check the conversion count: without it a failed parse left var
	 * uninitialized and that indeterminate value was stored. Plain "%d"
	 * replaces "%du", which expected a literal 'u' after the number.
	 */
	if (sscanf(buf, "%d", &var) != 1)
		return -EINVAL;

	if (strcmp(attr->attr.name, "baz") == 0)
		baz = var;
	else
		bar = var;
	return count;
}
最终把这些属性文件放到一个属性组中,调用属性组的注册方法把这些属性文件注册到sysfs文件系统中:
/*
 * Attribute list referenced by attr_group.attrs. Each entry points at the
 * struct attribute embedded in one of the kobj_attribute wrappers;
 * create_files() walks the array until it reaches the NULL entry.
 */
static struct attribute *attrs[] = {
&foo_attribute.attr,
&baz_attribute.attr,
&bar_attribute.attr,
NULL, /* need to NULL terminate the list of attributes */
};
/*
 * An unnamed attribute group will put all of the attributes directly in
 * the kobject directory. If we specify a name, a subdirectory will be
 * created for the attributes with the directory being the name of the
 * attribute group.
 *
 * This group sets only .attrs, so it is unnamed and its files go
 * directly into the kobject directory.
 */
static struct attribute_group attr_group = {
.attrs = attrs,
};
/*
 * Module init: create the "kobject_example" kobject below kernel_kobj and
 * populate it with the files described by attr_group.
 *
 * Returns 0 on success, -ENOMEM when the kobject cannot be created, or the
 * error reported by sysfs_create_group() (the kobject reference is dropped
 * in that case).
 */
static int __init example_init(void)
{
	int err;

	/*
	 * Create a simple kobject with the name of "kobject_example",
	 * located under /sys/kernel/ (its parent is kernel_kobj).
	 *
	 * As this is a simple directory, no uevent will be sent to
	 * userspace.  That is why this function should not be used for
	 * any type of dynamic kobjects, where the name and number are
	 * not known ahead of time.
	 */
	example_kobj = kobject_create_and_add("kobject_example", kernel_kobj);
	if (example_kobj == NULL)
		return -ENOMEM;

	/* Create the attribute files inside the new kobject directory. */
	err = sysfs_create_group(example_kobj, &attr_group);
	if (err != 0)
		kobject_put(example_kobj);

	return err;
}
简单分析一下注册过程:kobject_create_and_add函数前一篇文章已经分析过了,它创建一个名字为kobject_example的Kobject结构,并且设置父目录为kernel_kobj,同时为其创建一个sysfs_dirent结构,来描述该目录节点。创建完以后,目录结构为/sys/kernel/kobject_example。
然后调用sysfs_create_group为该目录添加属性文件。
sysfs_create_group
------------>internal_create_group
static int internal_create_group(struct kobject *kobj, int update,
const struct attribute_group *grp)
{
struct sysfs_dirent *sd;
int error;
BUG_ON(!kobj || (!update && !kobj->sd));
/* Updates may happen before the object has been instantiated */
if (unlikely(update && !kobj->sd))
return -EINVAL;
if (!grp->attrs) {
WARN(1, "sysfs: attrs not set by subsystem for group: %s/%s\n",
kobj->name, grp->name ? "" : grp->name);
return -EINVAL;
}
if (grp->name) { //没有诶grp设置名字,所以这边不走
error = sysfs_create_subdir(kobj, grp->name, &sd);
if (error)
return error;
} else
sd = kobj->sd;
sysfs_get(sd);
error = create_files(sd, kobj, grp, update);//在该函数中,创建属性文件
if (error) {
if (grp->name)
sysfs_remove_subdir(sd);
}
sysfs_put(sd);
return error;
}
internal_create_group
-------------->create_files
/*
 * Create one sysfs file under dir_sd for every entry of grp->attrs.
 *
 * The loop ends at the NULL terminator or at the first error; on error
 * remove_files() is called for the group before returning.
 * Returns 0 on success or the error from sysfs_add_file_mode().
 */
static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
const struct attribute_group *grp, int update)
{
struct attribute *const* attr;
int error = 0, i;
for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
umode_t mode = 0;
/* in update mode, we're changing the permissions or
* visibility. Do this by first removing then
* re-adding (if required) the file */
if (update)
sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
/* Optional per-group hook: a zero result skips this attribute. */
if (grp->is_visible) {
mode = grp->is_visible(kobj, *attr, i);
if (!mode)
continue;
}
/* The hook's mode bits (0 without a hook) are OR-ed into the attribute's own mode. */
error = sysfs_add_file_mode(dir_sd, *attr, SYSFS_KOBJ_ATTR,
(*attr)->mode | mode);
if (unlikely(error))
break;
}
if (error)
remove_files(dir_sd, kobj, grp);
return error;
}
遍历attribute_group中的attribute,为每个attribute结构依次调用sysfs_add_file_mode函数,生成属性文件。
internal_create_group
-------------->create_files
----------------->sysfs_add_file_mode
/*
 * Add one attribute as a regular-file entry below the directory dir_sd.
 *
 * amode is masked with S_IALLUGO and combined with S_IFREG; type is passed
 * through to sysfs_new_dirent().
 * Returns 0 on success, the error from sysfs_attr_ns() or sysfs_add_one(),
 * or -ENOMEM when no sysfs_dirent could be allocated.
 */
int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
const struct attribute *attr, int type, umode_t amode)
{
umode_t mode = (amode & S_IALLUGO) | S_IFREG;
struct sysfs_addrm_cxt acxt;
struct sysfs_dirent *sd;
const void *ns;
int rc;
rc = sysfs_attr_ns(dir_sd->s_dir.kobj, attr, &ns);
if (rc)
return rc;
/* Create the sysfs_dirent for the file, named after the attribute. */
sd = sysfs_new_dirent(attr->name, mode, type);
if (!sd)
return -ENOMEM;
sd->s_ns = ns;
sd->s_attr.attr = (void *)attr;/* remember the attribute in the sysfs_dirent's s_attr.attr */
sysfs_dirent_init_lockdep(sd);
sysfs_addrm_start(&acxt, dir_sd);
rc = sysfs_add_one(&acxt, sd);/* per the article: sets sd->s_parent to the directory's sysfs_dirent and links sd into that directory's children */
sysfs_addrm_finish(&acxt);
/* Adding failed: drop the new sysfs_dirent again. */
if (rc)
sysfs_put(sd);
return rc;
}
至此我们可以看到,内核为每个属性创建了一个sysfs_dirent结构,并和上层的sysfs_dirent结构相联系起来。
sd->s_attr.attr = (void *)attr;//把该属性文件赋值给sysfs_dirent结构的s_attr.attr
最终生成的文件结构为/sys/kernel/kobject_example/baz,/sys/kernel/kobject_example/bar和/sys/kernel/kobject_example/foo。以/sys/kernel/kobject_example/baz为例,最后初始化完以后各结构的数据关系大概如下:
同时我们也可以看到,上面的属性提供了show和store两个函数,这两个函数分别对应文件的read和write操作,那么上层在读写属性文件的时候,是如何和这些函数对应起来的呢?下面接着分析。
2 sysfs属性文件读写原理分析
从上面的分析可以看到,为sysfs文件创建目录和属性节点的时候,并没有为这些目录创建dentry和inode,那么具体是什么时候为这些节点创建dentry和inode的呢,先从open系统调用开始看。我们以打开/sys/kernel/kobject_example/baz这个文件为例:
2.1 open操作
整个open 函数调用假设所open节点没有dentry节点和inode节点,需要临时创建,函数调用流程如下(省去了中间节点dentry和inode创建过程):
do_sys_open
----------->do_filp_open
-------------->path_openat
/*
 * Open the file named by pathname: obtain an empty struct file, walk the
 * path up to the parent of the last component, then let do_last() handle
 * the last component, repeating do_last() for trailing symlinks.
 *
 * Returns the opened file, or an ERR_PTR() value on failure.
 */
static struct file *path_openat(int dfd, struct filename *pathname,
struct nameidata *nd, const struct open_flags *op, int flags)
{
struct file *base = NULL;
struct file *file;
struct path path;
int opened = 0;
int error;
file = get_empty_filp();
if (IS_ERR(file))
return file;
file->f_flags = op->open_flag;
/* Initialise the walk's starting path from the mount instance and root dentry of the root of the mount path (author's description). */
error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(error))
goto out;
current->total_link_count = 0;
/* Walk the directory components up to the parent of the last one, e.g. for /mnt/a the walk stops at mnt. */
error = link_path_walk(pathname->name, nd);
if (unlikely(error))
goto out;
error = do_last(nd, &path, file, op, &opened, pathname);/* look up the last component, e.g. "a" in /mnt/a */
/* A positive result from do_last() means the last component is a symlink to follow. */
while (unlikely(error > 0)) { /* trailing symlink */
struct path link = path;
void *cookie;
if (!(nd->flags & LOOKUP_FOLLOW)) {
path_put_conditional(&path, nd);
path_put(&nd->path);
error = -ELOOP;
break;
}
error = may_follow_link(&link, nd);
if (unlikely(error))
break;
nd->flags |= LOOKUP_PARENT;
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
error = follow_link(&link, nd, &cookie);
if (unlikely(error))
break;
error = do_last(nd, &path, file, op, &opened, pathname);
put_link(nd, &link, cookie);
}
out:
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
path_put(&nd->root);
if (base)
fput(base);
/* The file was never opened: there must be an error, and the empty file is released. */
if (!(opened & FILE_OPENED)) {
BUG_ON(!error);
put_filp(file);
}
if (unlikely(error)) {
/* -EOPENSTALE is translated: -ECHILD under LOOKUP_RCU, -ESTALE otherwise. */
if (error == -EOPENSTALE) {
if (flags & LOOKUP_RCU)
error = -ECHILD;
else
error = -ESTALE;
}
file = ERR_PTR(error);
}
return file;
}
path_init函数的主要作用是用挂载路径的根目录的mount挂载实例以及root dentry 初始化path,该函数在https://blog.csdn.net/oqqYuJi12345678/article/details/101689334这篇文章中分析过,这边就不展开了。以我们这边打开/sys/kernel/kobject_example/baz文件为例,假设我们的根文件系统使用的是ubi文件系统,那么sys则是ubi文件系统里面的一个目录,sysfs文件系统挂载在该目录下,经过path_init函数,nd中存储的应该是ubi文件系统的root dentry和mount 实例。
link_path_walk函数在之前的文章中已经分析过了,不过之前主要是默认能找到dentry的情况,那么如果找不到dentry会是什么情况呢?先来看/sys/kernel/kobject_example/baz这个搜索路径,假设sys这个目录的dentry是存在的,而由sysfs文件系统的初始化过程知道,sysfs文件系统初始化的时候,为其建立过dentry结构,所以目录搜索的时候,搜索sys目录,最终找到的dentry为sysfs文件系统的root dentry,而下面的kernel 和kobject_example目录,以及baz属性文件,是没有为其建立dentry的。
link_path_walk
------------>walk_component
link_path_walk中解析整个路径名,然后计算路径中每个子串的hash值,调用walk_component来解析这个子串,获得dentry和inode。根据上面的分析,在kernel这个子串解析的时候,应该是找不到dentry的:
/*
 * Resolve one path component (described by nd) into path and advance nd.
 *
 * Returns 0 after nd was advanced to the component, 1 when the component
 * should be followed as a link (nd is not advanced), or a negative errno
 * after terminate_walk(). "." and ".." are delegated to handle_dots().
 */
static inline int walk_component(struct nameidata *nd, struct path *path,
int follow)
{
struct inode *inode;
int err;
/*
* "." and ".." are special - ".." especially so because it has
* to be able to know about the current root directory and
* parent relationships.
*/
if (unlikely(nd->last_type != LAST_NORM))
return handle_dots(nd, nd->last_type);
/* Try lookup_fast() first; a positive result falls back to lookup_slow(). */
err = lookup_fast(nd, path, &inode);
if (unlikely(err)) {
if (err < 0)
goto out_err;
err = lookup_slow(nd, path);
if (err < 0)
goto out_err;
inode = path->dentry->d_inode;
}
/* A dentry without an inode means the component does not exist. */
err = -ENOENT;
if (!inode)
goto out_path_put;
if (should_follow_link(inode, follow)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
err = -ECHILD;
goto out_err;
}
}
BUG_ON(inode != path->dentry->d_inode);
return 1;
}
path_to_nameidata(path, nd);
nd->inode = inode;
return 0;
out_path_put:
path_to_nameidata(path, nd);
out_err:
terminate_walk(nd);
return err;
}
lookup_fast中查找失败,返回值不为0,所以会进入lookup_slow:
/*
 * Look up nd->last in the directory nd->path.dentry while holding that
 * directory's i_mutex, then store the result in path and let
 * follow_managed() process it.
 *
 * Returns 0 on success (LOOKUP_JUMPED is set in nd->flags when
 * follow_managed() returned a positive value) or a negative errno.
 */
static int lookup_slow(struct nameidata *nd, struct path *path)
{
	struct dentry *parent = nd->path.dentry;
	struct dentry *child;
	int rc;

	BUG_ON(nd->inode != parent->d_inode);

	/* The directory's i_mutex is held only around the hash lookup. */
	mutex_lock(&parent->d_inode->i_mutex);
	child = __lookup_hash(&nd->last, parent, nd->flags);
	mutex_unlock(&parent->d_inode->i_mutex);
	if (IS_ERR(child))
		return PTR_ERR(child);

	path->mnt = nd->path.mnt;
	path->dentry = child;

	rc = follow_managed(path, nd->flags);
	if (unlikely(rc < 0)) {
		path_put_conditional(path, nd);
		return rc;
	}
	if (rc > 0)
		nd->flags |= LOOKUP_JUMPED;

	return 0;
}
主要关注__lookup_hash函数:
/*
 * Look up name below base: ask lookup_dcache() first and call
 * lookup_real() with base's inode only when lookup_dcache() reports,
 * through need_lookup, that a real lookup is still required.
 */
static struct dentry *__lookup_hash(struct qstr *name,
				    struct dentry *base, unsigned int flags)
{
	bool need_lookup;
	struct dentry *found = lookup_dcache(name, base, flags, &need_lookup);

	if (need_lookup)
		return lookup_real(base->d_inode, found, flags);

	return found;
}
在lookup_dcache函数中会根据hash值去查找dentry,没找到就会为其分配dentry结构,并对其初始化,把其parent指向上层目录的dentry,并把need_lookup设置为true,接着调用lookup_real:
/*
 * Ask the filesystem to look up dentry in directory dir through
 * dir->i_op->lookup().
 *
 * Returns dentry itself when ->lookup() returned NULL; otherwise dentry is
 * released with dput() and the value returned by ->lookup() is passed on.
 * For a dead directory dentry is released and ERR_PTR(-ENOENT) returned.
 */
static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
				  unsigned int flags)
{
	struct dentry *replacement;

	/* Don't create child dentry for a dead directory. */
	if (unlikely(IS_DEADDIR(dir))) {
		dput(dentry);
		return ERR_PTR(-ENOENT);
	}

	replacement = dir->i_op->lookup(dir, dentry, flags);
	if (unlikely(replacement)) {
		dput(dentry);
		return replacement;
	}

	return dentry;
}
传给lookup_real的第一个参数是base->d_inode,就是上层目录的inode结构,可以看到在lookup_real函数中调用了dir->i_op->lookup,那么这个函数应该是哪个函数呢?可以看一下sysfs文件系统的inode是如何初始化的。
在sysfs文件系统初始化的时候:
sysfs_mount
--------->sysfs_fill_super
----------->sysfs_get_inode
-------------->sysfs_init_inode
/*
 * Initialise inode from its sysfs_dirent sd: common fields first, then the
 * inode/file operations that depend on the entry type.
 */
static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
{
struct bin_attribute *bin_attr;
/* i_private keeps the sysfs_dirent returned by sysfs_get() (presumably a counted reference -- confirm). */
inode->i_private = sysfs_get(sd);
inode->i_mapping->a_ops = &sysfs_aops;
inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
/* Default inode operations; directories and symlinks override them below. */
inode->i_op = &sysfs_inode_operations;
set_default_inode_attr(inode, sd->s_mode);
sysfs_refresh_inode(sd, inode);
/* initialize inode according to type */
switch (sysfs_type(sd)) {
case SYSFS_DIR:
/* Directories get the operations table that provides ->lookup. */
inode->i_op = &sysfs_dir_inode_operations;
inode->i_fop = &sysfs_dir_operations;
break;
case SYSFS_KOBJ_ATTR:
/* Plain attribute files report a size of PAGE_SIZE. */
inode->i_size = PAGE_SIZE;
inode->i_fop = &sysfs_file_operations;
break;
case SYSFS_KOBJ_BIN_ATTR:
/* Binary attribute files report the size stored in the bin_attribute. */
bin_attr = sd->s_bin_attr.bin_attr;
inode->i_size = bin_attr->size;
inode->i_fop = &bin_fops;
break;
case SYSFS_KOBJ_LINK:
inode->i_op = &sysfs_symlink_inode_operations;
break;
default:
BUG();
}
unlock_new_inode(inode);
}
当inode代表目录的时候,inode的i_op函数会被初始化为sysfs_dir_inode_operations:
/*
 * Inode operations that sysfs_init_inode() installs for SYSFS_DIR entries;
 * .lookup is what lookup_real() reaches through dir->i_op->lookup.
 */
const struct inode_operations sysfs_dir_inode_operations = {
.lookup = sysfs_lookup,
.permission = sysfs_permission,
.setattr = sysfs_setattr,
.getattr = sysfs_getattr,
.setxattr = sysfs_setxattr,
};
所以上面的dir->i_op->lookup函数为sysfs_lookup,具体看一下该函数的实现:
/*
 * ->lookup() of sysfs directories: find the child named dentry->d_name
 * among the children of the parent directory's sysfs_dirent, attach the
 * child's sysfs_dirent to the dentry and give the dentry an inode.
 *
 * Runs under sysfs_mutex.
 * Returns the result of d_materialise_unique() on success,
 * ERR_PTR(-ENOENT) when the parent has no such child, or
 * ERR_PTR(-ENOMEM) when no inode could be obtained.
 */
static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
				   unsigned int flags)
{
	struct dentry *ret = NULL;
	struct dentry *parent = dentry->d_parent;
	struct sysfs_dirent *parent_sd = parent->d_fsdata;
	struct sysfs_dirent *sd;
	struct inode *inode;
	enum kobj_ns_type type;
	const void *ns;

	mutex_lock(&sysfs_mutex);

	type = sysfs_ns_type(parent_sd);
	ns = sysfs_info(dir->i_sb)->ns[type];

	/*
	 * Search the parent's sysfs_dirent (per the article: the red-black
	 * tree at s_dir.children) for the matching child.
	 */
	sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name);

	/* no such entry */
	if (!sd) {
		ret = ERR_PTR(-ENOENT);
		goto out_unlock;
	}

	/*
	 * Attach this component's sysfs_dirent to the dentry. This line was
	 * a bare sentence without a comment marker, which made the listing
	 * invalid C.
	 */
	dentry->d_fsdata = sysfs_get(sd);

	/* attach dentry and inode: obtain the inode for sd */
	inode = sysfs_get_inode(dir->i_sb, sd);
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}

	/* instantiate and hash dentry */
	ret = d_materialise_unique(dentry, inode);
out_unlock:
	mutex_unlock(&sysfs_mutex);
	return ret;
}
从前面可以看到,sysfs文件系统会为每个节点创建sysfs_dirent结构,所以sysfs_find_dirent函数一定会找到属于该节点的sysfs_dirent结构。通过这个操作把同一节点的dentry和sysfs_dirent关联起来:dentry->d_fsdata = sysfs_get(sd);最终,我们得到了kernel目录的inode和dentry结构,同理,接着往下搜索,会找到kobject_example目录的inode和dentry结构,而我们最终需要的就是baz属性文件的inode和dentry结构。baz属性文件的inode和dentry结构由do_last函数完成。调用完link_path_walk函数,完成对目录的搜索,然后调用do_last函数:
path_openat
----------->do_last
static int do_last(struct nameidata *nd, struct path *path,
struct file *file, const struct open_flags *op,
int *opened, struct filename *name)
{
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
bool will_truncate = (open_flag & O_TRUNC) != 0;
bool got_write = false;
int acc_mode = op->acc_mode;
struct inode *inode;
bool symlink_ok = false;
struct path save_parent = { .dentry = NULL, .mnt = NULL };
bool retried = false;
int error;
nd->flags &= ~LOOKUP_PARENT;
nd->flags |= op->intent;
/* ……(此处省略部分代码)…… */
if (!(open_flag & O_CREAT)) {
if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
symlink_ok = true;
/* we _can_ be in RCU mode here */
error = lookup_fast(nd, path, &inode);
if (likely(!error))
goto finish_lookup;
if (error < 0)
goto out;
BUG_ON(nd->inode != dir->d_inode);
} else {
/* create side of things */
/*
* This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
* has been cleared when we got to the last component we are
* about to look up
*/
error = complete_walk(nd);
if (error)
return error;
audit_inode(name, dir, LOOKUP_PARENT);
error = -EISDIR;
/* trailing slashes? */
if (nd->last.name[nd->last.len])
goto out;
}
retry_lookup:
if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
error = mnt_want_write(nd->path.mnt);
if (!error)
got_write = true;
/*
* do _not_ fail yet - we might not need that or fail with
* a different error; let lookup_open() decide; we'll be
* dropping this one anyway.
*/
}
mutex_lock(&dir->d_inode->i_mutex);
/* (1) 调用lookup_open,完成最后一个节点的dentry和inode的创建 */
error = lookup_open(nd, path, file, op, got_write, opened);
mutex_unlock(&dir->d_inode->i_mutex);
if (error <= 0) {
if (error)
goto out;
if ((*opened & FILE_CREATED) ||
!S_ISREG(file_inode(file)->i_mode))
will_truncate = false;
audit_inode(name, file->f_path.dentry, 0);
goto opened;
}
if (*opened & FILE_CREATED) {
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
will_truncate = false;
acc_mode = MAY_OPEN;
path_to_nameidata(path, nd);
goto finish_open_created;
}
/*
* create/update audit record if it already exists.
*/
if (path->dentry->d_inode)
audit_inode(name, path->dentry, 0);
/*
* If atomic_open() acquired write access it is dropped now due to
* possible mount and symlink following (this might be optimized away if
* necessary...)
*/
if (got_write) {
mnt_drop_write(nd->path.mnt);
got_write = false;
}
error = -EEXIST;
if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
goto exit_dput;
error = follow_managed(path, nd->flags);
if (error < 0)
goto exit_dput;
if (error)
nd->flags |= LOOKUP_JUMPED;
BUG_ON(nd->flags & LOOKUP_RCU);
inode = path->dentry->d_inode;
finish_lookup:
/* we _can_ be in RCU mode here */
error = -ENOENT;
if (!inode) {
path_to_nameidata(path, nd);
goto out;
}
if (should_follow_link(inode, !symlink_ok)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
error = -ECHILD;
goto out;
}
}
BUG_ON(inode != path->dentry->d_inode);
return 1;
}
if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
path_to_nameidata(path, nd);
} else {
save_parent.dentry = nd->path.dentry;
save_parent.mnt = mntget(path->mnt);
nd->path.dentry = path->dentry;
}
nd->inode = inode;
/* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
error = complete_walk(nd);
if (error) {
path_put(&save_parent);
return error;
}
error = -EISDIR;
if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
goto out;
error = -ENOTDIR;
if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
goto out;
audit_inode(name, nd->path.dentry, 0);
finish_open:
if (!S_ISREG(nd->inode->i_mode))
will_truncate = false;
if (will_truncate) {
error = mnt_want_write(nd->path.mnt);
if (error)
goto out;
got_write = true;
}
finish_open_created:
/* (2) 权限检查通过后调用finish_open,完成打开操作 */
error = may_open(&nd->path, acc_mode, open_flag);
if (error)
goto out;
file->f_path.mnt = nd->path.mnt;
error = finish_open(file, nd->path.dentry, NULL, opened);
if (error) {
if (error == -EOPENSTALE)
goto stale_open;
goto out;
}
opened:
error = open_check_o_direct(file);
if (error)
goto exit_fput;
error = ima_file_check(file, op->acc_mode);
if (error)
goto exit_fput;
if (will_truncate) {
error = handle_truncate(file);
if (error)
goto exit_fput;
}
out:
if (got_write)
mnt_drop_write(nd->path.mnt);
path_put(&save_parent);
terminate_walk(nd);
return error;
exit_dput:
path_put_conditional(path, nd);
goto out;
exit_fput:
fput(file);
goto out;
stale_open:
/* If no saved parent or already retried then can't retry */
if (!save_parent.dentry || retried)
goto out;
BUG_ON(save_parent.dentry != dir);
path_put(&nd->path);
nd->path = save_parent;
nd->inode = dir->d_inode;
save_parent.mnt = NULL;
save_parent.dentry = NULL;
if (got_write) {
mnt_drop_write(nd->path.mnt);
got_write = false;
}
retried = true;
goto retry_lookup;
}
(1)在lookup_open函数里面,完成最后节点dentry和inode的创建:
/*
 * Look up the last path component (nd->last) in directory nd->path.dentry
 * and, when O_CREAT is set and the dentry has no inode, create the file.
 *
 * Returns 1 with path->dentry/path->mnt filled in when the caller still
 * has to open the file, whatever atomic_open() returns when that route is
 * taken, or a negative errno on failure.
 */
static int lookup_open(struct nameidata *nd, struct path *path,
struct file *file,
const struct open_flags *op,
bool got_write, int *opened)
{
struct dentry *dir = nd->path.dentry;
struct inode *dir_inode = dir->d_inode;
struct dentry *dentry;
int error;
bool need_lookup;
*opened &= ~FILE_CREATED;
/* Get the dentry for the last component (per the article, one is allocated when none exists yet). */
dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
/* Cached positive dentry: will open in f_op->open */
if (!need_lookup && dentry->d_inode)
goto out_no_open;
if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
return atomic_open(nd, dentry, path, file, op, got_write,
need_lookup, opened);
}
if (need_lookup) {
BUG_ON(dentry->d_inode);
/* The filesystem's ->lookup (called by lookup_real) supplies the inode. */
dentry = lookup_real(dir_inode, dentry, nd->flags);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
/* Negative dentry, just create the file */
if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
umode_t mode = op->mode;
if (!IS_POSIXACL(dir->d_inode))
mode &= ~current_umask();
/*
* This write is needed to ensure that a
* rw->ro transition does not occur between
* the time when the file is created and when
* a permanent write count is taken through
* the 'struct file' in finish_open().
*/
if (!got_write) {
error = -EROFS;
goto out_dput;
}
*opened |= FILE_CREATED;
error = security_path_mknod(&nd->path, dentry, mode, 0);
if (error)
goto out_dput;
error = vfs_create(dir->d_inode, dentry, mode,
nd->flags & LOOKUP_EXCL);
if (error)
goto out_dput;
}
out_no_open:
path->dentry = dentry;
path->mnt = nd->path.mnt;
return 1;
out_dput:
dput(dentry);
return error;
}
(2)最后调用finish_open,完成打开操作
/*
 * Complete an open: bind dentry to file and let do_dentry_open() do the
 * actual opening with the current credentials.
 *
 * On success FILE_OPENED is set in *opened and 0 is returned; otherwise
 * the error from do_dentry_open() is returned and *opened is unchanged.
 */
int finish_open(struct file *file, struct dentry *dentry,
		int (*open)(struct inode *, struct file *),
		int *opened)
{
	int rc;

	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */

	/* From here on the file refers to this dentry. */
	file->f_path.dentry = dentry;

	rc = do_dentry_open(file, open, current_cred());
	if (rc)
		return rc;

	*opened |= FILE_OPENED;
	return 0;
}
file->f_path.dentry = dentry;这个操作把dentry和file关联起来
上面函数在do_dentry_open中,会调用inode->i_fop的open函数做进一步操作,而且如下代码:
f->f_op = fops_get(inode->i_fop);会把inode 的i_fop赋值给file->f_op,后面的read,write操作都会用到
从上面的属性文件的inode初始化可以看出,该inode的操作函数集是sysfs_file_operations:
/*
 * File operations that sysfs_init_inode() installs for SYSFS_KOBJ_ATTR
 * entries, i.e. for plain attribute files.
 */
const struct file_operations sysfs_file_operations = {
.read = sysfs_read_file,
.write = sysfs_write_file,
.llseek = generic_file_llseek,
.open = sysfs_open_file,
.release = sysfs_release,
.poll = sysfs_poll,
};
具体open函数为:
/*
 * ->open() of plain attribute files: check that the owning kobject's ktype
 * provides the sysfs_ops needed for the requested access mode, then
 * allocate a sysfs_buffer and store it in file->private_data.
 *
 * Returns 0 on success, -ENODEV when no active reference on the attribute
 * can be taken, -EACCES when the ops or permissions do not allow the
 * access, -ENOMEM when the buffer cannot be allocated, or the error from
 * sysfs_get_open_dirent().
 */
static int sysfs_open_file(struct inode *inode, struct file *file)
{
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;/* sysfs_dirent attached to the dentry by sysfs_lookup() */
struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;/* kobject of the parent directory; the attribute file has no kobject of its own */
struct sysfs_buffer *buffer;
const struct sysfs_ops *ops;
int error = -EACCES;
/* need attr_sd for attr and ops, its parent for kobj */
if (!sysfs_get_active(attr_sd))
return -ENODEV;
/* every kobject with an attribute needs a ktype assigned */
if (kobj->ktype && kobj->ktype->sysfs_ops)
ops = kobj->ktype->sysfs_ops; /* show/store operations of the kobject's ktype */
else {
WARN(1, KERN_ERR "missing sysfs attribute operations for "
"kobject: %s\n", kobject_name(kobj));
goto err_out;
}
/* File needs write support.
* The inode's perms must say it's ok,
* and we must have a store method.
*/
if (file->f_mode & FMODE_WRITE) {
if (!(inode->i_mode & S_IWUGO) || !ops->store)
goto err_out;
}
/* File needs read support.
* The inode's perms must say it's ok, and there
* must be a show method for it.
*/
if (file->f_mode & FMODE_READ) {
if (!(inode->i_mode & S_IRUGO) || !ops->show)
goto err_out;
}
/* No error? Great, allocate a buffer for the file, and store
* it in file->private_data for easy access.
*/
error = -ENOMEM;
/* This buffer is what sysfs_read_file() later picks up from file->private_data. */
buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
if (!buffer)
goto err_out;
mutex_init(&buffer->mutex);
buffer->needs_read_fill = 1;
buffer->ops = ops; /* saved for fill_read_buffer(), which calls ops->show */
file->private_data = buffer; /* attach the buffer to the open file */
/* make sure we have open dirent struct */
error = sysfs_get_open_dirent(attr_sd, buffer);
if (error)
goto err_free;
/* open succeeded, put active references */
sysfs_put_active(attr_sd);
return 0;
err_free:
kfree(buffer);
err_out:
sysfs_put_active(attr_sd);
return error;
}
上面函数中分配了一个buffer,并把kobject的操作函数放入该buffer中,并把该buffer保存在file->private_data中,后面read会用到。这个时候,open操作就完成了,接下来看一下read函数是如何最终调用属性文件的show方法的。在此之前先总结一下:对于/sys/kernel/kobject_example/baz文件的检索,在为kernel目录创建dentry和inode时具体是如何做的,可以大概参考下面这张图:
后面的目录和文件建立,原理类似。
2.2 read操作
整个read的函数调用流程大概如下:
read系统调用会调用到vfs_read:
/*
 * Read up to count bytes from file into the user buffer buf at *pos.
 *
 * Returns -EBADF when the file is not open for reading, -EINVAL when it
 * has neither a read nor an aio_read operation, -EFAULT when buf fails
 * access_ok(), a negative value from rw_verify_area(), or otherwise the
 * result of f_op->read() (do_sync_read() when ->read is not provided).
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!file->f_op || !(file->f_op->read || file->f_op->aio_read))
		return -EINVAL;
	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(READ, file, pos, count);
	if (ret < 0)
		return ret;

	/* rw_verify_area() hands back the byte count to use. */
	count = ret;
	ret = file->f_op->read ? file->f_op->read(file, buf, count, pos)
			       : do_sync_read(file, buf, count, pos);
	if (ret > 0) {
		fsnotify_access(file);
		add_rchar(current, ret);
	}
	inc_syscr(current);

	return ret;
}
从vfs_read中可以看到,会调用 file->f_op->read,在前面的open函数中,已经为f_op初始化过了,对于sysfs文件系统,这里的read函数应该是sysfs_read_file:
/*
 * ->read() of plain attribute files.
 *
 * Under the buffer's mutex, (re)fill the buffer through fill_read_buffer()
 * when it is flagged needs_read_fill or the read starts at offset 0, then
 * copy the requested part to user space.
 *
 * Returns what simple_read_from_buffer() returns, or the non-zero error
 * from fill_read_buffer().
 */
static ssize_t
sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	/* sysfs_open_file() stored the buffer in private_data. */
	struct sysfs_buffer *sbuf = file->private_data;
	ssize_t ret = 0;

	mutex_lock(&sbuf->mutex);

	/* The dentry of this file was recorded in f_path during open. */
	if (sbuf->needs_read_fill || *ppos == 0)
		ret = fill_read_buffer(file->f_path.dentry, sbuf);

	if (ret == 0) {
		pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
			 __func__, count, *ppos, sbuf->page);
		/* Hand the data over to user space. */
		ret = simple_read_from_buffer(buf, count, ppos, sbuf->page,
					      sbuf->count);
	}

	mutex_unlock(&sbuf->mutex);
	return ret;
}
真正的读操作在fill_read_buffer函数中,我们进去看一下:
/*
 * Fill buffer->page by calling the show() operation saved in buffer->ops
 * for the attribute behind dentry.
 *
 * Returns 0 on success (buffer->count holds the length and
 * needs_read_fill is cleared), -ENOMEM when no page can be allocated,
 * -ENODEV when no active reference on the attribute can be taken, or the
 * negative value returned by show().
 */
static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
{
struct sysfs_dirent *attr_sd = dentry->d_fsdata;/* sysfs_dirent attached to the dentry by sysfs_lookup() */
struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;/* kobject of the parent directory */
const struct sysfs_ops * ops = buffer->ops;/* saved by sysfs_open_file() */
int ret = 0;
ssize_t count;
/* The page is allocated on first use and kept in the buffer afterwards. */
if (!buffer->page)
buffer->page = (char *) get_zeroed_page(GFP_KERNEL);
if (!buffer->page)
return -ENOMEM;
/* need attr_sd for attr and ops, its parent for kobj */
if (!sysfs_get_active(attr_sd))
return -ENODEV;
buffer->event = atomic_read(&attr_sd->s_attr.open->event);
/* s_attr.attr is the struct attribute stored by sysfs_add_file_mode(). */
count = ops->show(kobj, attr_sd->s_attr.attr, buffer->page);
sysfs_put_active(attr_sd);
/*
* The code works fine with PAGE_SIZE return but it's likely to
* indicate truncated result or overflow in normal use cases.
*/
if (count >= (ssize_t)PAGE_SIZE) {
print_symbol("fill_read_buffer: %s returned bad count\n",
(unsigned long)ops->show);
/* Try to struggle along */
count = PAGE_SIZE - 1;
}
if (count >= 0) {
buffer->needs_read_fill = 0;
buffer->count = count;
} else {
ret = count;
}
return ret;
}
上面函数,调用了ops->show,这个函数具体是什么呢?在上面注册该属性文件目录,创建目录的kobject的时候,
example_init
----------->kobject_create_and_add
--------------->kobject_create
默认把kobject->ktype设置为dynamic_kobj_ktype:
/*
 * ktype that, per the article, kobject_create() assigns by default; its
 * sysfs_ops are what sysfs_open_file() picks up via kobj->ktype->sysfs_ops.
 */
static struct kobj_type dynamic_kobj_ktype = {
.release = dynamic_kobj_release,
.sysfs_ops = &kobj_sysfs_ops,
};
/*
 * kobject-level show/store entry points used by dynamic_kobj_ktype;
 * kobj_attr_show forwards to the show() of the individual kobj_attribute.
 */
const struct sysfs_ops kobj_sysfs_ops = {
.show = kobj_attr_show,
.store = kobj_attr_store,
};
/*
 * kobject-level show(): recover the kobj_attribute that embeds attr and
 * forward the call to its own show() callback.
 *
 * Returns the callback's result, or -EIO when the attribute has no show().
 */
static ssize_t kobj_attr_show(struct kobject *kobj, struct attribute *attr,
			      char *buf)
{
	struct kobj_attribute *kattr =
		container_of(attr, struct kobj_attribute, attr);

	if (!kattr->show)
		return -EIO;

	return kattr->show(kobj, kattr, buf);
}
可以看到,kobj_attr_show函数的第二个参数,就是我们在fill_read_buffer函数中传入的attr_sd->s_attr.attr,也就是第一节里面注册的属性,然后通过该属性获取kobj_attribute,调用其show方法,也就是上面的b_show函数。通过上面一层层调用,终于进入到属性文件的操作函数里面了!具体的read文件操作流程可以用下面的图大概描述:
最后对属性文件的读写操作做个总结。
1 attribute属性文件的read操作,会由VFS转到sysfs_file_operations的read(也就是sysfs_read_file)接口上
2 接着会跳转到kobject(也就是上层目录)级别的读写,由从属于该kobject的ktype提供的ops->show接口接着处理
3 最终调用属性文件自身提供的show函数做处理。
并且,所有需要使用attribute的模块,都不会直接定义struct attribute变量,而是通过一个自定义的数据结构,该数据结构的一个成员是struct attribute类型的变量,并提供show和store回调函数。然后在该模块ktype所对应的struct sysfs_ops变量中,实现本模块整体的show和store函数,并在被调用时,转接到自定义数据结构(这边是kobj_attribute)中的show和store函数中。这样,每个attribute文件,实际上对应到一个自定义数据结构变量中了。