例1:linux中sysctl调整内核参数,实际上调整的是/proc/sys/目录下面的配置。对应的内核源码在kernel/sysctl.c中。
/*
 * Base sysctl table handed to register_sysctl_table() by sysctl_init().
 * Each entry gives a directory name, its mode (0555 for all of them) and
 * the child table that holds the directory's own entries. The trailing
 * empty entry closes the table.
 */
static struct ctl_table sysctl_base_table[] = {
	{ .procname = "kernel", .mode = 0555, .child = kern_table  },
	{ .procname = "vm",     .mode = 0555, .child = vm_table    },
	{ .procname = "fs",     .mode = 0555, .child = fs_table    },
	{ .procname = "debug",  .mode = 0555, .child = debug_table },
	{ .procname = "dev",    .mode = 0555, .child = dev_table   },
	{ }
};
/*
 * Register sysctl_base_table.
 *
 * The header returned by register_sysctl_table() is not stored anywhere
 * in this function; it is only passed to kmemleak_not_leak()
 * (presumably so the never-freed allocation is not reported as a leak).
 *
 * Always returns 0.
 */
int __init sysctl_init(void)
{
	struct ctl_table_header *base_hdr = register_sysctl_table(sysctl_base_table);

	kmemleak_not_leak(base_hdr);

	return 0;
}
register_sysctl_table在/proc/sys下面创建对应的条目。/proc/sys目录本身是在fs/proc/proc_sysctl.c中创建的:
/*
 * Create the "sys" directory in procfs (NULL parent), attach the sysctl
 * inode/directory operations to it, then register the base sysctl table
 * through sysctl_init().
 *
 * Returns whatever sysctl_init() returns.
 */
int __init proc_sys_init(void)
{
	struct proc_dir_entry *sys_dir = proc_mkdir("sys", NULL);

	/* NOTE(review): the proc_mkdir() result is used without a NULL check. */
	sys_dir->proc_iops = &proc_sys_dir_operations;
	sys_dir->proc_dir_ops = &proc_sys_dir_file_operations;
	sys_dir->nlink = 0;

	return sysctl_init();
}
例2:读写/proc/cpuinfo 文件
文件打开
fs/proc/cpuinfo.c:
/*
 * open() hook for /proc/cpuinfo.
 *
 * Calls arch_freq_prepare_all() and then attaches the cpuinfo_op
 * seq_operations to the file through seq_open().
 *
 * Returns the result of seq_open().
 */
static int cpuinfo_open(struct inode *inode, struct file *file)
{
	int err;

	arch_freq_prepare_all();
	err = seq_open(file, &cpuinfo_op);

	return err;
}
/*
 * file_operations registered for the "cpuinfo" proc entry: a custom open
 * hook plus the generic seq_file read/llseek/release helpers.
 */
static const struct file_operations proc_cpuinfo_operations = {
	.open		= cpuinfo_open,
	.release	= seq_release,
	.read		= seq_read,
	.llseek		= seq_lseek,
};
/*
 * Create the "cpuinfo" proc entry (mode 0, NULL parent) backed by
 * proc_cpuinfo_operations. The entry returned by proc_create() is not
 * kept or checked.
 *
 * Always returns 0.
 */
static int __init proc_cpuinfo_init(void)
{
	(void)proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);

	return 0;
}
proc_create函数:
/*
 * Create a proc entry whose file behaviour is supplied by the caller.
 *
 * @name, @mode, @parent, @data: forwarded to proc_create_reg(); @parent is
 *	passed by address, so proc_create_reg() may update it.
 * @proc_fops: the caller's struct file_operations; must not be NULL
 *	(enforced with BUG_ON).
 *
 * Returns NULL if proc_create_reg() fails, otherwise the result of
 * proc_register().
 */
struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
		struct proc_dir_entry *parent,
		const struct file_operations *proc_fops, void *data)
{
	struct proc_dir_entry *p;

	BUG_ON(proc_fops == NULL);

	p = proc_create_reg(name, mode, &parent, data);
	if (!p)
		return NULL;

	/* proc_fops is a struct file_operations supplied by the caller */
	p->proc_fops = proc_fops;

	/* proc_register() -> pde_subdir_insert(): inserts the proc_dir_entry into the parent's tree */
	return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_data);
/*
 * Convenience wrapper around proc_create_data() that passes NULL as the
 * private data pointer.
 *
 * Returns whatever proc_create_data() returns.
 */
struct proc_dir_entry *proc_create(const char *name, umode_t mode,
				   struct proc_dir_entry *parent,
				   const struct file_operations *proc_fops)
{
	struct proc_dir_entry *entry;

	entry = proc_create_data(name, mode, parent, proc_fops, NULL);

	return entry;
}
EXPORT_SYMBOL(proc_create);
open文件的dump信息:
open:
[ 8364.208236] CPU: 0 PID: 1290 Comm: systemd-hostnam Not tainted 4.19.0-fix-full-10+ #36
[ 8364.216146] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 8364.221618] Call trace:
[ 8364.224058] dump_backtrace+0x0/0x1b8
[ 8364.227708] show_stack+0x24/0x30
[ 8364.231013] dump_stack+0x90/0xb4
[ 8364.234317] cpuinfo_open+0x1c/0x40
[ 8364.237793] proc_reg_open+0xa0/0x1c0
[ 8364.241444] do_dentry_open+0x1a8/0x340
[ 8364.245267] vfs_open+0x38/0x48
[ 8364.248396] do_last+0x28c/0x7d0
[ 8364.251612] path_openat+0x88/0x2a0
[ 8364.255088] do_filp_open+0x88/0x108
[ 8364.258650] do_sys_open+0x1a8/0x238
[ 8364.262214] __arm64_sys_openat+0x2c/0x38
[ 8364.266211] el0_svc_handler+0x84/0x140
[ 8364.270033] el0_svc+0x8/0xc
proc_reg_open是在fs/proc/inode.c:这是默认的proc文件系统的file_operations:
/*
 * Wrapper file_operations for regular proc files. Every hook here is a
 * proc_reg_* function; proc_reg_open/read/write (shown elsewhere in these
 * notes) forward to the matching hook in pde->proc_fops.
 * The compat ioctl hook exists only when CONFIG_COMPAT is set.
 */
static const struct file_operations proc_reg_file_ops = {
	.open			= proc_reg_open,
	.release		= proc_reg_release,
	.llseek			= proc_reg_llseek,
	.read			= proc_reg_read,
	.write			= proc_reg_write,
	.poll			= proc_reg_poll,
	.mmap			= proc_reg_mmap,
	.get_unmapped_area	= proc_reg_get_unmapped_area,
	.unlocked_ioctl		= proc_reg_unlocked_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl		= proc_reg_compat_ioctl,
#endif
};
proc_reg_file_ops是proc文件系统下普通文件默认的fops,它保存在inode->i_fop中(由proc_get_inode设置):open时do_last根据文件路径找到对应的inode,do_dentry_open再把inode->i_fop赋给file->f_op。因此后续的文件操作(比如读写)会先进入默认file_operations中对应的函数(proc_reg_open/proc_reg_read/proc_reg_write),然后再调用pde->proc_fops->open/read/write函数。
vfs_open:
/*
 * Record @path in @file and open the file through do_dentry_open(),
 * using the backing inode of the path's dentry and no explicit open
 * callback (NULL).
 *
 * Returns the result of do_dentry_open().
 */
int vfs_open(const struct path *path, struct file *file)
{
	struct inode *backing;

	file->f_path = *path;
	backing = d_backing_inode(path->dentry);

	return do_dentry_open(file, backing, NULL);
}
精简
static int do_dentry_open(struct file *f,
struct inode *inode,
int (*open)(struct inode *, struct file *))
{
..................
path_get(&f->f_path);
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
/* Ensure that we skip any errors that predate opening of the file */
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
f->f_mode |= FMODE_ATOMIC_POS;
f->f_op = fops_get(inode->i_fop); ---------------------文件默认的f_ops是inode中保存的
f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
if (!open)
open = f->f_op->open;
.................
}
比如:
/*
 * read() wrapper for regular proc files.
 *
 * Forwards to the read hook in pde->proc_fops (for /proc/cpuinfo these are
 * the file_operations defined in cpuinfo.c), bracketed by
 * use_pde()/unuse_pde().
 *
 * Returns the hook's result, or -EIO when use_pde() fails or the entry
 * has no read hook.
 */
static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
	struct proc_dir_entry *pde = PDE(file_inode(file));
	ssize_t rv = -EIO;

	if (use_pde(pde)) {
		/* the entry's own read function */
		read = pde->proc_fops->read;
		if (read)
			rv = read(file, buf, count, ppos);
		unuse_pde(pde);
	}
	return rv;
}
/*
 * write() wrapper for regular proc files.
 *
 * Forwards to the write hook in pde->proc_fops, bracketed by
 * use_pde()/unuse_pde().
 *
 * Returns the hook's result, or -EIO when use_pde() fails or the entry
 * has no write hook.
 */
static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
	struct proc_dir_entry *pde = PDE(file_inode(file));
	ssize_t rv = -EIO;

	if (use_pde(pde)) {
		/* the entry's own write function */
		write = pde->proc_fops->write;
		if (write)
			rv = write(file, buf, count, ppos);
		unuse_pde(pde);
	}
	return rv;
}
每个文件的 inode创建。(vfs会在open的过程创建file和inode)
[ 45.018096] Call trace:
[ 45.020532] dump_backtrace+0x0/0x1b8
[ 45.024180] show_stack+0x24/0x30
[ 45.027482] dump_stack+0x90/0xb4
[ 45.030784] proc_get_inode+0x164/0x168
[ 45.034608] proc_lookup_de+0x84/0xf8
[ 45.038257] proc_lookup+0x2c/0x38
[ 45.041647] lookup_open+0x204/0x638 //fs/namei.c
[ 45.045209] do_last+0x604/0x7d0 //fs/namei.c
[ 45.048425] path_openat+0x88/0x2a0
[ 45.051901] do_filp_open+0x88/0x108
[ 45.055463] do_sys_open+0x1a8/0x238
[ 45.059025] __arm64_sys_openat+0x2c/0x38
[ 45.063022] el0_svc_handler+0x84/0x140
[ 45.066845] el0_svc+0x8/0xc
proc_get_inode函数:
/*
 * Allocate an inode on @sb for the proc entry @de and initialise it from
 * the entry.
 *
 * - inode number and timestamps are set, and the entry is stored in
 *   PROC_I(inode)->pde;
 * - an "empty" entry becomes an empty directory inode and is returned
 *   straight away;
 * - mode/uid/gid, size and nlink are copied only when the entry has a
 *   non-zero value for them;
 * - i_op comes from de->proc_iops;
 * - for regular files i_fop is one of the proc_reg_file_ops wrappers
 *   rather than de->proc_fops itself; other file types use de->proc_fops
 *   directly.
 *
 * Returns the new inode, or NULL if new_inode() failed (in which case
 * pde_put() is called on @de).
 */
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
	struct inode *inode = new_inode(sb);

	if (inode) {
		inode->i_ino = de->low_ino;
		inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
		PROC_I(inode)->pde = de;

		if (is_empty_pde(de)) {
			make_empty_dir_inode(inode);
			return inode;
		}
		if (de->mode) {
			inode->i_mode = de->mode;
			inode->i_uid = de->uid;
			inode->i_gid = de->gid;
		}
		if (de->size)
			inode->i_size = de->size;
		if (de->nlink)
			set_nlink(inode, de->nlink);
		WARN_ON(!de->proc_iops);
		inode->i_op = de->proc_iops;
		if (de->proc_fops) {
			if (S_ISREG(inode->i_mode)) {
				/* this is where the default wrapper fops get installed */
#ifdef CONFIG_COMPAT
				if (!de->proc_fops->compat_ioctl)
					inode->i_fop =
						&proc_reg_file_ops_no_compat;
				else
#endif
					inode->i_fop = &proc_reg_file_ops;
			} else {
				inode->i_fop = de->proc_fops;
			}
		}
	} else
		pde_put(de);
	return inode;
}
proc_reg_open会调用cpuinfo.c下定义的file_operations的open函数:
/*
 * open() wrapper for regular proc files.
 *
 * Calls the ->open hook of pde->proc_fops (if any). When the entry also
 * has a ->release hook, a pde_opener is allocated before calling ->open
 * and, on success, linked into pde->pde_openers under pde_unload_lock;
 * on ->open failure it is freed again.
 *
 * Returns -ENOENT if use_pde() fails, -ENOMEM if the pde_opener cannot be
 * allocated, otherwise the ->open hook's result (0 when there is no hook).
 */
static int proc_reg_open(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
int (*open)(struct inode *, struct file *);
int (*release)(struct inode *, struct file *);
struct pde_opener *pdeo;
/*
* Ensure that
* 1) PDE's ->release hook will be called no matter what
* either normally by close()/->release, or forcefully by
* rmmod/remove_proc_entry.
*
* 2) rmmod isn't blocked by opening file in /proc and sitting on
* the descriptor (including "rmmod foo </proc/foo" scenario).
*
* Save every "struct file" with custom ->release hook.
*/
if (!use_pde(pde))
return -ENOENT;
release = pde->proc_fops->release;
if (release) {
/* allocated up front, before ->open is called */
pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL);
if (!pdeo) {
rv = -ENOMEM;
goto out_unuse;
}
}
open = pde->proc_fops->open;// the ->open hook from the entry's file_operations; it is invoked just below
if (open)
rv = open(inode, file);
if (release) {
if (rv == 0) {
/* To know what to release. */
pdeo->file = file;
pdeo->closing = false;
pdeo->c = NULL;
spin_lock(&pde->pde_unload_lock);
list_add(&pdeo->lh, &pde->pde_openers);
spin_unlock(&pde->pde_unload_lock);
} else
kmem_cache_free(pde_opener_cache, pdeo);
}
out_unuse:
unuse_pde(pde);
return rv;
}
cpuinfo_open调用了seq_open函数,传入了cpuinfo_op(struct seq_operations),其定义在arch/arm64/kernel/cpuinfo.c文件中,决定了具体展示的内容。
/*
 * seq_file iterator callbacks for cpuinfo; cpuinfo_open() passes this
 * table to seq_open().
 */
const struct seq_operations cpuinfo_op = {
	.start	= c_start,
	.next	= c_next,
	.stop	= c_stop,
	.show	= c_show,
};
seq_open函数:fs/seq_file.c
创建seq_file结构。初始化相关的seq_operations。后续读写使用的接口。
int seq_open(struct file *file, const struct seq_operations *op)
{
struct seq_file *p;
WARN_ON(file->private_data);
p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL);
if (!p)
return -ENOMEM;
file->private_data = p;
mutex_init(&p->lock);
p->op = op;
// No refcounting: the lifetime of 'p' is constrained
// to the lifetime of the file.
p->file = file;
/*
* Wrappers around seq_open(e.g. swaps_open) need to be
* aware of this. If they set f_version themselves, they
* should call seq_open first and then set f_version.
*/
file->f_version = 0;
/*
* seq_files support lseek() and pread(). They do not implement
* write() at all, but we clear FMODE_PWRITE here for historical
* reasons.
*
* If a client of seq_files a) implements file.write() and b) wishes to
* support pwrite() then that client will need to implement its own
* file.open() which calls seq_open() and then sets FMODE_PWRITE.
*/
file->f_mode &= ~FMODE_PWRITE;
return 0;
}
文件读
read:
[ 8364.272949] CPU: 0 PID: 1290 Comm: systemd-hostnam Not tainted 4.19.0-fix-full-10+ #36
[ 8364.280855] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 8364.286328] Call trace:
[ 8364.288767] dump_backtrace+0x0/0x1b8
[ 8364.292417] show_stack+0x24/0x30
[ 8364.295721] dump_stack+0x90/0xb4
[ 8364.299025] c_show+0x40/0x2d8
[ 8364.302071] seq_read+0xd4/0x4a8
[ 8364.305287] proc_reg_read+0x80/0xd8
[ 8364.308853] __vfs_read+0x60/0x188
[ 8364.312242] vfs_read+0x94/0x150
[ 8364.315458] ksys_read+0x6c/0xd8
[ 8364.318675] __arm64_sys_read+0x24/0x30
[ 8364.322499] el0_svc_handler+0x84/0x140
[ 8364.326322] el0_svc+0x8/0xc
proc_reg_read函数:
/*
 * read() wrapper for regular proc files.
 *
 * Forwards to the read hook in pde->proc_fops, bracketed by
 * use_pde()/unuse_pde().
 *
 * Returns the hook's result, or -EIO when use_pde() fails or the entry
 * has no read hook.
 */
static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
	struct proc_dir_entry *pde = PDE(file_inode(file));
	ssize_t rv = -EIO;

	if (use_pde(pde)) {
		/* the read hook from the entry's file_operations; for /proc/cpuinfo this is seq_read */
		read = pde->proc_fops->read;
		if (read)
			rv = read(file, buf, count, ppos);
		unuse_pde(pde);
	}
	return rv;
}
seq_read 会调用seq_operations cpuinfo_op 下的函数,例如c_show(arch/arm64/kernel/cpuinfo.c),与上面的调用栈一致。
===============字符设备读写中file_operations结构体的传递分析==================
demo.c
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
/* Device numbers: major/minor are combined into devno by hello_init(). */
static int major = 250;
static int minor = 0;
static dev_t devno;

/* Class and device objects created in hello_init(), destroyed in hello_exit(). */
static struct class *cls;
static struct device *test_device;
/*
 * open() handler of the demo device: prints a marker and dumps the
 * current call stack so the path into the driver can be inspected.
 *
 * Always returns 0.
 */
static int hello_open(struct inode *inode, struct file *filep)
{
	printk("hello_open \n");
	dump_stack();

	return 0;
}
/*
 * read() handler of the demo device: prints a marker and dumps the
 * current call stack. No data is copied to @buf.
 *
 * Always returns 0.
 */
static ssize_t hello_read(struct file *filp, char __user *buf,
			  size_t sz, loff_t *off)
{
	printk("hello_read \n");
	dump_stack();

	return 0;
}
static struct file_operations hello_ops=
{
.open = hello_open,
.read = hello_read,
};
/*
 * Module init: register the character device under the fixed major
 * number, create the "myclass" class and the "hello" device for it.
 *
 * Each step is checked; on failure the steps already completed are undone
 * in reverse order and the error code of the failing call is returned.
 * (Previously the register_chrdev() result was ignored, so a failed
 * registration went unnoticed.)
 *
 * Returns 0 on success or a negative errno.
 */
static int hello_init(void)
{
	int ret;

	printk("hello_init \n");
	devno = MKDEV(major, minor);

	ret = register_chrdev(major, "hello", &hello_ops);
	if (ret < 0)
		return ret;

	cls = class_create(THIS_MODULE, "myclass");
	if (IS_ERR(cls)) {
		ret = PTR_ERR(cls);
		goto err_unregister;
	}

	/* takes the place of a manual "mknod /dev/hello" */
	test_device = device_create(cls, NULL, devno, NULL, "hello");
	if (IS_ERR(test_device)) {
		ret = PTR_ERR(test_device);
		goto err_class;
	}

	return 0;

err_class:
	class_destroy(cls);
err_unregister:
	unregister_chrdev(major, "hello");
	return ret;
}
/*
 * Module exit: undo hello_init() in reverse order - destroy the device,
 * destroy the class, unregister the character device - then log a marker.
 */
static void hello_exit(void)
{
	device_destroy(cls, devno);
	class_destroy(cls);
	unregister_chrdev(major, "hello");

	printk("hello_exit \n");
}
/* Module metadata, and registration of hello_init/hello_exit as the module's init and exit functions. */
MODULE_LICENSE("GPL");
module_init(hello_init);
module_exit(hello_exit);
读:cat /dev/hello
open:
[ 9593.027404] CPU: 5 PID: 3743 Comm: cat Tainted: G OE 4.19.0full #60
[ 9593.034872] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 9593.040343] Call trace:
[ 9593.042784] dump_backtrace+0x0/0x1b8
[ 9593.046432] show_stack+0x24/0x30
[ 9593.049736] dump_stack+0x90/0xb4
[ 9593.053041] hello_open+0x20/0x30 [char]
[ 9593.056952] chrdev_open+0xc8/0x270
[ 9593.060428] do_dentry_open+0x1a8/0x340
[ 9593.064251] vfs_open+0x38/0x48
[ 9593.067379] do_last+0x28c/0x7d0
[ 9593.070596] path_openat+0x88/0x2a0
[ 9593.074071] do_filp_open+0x88/0x108
[ 9593.077634] do_sys_open+0x1a8/0x238
[ 9593.081196] __arm64_sys_openat+0x2c/0x38
[ 9593.085193] el0_svc_handler+0x84/0x140
[ 9593.089016] el0_svc+0x8/0xc
read:
[ 9593.091917] hello_read
[ 9593.094357] CPU: 5 PID: 3743 Comm: cat Tainted: G OE 4.19.0full #60
[ 9593.101826] Hardware name: PHYTIUM LTD D2000/D2000, BIOS
[ 9593.107297] Call trace:
[ 9593.109731] dump_backtrace+0x0/0x1b8
[ 9593.113381] show_stack+0x24/0x30
[ 9593.116683] dump_stack+0x90/0xb4
[ 9593.119985] hello_read+0x20/0x30 [char]
[ 9593.123895] __vfs_read+0x60/0x188
[ 9593.127284] vfs_read+0x94/0x150
[ 9593.130499] ksys_read+0x6c/0xd8
[ 9593.133714] __arm64_sys_read+0x24/0x30
[ 9593.137537] el0_svc_handler+0x84/0x140
[ 9593.141359] el0_svc+0x8/0xc
默认的字符设备的file_operations:
/*
 * Default file_operations for character devices: only open and llseek
 * are provided. chrdev_open() replaces the file's f_op with the driver's
 * own file_operations.
 */
const struct file_operations def_chr_fops = {
	.llseek	= noop_llseek,
	.open	= chrdev_open,
};
chrdev_open函数
/*
 * Default open() for character device inodes.
 *
 * Finds the cdev for the inode (using the cached inode->i_cdev, or looking
 * it up in cdev_map by inode->i_rdev and caching it), replaces the file's
 * f_op with the cdev's file_operations and calls their ->open hook if one
 * exists.
 *
 * Returns 0 on success; -ENXIO if the lookup fails, cdev_get() fails or
 * fops_get() yields NULL; otherwise the error returned by the driver's
 * ->open. On the error paths after the cdev was obtained, cdev_put() is
 * called on it.
 */
static int chrdev_open(struct inode *inode, struct file *filp)
{
const struct file_operations *fops;
struct cdev *p;
struct cdev *new = NULL;
int ret = 0;
spin_lock(&cdev_lock);
p = inode->i_cdev;
if (!p) {
struct kobject *kobj;
int idx;
/* cdev_lock is released around the kobj_lookup() call */
spin_unlock(&cdev_lock);
kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
if (!kobj)
return -ENXIO;
new = container_of(kobj, struct cdev, kobj);
spin_lock(&cdev_lock);
/* Check i_cdev again in case somebody beat us to it while
we dropped the lock. */
p = inode->i_cdev;
if (!p) {
inode->i_cdev = p = new;
list_add(&inode->i_devices, &p->list);
/* 'new' is now owned by the inode, so the cdev_put(new) below gets NULL */
new = NULL;
} else if (!cdev_get(p))
ret = -ENXIO;
} else if (!cdev_get(p))
ret = -ENXIO;
spin_unlock(&cdev_lock);
cdev_put(new);
if (ret)
return ret;
ret = -ENXIO;
fops = fops_get(p->ops); // take the file_operations stored in the cdev, i.e. the ones the driver defined
if (!fops)
goto out_cdev_put;
replace_fops(filp, fops); // swap the file's file_operations; from here on the driver's own hooks are called directly
if (filp->f_op->open) {
ret = filp->f_op->open(inode, filp); // call the driver's open
if (ret)
goto out_cdev_put;
}
return 0;
out_cdev_put:
cdev_put(p);
return ret;
}