Linux内核学习之顺序文件
顺序文件,即根据记录序列合成的文件。小的文件系统中的文件,用户层通常是从头到尾读取的,其内容可能是遍历一些数据项创建的。这些数据项,举例来说,可能是数组元素。内核会从头到尾遍历整个数组,对每个数据项创建一个文本表示。
fs/seq_file.c中的例程容许用最小的代价来实现此类文件。顺序访问,即逐个读取数据项,是顺序文件首选的访问模式。另外,顺序文件可以进行定位操作,但其实现并不高效。
数据结构及实现原理
/*
 * Per-open state of a sequential file.  seq_open() allocates (or reuses) one
 * instance, zeroes it and stores it in file->private_data; seq_read() then
 * works exclusively on that instance.
 */
struct seq_file {
	/* Kernel memory buffer in which the data handed to user space is built. */
	char *buf;
	/* Total size of the buffer in bytes. */
	size_t size;
	/*
	 * Note: index and from are different things, because writing data into
	 * the buffer from the kernel and copying that data out to user space
	 * are two separate operations.
	 */
	/* Offset inside buf at which the next copy to user space starts. */
	size_t from;
	/* Number of bytes in the kernel buffer that are still waiting to be copied out. */
	size_t count;
	/*
	 * Iterator position of the record to produce next; seq_read() hands it
	 * to op->start() and updates it as records are consumed.
	 */
	loff_t index;
	/* File offset reached by the previous read; seq_read() compares it with *ppos. */
	loff_t read_pos;
	/* Copy of file->f_version, loaded at the start of seq_read() and stored back at the end. */
	u64 version;
	/* Held by seq_read() for the whole duration of a read. */
	struct mutex lock;
	/* Iterator callbacks (start/next/stop/show) supplied through seq_open(). */
	const struct seq_operations *op;
	/* Zeroed by seq_open() and otherwise unused in the code shown here; presumably reserved for the client's own data. */
	void *private;
};
/*
 * Iterator callbacks a client of the seq_file interface has to provide.
 * seq_read() drives them in the order start -> show/next ... -> stop.
 */
struct seq_operations {
	/*
	 * Called whenever an operation on the sequential file begins.  *pos is
	 * the position to start from; seq_read() treats a NULL or ERR_PTR
	 * return value as "no record".
	 */
	void * (*start) (struct seq_file *m, loff_t *pos);
	/*
	 * Called to end an iteration pass begun by start().  In almost all
	 * cases it has nothing to do.
	 */
	void (*stop) (struct seq_file *m, void *v);
	/* Called to move the cursor on to the next position (the next data item). */
	void * (*next) (struct seq_file *m, void *v, loff_t *pos);
	/*
	 * Generates the actual file content for the item v.  In seq_read() a
	 * negative return value ends the iteration and a positive one discards
	 * the output of this record.
	 */
	int (*show) (struct seq_file *m, void *v);
}; |
在seq_open()中,会将struct seq_file实例作为私有数据传递给file实例,然后在seq_read()操作中取出seq_file实例,调用该实例的start、stop、next、show实现对顺序文件的读操作。
int seq_open(struct file *file, const struct seq_operations *op) { struct seq_file *p = file->private_data;
if (!p) { p = kmalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; file->private_data = p; } memset(p, 0, sizeof(*p)); mutex_init(&p->lock); p->op = op;
/* * Wrappers around seq_open(e.g. swaps_open) need to be * aware of this. If they set f_version themselves, they * should call seq_open first and then set f_version. */ file->f_version = 0;
/* * seq_files support lseek() and pread(). They do not implement * write() at all, but we clear FMODE_PWRITE here for historical * reasons. * * If a client of seq_files a) implements file.write() and b) wishes to * support pwrite() then that client will need to implement its own * file.open() which calls seq_open() and then sets FMODE_PWRITE. */ file->f_mode &= ~FMODE_PWRITE; return 0; }
/*
 * seq_read - read() implementation for sequential files.
 * @file: file being read; file->private_data holds the struct seq_file
 * @buf:  user-space destination buffer
 * @size: maximum number of bytes to transfer
 * @ppos: current file offset, advanced by the number of bytes copied
 *
 * Data left in the kernel buffer by an earlier call is copied out first.
 * After that, records are generated through the start/show/next/stop
 * callbacks until at least one record is buffered, further records are
 * appended while they fit, and the result is copied to user space.
 *
 * Returns the number of bytes copied.  When nothing was copied the value of
 * err is returned instead: 0, -ENOMEM, -EFAULT, or an error produced by
 * traverse() or by one of the callbacks.
 */
ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{
	struct seq_file *m = (struct seq_file *)file->private_data;
	size_t copied = 0;
	loff_t pos;
	size_t n;
	void *p;
	int err = 0;

	/* The whole read runs under the per-file mutex. */
	mutex_lock(&m->lock);

	/* Don't assume *ppos is where we left it */
	if (unlikely(*ppos != m->read_pos)) {
		m->read_pos = *ppos;
		/* Retry the repositioning for as long as traverse() reports -EAGAIN. */
		while ((err = traverse(m, *ppos)) == -EAGAIN)
			;
		if (err) {
			/* With prejudice... reset the iterator state and give up. */
			m->read_pos = 0;
			m->version = 0;
			m->index = 0;
			m->count = 0;
			goto Done;
		}
	}

	/*
	 * seq_file->op->..m_start/m_stop/m_next may do special actions
	 * or optimisations based on the file->f_version, so we want to
	 * pass the file->f_version to those methods.
	 *
	 * seq_file->version is just copy of f_version, and seq_file
	 * methods can treat it simply as file version.
	 * It is copied in first and copied out after all operations.
	 * It is convenient to have it as part of structure to avoid the
	 * need of passing another argument to all the seq_file methods.
	 */
	m->version = file->f_version;
	/* grab buffer if we didn't have one; the initial size is one page */
	if (!m->buf) {
		m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
		if (!m->buf)
			goto Enomem;
	}
	/* if not empty - flush it first */
	if (m->count) {
		n = min(m->count, size);
		err = copy_to_user(buf, m->buf + m->from, n);
		if (err)
			goto Efault;
		m->count -= n;
		m->from += n;
		size -= n;
		buf += n;
		copied += n;
		/* Buffer fully drained: move on to the next record. */
		if (!m->count)
			m->index++;
		/* The request is already satisfied. */
		if (!size)
			goto Done;
	}
	/* we need at least one record in buffer */
	pos = m->index;
	p = m->op->start(m, &pos);
	while (1) {
		/* err is only meaningful when p is an ERR_PTR; NULL ends the sequence. */
		err = PTR_ERR(p);
		if (!p || IS_ERR(p))
			break;
		err = m->op->show(m, p);
		if (err < 0)
			break;
		/* A positive return from show() discards whatever this record wrote. */
		if (unlikely(err))
			m->count = 0;
		/* Nothing buffered for this record: step to the next one and retry. */
		if (unlikely(!m->count)) {
			p = m->op->next(m, p, &pos);
			m->index = pos;
			continue;
		}
		/* Buffer not completely full: try to append more records. */
		if (m->count < m->size)
			goto Fill;
		/*
		 * The buffer is completely full (presumably the record did not
		 * fit): end the pass, double the buffer size and regenerate
		 * the same record from m->index.
		 */
		m->op->stop(m, p);
		kfree(m->buf);
		m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
		if (!m->buf)
			goto Enomem;
		m->count = 0;
		m->version = 0;
		pos = m->index;
		p = m->op->start(m, &pos);
	}
	/* End of sequence or error: nothing new to copy out. */
	m->op->stop(m, p);
	m->count = 0;
	goto Done;
Fill:
	/* they want more? let's try to get some more */
	while (m->count < size) {
		/* Remember the fill level so that a partial record can be rolled back. */
		size_t offs = m->count;
		loff_t next = pos;
		p = m->op->next(m, p, &next);
		if (!p || IS_ERR(p)) {
			err = PTR_ERR(p);
			break;
		}
		err = m->op->show(m, p);
		/* Buffer filled up or show() returned non-zero: drop this record's output. */
		if (m->count == m->size || err) {
			m->count = offs;
			/* Keep going only when show() returned a positive value. */
			if (likely(err <= 0))
				break;
		}
		pos = next;
	}
	m->op->stop(m, p);
	n = min(m->count, size);
	err = copy_to_user(buf, m->buf, n);
	if (err)
		goto Efault;
	copied += n;
	m->count -= n;
	/* Leftover data stays buffered for the next call; otherwise advance past the last record. */
	if (m->count)
		m->from = n;
	else
		pos++;
	m->index = pos;
Done:
	/* Report err when nothing was copied, otherwise advance the file position. */
	if (!copied)
		copied = err;
	else {
		*ppos += copied;
		m->read_pos += copied;
	}
	file->f_version = m->version;
	mutex_unlock(&m->lock);
	return copied;
Enomem:
	err = -ENOMEM;
	goto Done;
Efault:
	err = -EFAULT;
	goto Done;
} |
编写顺序文件处理程序——以kprobe机制实现为例
1、 提供一个struct file_operations实例
/*
 * File operations of the debugfs "list" file.  Only open() is specific to
 * kprobes; read, llseek and release point to the seq_ routines.
 */
static const struct file_operations debugfs_kprobes_operations = {
	.open = kprobes_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
}; |
其中,read、llseek和release这几个函数指针直接指向seq_例程(顺序文件的标准例程),唯一需要自行实现的方法是open。
2、 open方法的实现
/* Iterator callbacks that kprobes_open() passes to seq_open(). */
static const struct seq_operations kprobes_seq_ops = {
	.start = kprobe_seq_start,
	.next = kprobe_seq_next,
	.stop = kprobe_seq_stop,
	.show = show_kprobe_addr
};
/*
 * open() handler of the kprobes list file: binds the file to the seq_file
 * interface using the kprobes iterator callbacks.
 *
 * Returns whatever seq_open() returns.
 */
static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
{
	int ret;

	ret = seq_open(filp, &kprobes_seq_ops);
	return ret;
}
在该方法中需要将文件关联到顺序文件接口。
3、 实现顺序文件接口
/*
 * Print one line describing probe @p into the sequential file @pi.
 *
 * @sym, @offset and @modname describe the symbol that contains the probe
 * address.  When @sym is NULL the raw address is printed in place of
 * "symbol+offset module".
 */
static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
		const char *sym, int offset, char *modname)
{
	const char *type_tag = "k";
	const char *gone_tag = kprobe_gone(p) ? "[GONE]" : "";
	const char *disabled_tag =
		(kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : "";

	/* "r" and "j" are selected by the pre-handler; everything else is "k". */
	if (p->pre_handler == pre_handler_kretprobe)
		type_tag = "r";
	else if (p->pre_handler == setjmp_pre_handler)
		type_tag = "j";

	/* seq_printf() is the formatted-output helper of the seq_file interface. */
	if (!sym) {
		seq_printf(pi, "%p %s %p %s%s\n", p->addr, type_tag, p->addr,
			   gone_tag, disabled_tag);
		return;
	}

	seq_printf(pi, "%p %s %s+0x%x %s %s%s\n", p->addr, type_tag, sym,
		   offset, (modname ? modname : " "), gone_tag, disabled_tag);
}
/*
 * Begin an iteration pass: the cursor itself is handed out as the iterator
 * token, or NULL once *pos has reached KPROBE_TABLE_SIZE.
 */
static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
{
	if (*pos >= KPROBE_TABLE_SIZE)
		return NULL;

	return pos;
}
/*
 * Advance the cursor by one item and return it, or NULL when the new
 * position is no longer below KPROBE_TABLE_SIZE.
 */
static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
{
	*pos += 1;

	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
}
/*
 * End of an iteration pass.  In almost all cases a stop method has nothing
 * to do, and that is the case here.
 */
static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
{
	/* Nothing to do */
}
/*
 * show() callback: print every probe found in one kprobe_table[] slot.
 *
 * @pi: sequential file that receives the output
 * @v:  current value of the file cursor (points to a loff_t)
 *
 * Always returns 0.
 */
static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct kprobe *p, *kp;
	const char *sym = NULL;
	/* Convert the current value of the file cursor into the array index i. */
	unsigned int i = *(loff_t *) v;
	unsigned long offset = 0;
	char *modname, namebuf[128];

	head = &kprobe_table[i];
	/*
	 * Disable preemption (this is not a memory barrier) while the list is
	 * walked with the _rcu iterators; presumably this acts as the RCU
	 * read-side critical section.
	 */
	preempt_disable();
	hlist_for_each_entry_rcu(p, node, head, hlist) {
		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
					&offset, &modname, namebuf);
		/* With aggr_pre_handler installed, report each probe on p->list instead of p itself. */
		if (p->pre_handler == aggr_pre_handler) {
			list_for_each_entry_rcu(kp, &p->list, list)
				report_probe(pi, kp, sym, offset, modname);
		} else
			report_probe(pi, p, sym, offset, modname);
	}
	/* Re-enable preemption now that the traversal is finished. */
	preempt_enable();

	return 0;
} |
4、 创建文件,并将文件名和file_operations实例关联
/*
 * Create the debugfs directory "kprobes" with its two files:
 *   list    - read-only, served by debugfs_kprobes_operations
 *   enabled - read/write for the owner, served by fops_kp
 *
 * Returns 0 on success or -ENOMEM when any entry cannot be created; in that
 * case everything created so far is removed again.
 */
static int __kprobes debugfs_kprobe_init(void)
{
	struct dentry *dir, *list, *file;
	/*
	 * debugfs_create_file() keeps the data pointer for later use by the
	 * file operations, so the object must outlive this function.  A plain
	 * local would leave a dangling stack pointer behind.
	 */
	static unsigned int value = 1;

	/* Create the directory "kprobes". */
	dir = debugfs_create_dir("kprobes", NULL);
	if (!dir)
		return -ENOMEM;

	/* Create the file "list" inside it, bound to debugfs_kprobes_operations. */
	list = debugfs_create_file("list", 0444, dir, NULL,
				   &debugfs_kprobes_operations);
	if (!list) {
		debugfs_remove(dir);
		return -ENOMEM;
	}

	file = debugfs_create_file("enabled", 0600, dir, &value, &fops_kp);
	if (!file) {
		/* Remove "list" first so that the directory is empty when it is removed. */
		debugfs_remove(list);
		debugfs_remove(dir);
		return -ENOMEM;
	}

	return 0;
}