linux 2.6.28 kernel之pipefs管道文件系统详解

最新推荐文章于 2024-08-30 21:46:10 发布

xx-xx-xxx-xxx

最新推荐文章于 2024-08-30 21:46:10 发布

阅读量700

点赞数 21

分类专栏：文件系统文章标签： linux 服务器网络

本文链接：https://blog.csdn.net/wenfengliaoshuzhai/article/details/141534339

版权

文件系统专栏收录该内容

8 篇文章 0 订阅

订阅专栏

参考<<深入分析Linux内核源码>>
fs/pipe.c
管道文件系统pipefs
pipefs是一种简单的、虚拟的文件系统类型，因为它没有对应的物理设备，因此其安装时不需要块设备

/*
 * fs_initcall(fn) registers fn as a level "5" initcall by expanding to
 * __define_initcall() with that level string and id 5.
 */
#define fs_initcall(fn)		__define_initcall("5", fn, 5)

/*
 * Define a static initcall_t that holds fn and place it in the
 * ".initcall<level>.init" section.  The variable name is built from fn
 * and id so that each registration gets its own symbol; __used keeps the
 * otherwise unreferenced variable from being discarded.
 */
#define __define_initcall(level, fn, id) \
	static initcall_t __initcall_##fn##id __used \
	__attribute__((__section__(".initcall" level ".init"))) = fn
<linux 2.6.28 kernel之__initcall机制和module_init详解>
fs/pipe.c
/*
 * Filesystem type descriptor for pipefs: superblocks are obtained through
 * pipefs_get_sb() and destroyed through kill_anon_super().
 */
static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};

include/linux/init.h
/*
 * Don't use these in modules, but some people do...
 *
 * In this variant every *_initcall(fn) level is defined as plain
 * module_init(fn), so the level name makes no difference here.
 */
#define core_initcall(fn)module_init(fn)
#define postcore_initcall(fn)module_init(fn)
#define arch_initcall(fn)module_init(fn)
#define subsys_initcall(fn)module_init(fn)
#define fs_initcall(fn)module_init(fn)
#define device_initcall(fn)module_init(fn)
#define late_initcall(fn)module_init(fn)
//
/* Register init_pipe_fs() through the fs_initcall() hook. */
fs_initcall(init_pipe_fs);
/* Register exit_pipe_fs() as the module exit handler. */
module_exit(exit_pipe_fs);

/*
 * Register pipefs and mount it internally.
 *
 * Returns 0 on success.  If register_filesystem() fails its error is
 * returned as is; if the internal mount fails the filesystem type is
 * unregistered again and the mount error is returned.
 *
 * Once this has succeeded, pipefs is reached through the pipe() system
 * call.
 */
static int __init init_pipe_fs(void)
{
	int err;

	err = register_filesystem(&pipe_fs_type);
	if (err)
		return err;

	/*
	 * After a successful register_filesystem() the filesystem still has
	 * to be mounted, which is done with kern_mount():
	 *   #define kern_mount(type) kern_mount_data(type, NULL)   (fs.h)
	 */
	pipe_mnt = kern_mount(&pipe_fs_type);
	if (!IS_ERR(pipe_mnt))
		return 0;

	/*
	 * The mount failed: take pipe_fs_type back out of the registered
	 * filesystem types with unregister_filesystem().
	 */
	err = PTR_ERR(pipe_mnt);
	unregister_filesystem(&pipe_fs_type);
	return err;
}
//
/*
 * Tear down pipefs: unregister pipe_fs_type first, then release the
 * pipe_mnt mount with mntput().
 */
static void __exit exit_pipe_fs(void)
{
unregister_filesystem(&pipe_fs_type);
mntput(pipe_mnt);
}
//
/*
 * Filesystem type descriptor for pipefs: .get_sb builds the superblock
 * via pipefs_get_sb(), .kill_sb releases it via kill_anon_super().
 */
static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};
/*
 * pipefs should never be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */
该文件系统不能从用户空间进行安装，并且在整个系统范围内只有一个超级块
static int pipefs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
struct vfsmount mnt)
{
return get_sb_pseudo(fs_type, “pipe:”, NULL, PIPEFS_MAGIC, mnt);
}
/*
 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
 * will never be mountable)
 */
int get_sb_pseudo(struct file_system_type *fs_type, char *name,
const struct super_operations *ops, unsigned long magic,
struct vfsmount *mnt)
{
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
struct dentry *dentry;
struct inode *root;
struct qstr d_name = {.name = name, .len = strlen(name)};

if (IS_ERR(s))
return PTR_ERR(s);

s->s_flags = MS_NOUSER;
s->s_maxbytes = ~0ULL;
s->s_blocksize = PAGE_SIZE;
s->s_blocksize_bits = PAGE_SHIFT;
s->s_magic = magic;
s->s_op = ops ? ops : &simple_super_operations;
s->s_time_gran = 1;
root = new_inode(s);
if (!root)
goto Enomem;
/*

since this is the first inode, make it number 1. New inodes created
after this must take care not to collide with it (by passing
max_reserved of 1 to iunique).
*/
root->i_ino = 1;
root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
root->i_uid = root->i_gid = 0;
root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
dentry = d_alloc(NULL, &d_name);
if (!dentry) {
iput(root);
goto Enomem;
}
dentry->d_sb = s;
dentry->d_parent = dentry;
d_instantiate(dentry, root);
s->s_root = dentry;
s->s_flags |= MS_ACTIVE;
return simple_set_mnt(mnt, s);

Enomem:
up_write(&s->s_umount);
deactivate_super(s);
return -ENOMEM;
}
fs/super.c
/*
 * ->kill_sb() helper: shut the superblock down with
 * generic_shutdown_super(), then remove the minor number of sb->s_dev
 * from unnamed_dev_ida while holding unnamed_dev_lock.
 */
void kill_anon_super(struct super_block *sb)
{
	/* Read the minor before the superblock is shut down. */
	int minor = MINOR(sb->s_dev);

	generic_shutdown_super(sb);

	spin_lock(&unnamed_dev_lock);
	ida_remove(&unnamed_dev_ida, minor);
	spin_unlock(&unnamed_dev_lock);
}
/**
 *	generic_shutdown_super	-	common helper for ->kill_sb()
 *	@sb: superblock to kill
 *
 *	generic_shutdown_super() does all fs-independent work on superblock
 *	shutdown.  Typical ->kill_sb() should pick all fs-specific objects
 *	that need destruction out of superblock, call generic_shutdown_super()
 *	and release aforementioned objects.  Note: dentries and inodes _are_
 *	taken care of and do not need specific handling.
 *
 *	Upon calling this function, the filesystem may no longer alter or
 *	rearrange the set of dentries belonging to this super_block, nor may it
 *	change the attachments of dentries to inodes.
 */
fs/super.c
/*
 * Filesystem-independent part of shutting down superblock @sb.
 *
 * When the superblock has a root dentry, the dcache is shrunk, the
 * superblock is synced, MS_ACTIVE is cleared and the inodes are
 * invalidated; ->write_super() (only if s_dirt is set) and ->put_super()
 * are called when the filesystem provides them.  Afterwards the
 * superblock is taken off the s_list and s_instances lists under sb_lock
 * and s_umount is released with up_write().
 */
void generic_shutdown_super(struct super_block *sb)
{
	const struct super_operations *sop = sb->s_op;

	if (sb->s_root) {
		shrink_dcache_for_umount(sb);
		fsync_super(sb);
		lock_super(sb);
		sb->s_flags &= ~MS_ACTIVE;
		/* bad name - it should be evict_inodes() */
		invalidate_inodes(sb);
		lock_kernel();

		if (sop->write_super && sb->s_dirt)
			sop->write_super(sb);
		if (sop->put_super)
			sop->put_super(sb);

		/* Forget any remaining inodes */
		if (invalidate_inodes(sb)) {
			printk("VFS: Busy inodes after unmount of %s. "
			   "Self-destruct in 5 seconds. Have a nice day...\n",
			   sb->s_id);
		}

		unlock_kernel();
		unlock_super(sb);
	}
	spin_lock(&sb_lock);
	/* should be initialized for __put_super_and_need_restart() */
	list_del_init(&sb->s_list);
	list_del(&sb->s_instances);
	spin_unlock(&sb_lock);
	up_write(&sb->s_umount);
}

//#define kern_mount(type) kern_mount_data(type, NULL) // fs.h
//kern_mount()类似于do_mount()，用来安装pipefs文件系统
fs/super.c
/*
 * Mount a filesystem type from inside the kernel: forwards to
 * vfs_kern_mount() with the MS_KERNMOUNT flag, type->name as the third
 * argument and @data passed through unchanged.  Returns whatever
 * vfs_kern_mount() returns.
 */
struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
{
return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
}

fs/filesystems.c
/**
 *	register_filesystem - register a new filesystem
 *	@fs: the file system structure
 *
 *	Adds the file system passed to the list of file systems the kernel
 *	is aware of for mount and other syscalls. Returns 0 on success,
 *	or a negative errno code on an error.
 *
 *	The &struct file_system_type that is passed is linked into the kernel
 *	structures and must not be freed until the file system has been
 *	unregistered.
 */

int register_filesystem(struct file_system_type * fs)
{
	int res = 0;
	struct file_system_type ** p;

	/* A filesystem name containing '.' is treated as a bug. */
	BUG_ON(strchr(fs->name, '.'));
	/* A non-NULL ->next means this structure is already linked in. */
	if (fs->next)
		return -EBUSY;
	INIT_LIST_HEAD(&fs->fs_supers);
	write_lock(&file_systems_lock);
	/*
	 * find_filesystem() yields a pointer to a list slot: a non-NULL
	 * *p means the name is already taken, otherwise fs is stored there.
	 */
	p = find_filesystem(fs->name, strlen(fs->name));
	if (*p)
		res = -EBUSY;
	else
		*p = fs;
	write_unlock(&file_systems_lock);
	return res;
}
//register_filesystem()函数把pipe_fs_type链接到file_systems链表，因此，你可以通过读/proc/filesystems(可能/proc被/sysfs替代了)找到“pipefs”入口点，在那里，“nodev”标志表示没有设置FS_REQUIRES_DEV标志，即该文件系统没有对应的物理设备
///系统调用///
fs/pipe.c
pipefs文件系统已被注册，并成为内核中的一个模块，从此我们就可以使用它了。Pipefs文件系统的入口点就是pipe()系统调用，其内核实现函数为sys_pipe()；在下面的代码中，sys_pipe()经sys_pipe2()调用do_pipe_flags()来完成真正的工作（do_pipe()只是do_pipe_flags(fd, 0)的封装），其代码在fs/pipe.c中
/*
 * pipe() system call: the same as pipe2() with flags set to 0.
 *
 * @fildes: user-space array that receives the two descriptors
 *
 * Returns whatever sys_pipe2() returns.
 */
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
	return sys_pipe2(fildes, 0);
}
/*
 * sys_pipe() is the normal C calling standard for creating
 * a pipe. It's not the way Unix traditionally does this, though.
 */
/*
 * pipe2() system call.
 *
 * Creates the pipe with do_pipe_flags() and copies the two descriptors
 * out to the user array @fildes.
 *
 * Returns 0 on success, the error from do_pipe_flags(), or -EFAULT when
 * the copy to user space fails; in that case both freshly created
 * descriptors are closed again.
 */
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
	int fds[2];
	int ret;

	ret = do_pipe_flags(fds, flags);
	if (ret)
		return ret;

	if (!copy_to_user(fildes, fds, sizeof(fds)))
		return 0;

	/* The caller never saw the descriptors, so do not leave them open. */
	sys_close(fds[0]);
	sys_close(fds[1]);
	return -EFAULT;
}
/* Create a pipe without any flags: same as do_pipe_flags(fd, 0). */
int do_pipe(int *fd)
{
return do_pipe_flags(fd, 0);
}
int do_pipe_flags(int *fd, int flags)
{
/进程对每个已打开文件的操作是通过file结构进行的。一个管道实际上就是一个存在于内存的文件，对这个文件的操作要通过两个已打开的文件进行，f1、f2分别代表该管道的两端，读端和写端。/
struct file *fw, *fr;
int error;
int fdw, fdr;

if (flags & ~(O_CLOEXEC | O_NONBLOCK))
return -EINVAL;

fw = create_write_pipe(flags);
if (IS_ERR(fw))
return PTR_ERR(fw);
fr = create_read_pipe(fw, flags);
error = PTR_ERR(fr);
if (IS_ERR(fr))
goto err_write_pipe;

error = get_unused_fd_flags(flags);
if (error < 0)
goto err_read_pipe;
fdr = error;

error = get_unused_fd_flags(flags);
if (error < 0)
goto err_fdr;
fdw = error;

error = audit_fd_pair(fdr, fdw);
if (error < 0)
goto err_fdw;

fd_install(fdr, fr);
fd_install(fdw, fw);
fd[0] = fdr;
fd[1] = fdw;

return 0;

err_fdw:
put_unused_fd(fdw);
err_fdr:
put_unused_fd(fdr);
err_read_pipe:
path_put(&fr->f_path);
put_filp(fr);
err_write_pipe:
free_write_pipe(fw);
return error;
}
/*
 * Create the write end of a new pipe.
 *
 * @flags: only O_NONBLOCK is carried over into the file's f_flags
 *
 * Allocates a pipe inode, a dentry below the pipefs root and a struct
 * file opened FMODE_WRITE with write_pipefifo_fops.
 *
 * Returns the new struct file, or ERR_PTR(-ENFILE) when the inode or the
 * file cannot be obtained, or ERR_PTR(-ENOMEM) when the dentry cannot be
 * allocated.
 */
struct file *create_write_pipe(int flags)
{
	int err;
	struct inode *inode;
	struct file *f;
	struct dentry *dentry;
	struct qstr name = { .name = "" };

	err = -ENFILE;
	/*
	 * Every file has an inode.  A pipe file does not exist before the
	 * pipe is created, so an inode is created on the fly here.
	 */
	inode = get_pipe_inode();
	if (!inode)
		goto err;

	err = -ENOMEM;
	/*
	 * struct file has a pointer, f_dentry, to the dentry of the opened
	 * file, and the dentry in turn points to the inode.  The dentry
	 * allocated by d_alloc() is therefore what ties the file structure
	 * to the inode structure.
	 */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
	if (!dentry)
		goto err_inode;

	dentry->d_op = &pipefs_dentry_operations;
	/*
	 * We dont want to publish this dentry into global dentry hash table.
	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
	 * This permits a working /proc/$pid/fd/XXX on pipes
	 */
	dentry->d_flags &= ~DCACHE_UNHASHED;
	d_instantiate(dentry, inode);

	err = -ENFILE;
	f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
	if (!f)
		goto err_dentry;
	f->f_mapping = inode->i_mapping;

	f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
	f->f_version = 0;

	return f;

 err_dentry:
	free_pipe_info(inode);
	dput(dentry);
	return ERR_PTR(err);

 err_inode:
	free_pipe_info(inode);
	iput(inode);
 err:
	return ERR_PTR(err);
}

//
/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 *
 * Pipes reuse fifos' file_operations structs.
 */
fs/pipe.c
/*
 * File operations for the read side of a pipe/FIFO.  Reading goes
 * through do_sync_read with pipe_read as the aio_read hook; the write
 * slot is bad_pipe_w (presumably a stub that rejects writes - confirm
 * against its definition).  Seeking uses no_llseek.
 */
const struct file_operations read_pipefifo_fops = {
.llseek= no_llseek,
.read= do_sync_read,
.aio_read= pipe_read,
.write= bad_pipe_w,
.poll= pipe_poll,
.unlocked_ioctl= pipe_ioctl,
.open= pipe_read_open,
.release= pipe_read_release,
.fasync= pipe_read_fasync,
};

/*
 * File operations for the write side of a pipe/FIFO.  Writing goes
 * through do_sync_write with pipe_write as the aio_write hook; the read
 * slot is bad_pipe_r (presumably a stub that rejects reads - confirm
 * against its definition).  Seeking uses no_llseek.
 */
const struct file_operations write_pipefifo_fops = {
.llseek= no_llseek,
.read= bad_pipe_r,
.write= do_sync_write,
.aio_write= pipe_write,
.poll= pipe_poll,
.unlocked_ioctl= pipe_ioctl,
.open= pipe_write_open,
.release= pipe_write_release,
.fasync= pipe_write_fasync,
};

/*
 * File operations for a pipe/FIFO opened for both reading and writing:
 * both the read path (do_sync_read / pipe_read) and the write path
 * (do_sync_write / pipe_write) are wired up.  Seeking uses no_llseek.
 */
const struct file_operations rdwr_pipefifo_fops = {
.llseek= no_llseek,
.read= do_sync_read,
.aio_read= pipe_read,
.write= do_sync_write,
.aio_write= pipe_write,
.poll= pipe_poll,
.unlocked_ioctl= pipe_ioctl,
.open= pipe_rdwr_open,
.release= pipe_rdwr_release,
.fasync= pipe_rdwr_fasync,
};
尽管代表着管道两端的两个已打开文件一个只能读，一个只能写。但是，另一方面，这两个逻辑上已打开的文件指向同一个inode，即用作管道的缓冲区，显然，这个缓冲区既支持读也支持写。这进一步说明了file 、inode及dentry之间的不同和联系。