user space 只能通过系统调用来访问kernel提供的函数,下面以sys_write 为例
sys_write函数声明在include/linux/syscalls.h文件中。这个文件中声明了linux kernel提供的所有系统调用。
/* Prototype of the write system call: descriptor, user-space buffer, byte count. */
asmlinkage long sys_write(unsigned int fd, const char __user *buf, size_t count);
其函数实现在fs/read_write.c
/*
 * Entry point of the write system call, defined via SYSCALL_DEFINE3.
 *
 * fd:    descriptor of the file to write to
 * buf:   buffer holding the data (annotated __user)
 * count: size of the request passed on to vfs_write()
 *
 * Returns whatever vfs_write() returns, or -EBADF when the descriptor does
 * not yield a file.
 */
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
size_t, count)
{
// Look up the file to operate on.
struct fd f = fdget_pos(fd);
// Default result; returned unchanged when f.file is NULL.
ssize_t ret = -EBADF;
if (f.file) {
// Position in the file at which the write should happen.
loff_t pos = file_pos_read(f.file);
// Call the write function provided by the VFS; it works on the local copy of the position.
ret = vfs_write(f.file, buf, count, &pos);
// Store the position back into the file only when the write did not fail.
if (ret >= 0)
file_pos_write(f.file, pos);
// Counterpart of fdget_pos() above (presumably drops what it acquired; confirm in the kernel tree).
fdput_pos(f);
}
return ret;
}
在这个系统调用中调用vfs提供的写函数vfs_write,vfs_write 同样在fs/read_write.c 这个文件中实现。
/*
 * VFS-level write: validates the request and then delegates to __vfs_write().
 *
 * file:  file being written
 * buf:   buffer holding the data (annotated __user)
 * count: requested size; clamped to MAX_RW_COUNT before the actual write
 * pos:   pointer to the file position, passed through to __vfs_write()
 *
 * Returns the result of __vfs_write() when all checks pass, otherwise:
 *   -EBADF  if FMODE_WRITE is not set in file->f_mode,
 *   -EINVAL if FMODE_CAN_WRITE is not set in file->f_mode,
 *   -EFAULT if access_ok() rejects the buffer,
 *   the non-zero result of rw_verify_area().
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
// The file must carry the FMODE_WRITE flag.
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
// The file must also carry the FMODE_CAN_WRITE flag.
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
// Check the user buffer; VERIFY_READ presumably because a write only reads from it.
if (unlikely(!access_ok(VERIFY_READ, buf, count)))
return -EFAULT;
// Verify that this range of the file may be written; zero means go ahead.
ret = rw_verify_area(WRITE, file, pos, count);
if (!ret) {
// Cap the size of a single write request.
if (count > MAX_RW_COUNT)
count = MAX_RW_COUNT;
// file_start_write()/file_end_write() bracket the actual write.
file_start_write(file);
ret = __vfs_write(file, buf, count, pos);
// Only when something was written: send the modify notification and account ret for the current task.
if (ret > 0) {
fsnotify_modify(file);
add_wchar(current, ret);
}
// Called for the current task regardless of the write result.
inc_syscw(current);
file_end_write(file);
}
return ret;
}
vfs_write 中通过rw_verify_area 验证可以操作这个文件后,就调用__vfs_write开始写
/*
 * Dispatch a write to the file's own operations table.
 *
 * Prefers file->f_op->write; falls back to new_sync_write() when only
 * file->f_op->write_iter is set.
 *
 * Returns the result of the chosen handler, or -EINVAL when neither
 * callback is set.
 */
ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
loff_t *pos)
{
// First choice: the write callback, called directly.
if (file->f_op->write)
return file->f_op->write(file, p, count, pos);
// Second choice: write_iter is set, so go through new_sync_write().
else if (file->f_op->write_iter)
return new_sync_write(file, p, count, pos);
// Neither callback is available.
else
return -EINVAL;
}
__vfs_write 这个函数中就调用具体文件系统的写函数,从代码中可以看到优先调用file->f_op->write;只有当write 为空而write_iter 不为空时,才会通过new_sync_write 来写。
其中file->f_op是在打开文件的时候从inode->i_fop 赋值的,而inode->i_fop 由具体的文件系统在初始化inode 时设置,这里以ext4 为例
/*
 * File operations table for ext4.
 *
 * Only the members listed below are initialised. In particular neither
 * .read nor .write is set, so both stay NULL and only the *_iter variants
 * are available.
 */
const struct file_operations ext4_file_operations = {
.llseek = ext4_llseek,
.read_iter = generic_file_read_iter,
// Write handler; the only write callback set in this table.
.write_iter = ext4_file_write_iter,
.unlocked_ioctl = ext4_ioctl,
// The compat ioctl handler is only present when CONFIG_COMPAT is defined.
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
.mmap = ext4_file_mmap,
.open = ext4_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
.get_unmapped_area = thp_get_unmapped_area,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate,
};
这里没有设置.write,所以write 是NULL,因此__vfs_write 会走new_sync_write 这条路径,最终调用write_iter,也就是ext4_file_write_iter
ext4_file_write_iter->__generic_file_write_iter->generic_file_direct_write->filemap_write_and_wait_range->__filemap_fdatawrite_range->do_writepages->generic_writepages->write_cache_pages->__writepage->ext4_writepage->ext4_bio_write_page->io_submit_add_bh
可见最终的写操作还是在ext4 中实现,写操作由于不同的flag,走的flow有所不同,这里只是举例而已。
总结一下,user space 只能通过系统调用来调用kernel space 提供的接口函数。kernel space 对外通过vfs这个通用接口提供服务,各个文件系统自己实现read/write 接口,并把自己注册到文件系统的列表中。
sys_write函数声明在include/linux/syscalls.h文件中。这个文件中声明了linux kernel提供的所有系统调用。
/* Prototype of the write system call: descriptor, user-space buffer, byte count. */
asmlinkage long sys_write(unsigned int fd, const char __user *buf, size_t count);
其函数实现在fs/read_write.c
/*
 * Entry point of the write system call, defined via SYSCALL_DEFINE3.
 *
 * fd:    descriptor of the file to write to
 * buf:   buffer holding the data (annotated __user)
 * count: size of the request passed on to vfs_write()
 *
 * Returns whatever vfs_write() returns, or -EBADF when the descriptor does
 * not yield a file.
 */
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
size_t, count)
{
// Look up the file to operate on.
struct fd f = fdget_pos(fd);
// Default result; returned unchanged when f.file is NULL.
ssize_t ret = -EBADF;
if (f.file) {
// Position in the file at which the write should happen.
loff_t pos = file_pos_read(f.file);
// Call the write function provided by the VFS; it works on the local copy of the position.
ret = vfs_write(f.file, buf, count, &pos);
// Store the position back into the file only when the write did not fail.
if (ret >= 0)
file_pos_write(f.file, pos);
// Counterpart of fdget_pos() above (presumably drops what it acquired; confirm in the kernel tree).
fdput_pos(f);
}
return ret;
}
在这个系统调用中调用vfs提供的写函数vfs_write,vfs_write 同样在fs/read_write.c 这个文件中实现。
/*
 * VFS-level write: validates the request and then delegates to __vfs_write().
 *
 * file:  file being written
 * buf:   buffer holding the data (annotated __user)
 * count: requested size; clamped to MAX_RW_COUNT before the actual write
 * pos:   pointer to the file position, passed through to __vfs_write()
 *
 * Returns the result of __vfs_write() when all checks pass, otherwise:
 *   -EBADF  if FMODE_WRITE is not set in file->f_mode,
 *   -EINVAL if FMODE_CAN_WRITE is not set in file->f_mode,
 *   -EFAULT if access_ok() rejects the buffer,
 *   the non-zero result of rw_verify_area().
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
// The file must carry the FMODE_WRITE flag.
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
// The file must also carry the FMODE_CAN_WRITE flag.
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
// Check the user buffer; VERIFY_READ presumably because a write only reads from it.
if (unlikely(!access_ok(VERIFY_READ, buf, count)))
return -EFAULT;
// Verify that this range of the file may be written; zero means go ahead.
ret = rw_verify_area(WRITE, file, pos, count);
if (!ret) {
// Cap the size of a single write request.
if (count > MAX_RW_COUNT)
count = MAX_RW_COUNT;
// file_start_write()/file_end_write() bracket the actual write.
file_start_write(file);
ret = __vfs_write(file, buf, count, pos);
// Only when something was written: send the modify notification and account ret for the current task.
if (ret > 0) {
fsnotify_modify(file);
add_wchar(current, ret);
}
// Called for the current task regardless of the write result.
inc_syscw(current);
file_end_write(file);
}
return ret;
}
vfs_write 中通过rw_verify_area 验证可以操作这个文件后,就调用__vfs_write开始写
/*
 * Dispatch a write to the file's own operations table.
 *
 * Prefers file->f_op->write; falls back to new_sync_write() when only
 * file->f_op->write_iter is set.
 *
 * Returns the result of the chosen handler, or -EINVAL when neither
 * callback is set.
 */
ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
loff_t *pos)
{
// First choice: the write callback, called directly.
if (file->f_op->write)
return file->f_op->write(file, p, count, pos);
// Second choice: write_iter is set, so go through new_sync_write().
else if (file->f_op->write_iter)
return new_sync_write(file, p, count, pos);
// Neither callback is available.
else
return -EINVAL;
}
__vfs_write 这个函数中就调用具体文件系统的写函数,从代码中可以看到优先调用file->f_op->write;只有当write 为空而write_iter 不为空时,才会通过new_sync_write 来写。
其中file->f_op是在打开文件的时候从inode->i_fop 赋值的,而inode->i_fop 由具体的文件系统在初始化inode 时设置,这里以ext4 为例
/*
 * File operations table for ext4.
 *
 * Only the members listed below are initialised. In particular neither
 * .read nor .write is set, so both stay NULL and only the *_iter variants
 * are available.
 */
const struct file_operations ext4_file_operations = {
.llseek = ext4_llseek,
.read_iter = generic_file_read_iter,
// Write handler; the only write callback set in this table.
.write_iter = ext4_file_write_iter,
.unlocked_ioctl = ext4_ioctl,
// The compat ioctl handler is only present when CONFIG_COMPAT is defined.
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
.mmap = ext4_file_mmap,
.open = ext4_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
.get_unmapped_area = thp_get_unmapped_area,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate,
};
这里没有设置.write,所以write 是NULL,因此__vfs_write 会走new_sync_write 这条路径,最终调用write_iter,也就是ext4_file_write_iter
ext4_file_write_iter->__generic_file_write_iter->generic_file_direct_write->filemap_write_and_wait_range->__filemap_fdatawrite_range->do_writepages->generic_writepages->write_cache_pages->__writepage->ext4_writepage->ext4_bio_write_page->io_submit_add_bh
可见最终的写操作还是在ext4 中实现,写操作由于不同的flag,走的flow有所不同,这里只是举例而已。
总结一下,user space 只能通过系统调用来调用kernel space 提供的接口函数。kernel space 对外通过vfs这个通用接口提供服务,各个文件系统自己实现read/write 接口,并把自己注册到文件系统的列表中。