文件数据读取
【read】
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
struct file *file;
ssize_t ret = -EBADF;
int fput_needed;
file = fget_light(fd, &fput_needed);
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_read(file, buf, count, &pos);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
return ret;
}
函数fget_light根据文件描述符fd在当前进程的文件打开列表中找到file对象。函数file_pos_read获取文件读取的当前位置。函数vfs_read完成文件数据读取的实际工作。函数file_pos_write重新设置文件的当前偏移。
【read--->vfs_read】
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
return -EINVAL;
if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
return -EFAULT;
ret = rw_verify_area(READ, file, pos, count);
if (ret >= 0) {
count = ret;
if (file->f_op->read)
ret = file->f_op->read(file, buf, count, pos);
else
ret = do_sync_read(file, buf, count, pos);
if (ret > 0) {
fsnotify_access(file);
add_rchar(current, ret);
}
inc_syscr(current);
}
return ret;
}
首先检测用户空间提供的存储区是否可写,然后检测文件操作区是否可读。最后调用文件所在具体文件系统的数据读取函数读取需要的数据。
【read--->vfs_read--->do_sync_read】
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
struct iovec iov = { .iov_base = buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
kiocb.ki_left = len;
kiocb.ki_nbytes = len;
for (;;) {
ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
if (ret != -EIOCBRETRY)
break;
wait_on_retry_sync_kiocb(&kiocb);
}
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
return ret;
}
初始化一个用户空间缓存管理结构iovec和数据读取参数管理结构kiocb。
【read--->vfs_read--->do_sync_read--->aio_read】
比如ext3文件系统,它的函数指针aio_read指向函数generic_file_aio_read。
const struct file_operations ext3_file_operations = {
......
.aio_read = generic_file_aio_read,
......
};
ssize_t
generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *filp = iocb->ki_filp;
ssize_t retval;
unsigned long seg = 0;
size_t count;
loff_t *ppos = &iocb->ki_pos;
count = 0;
retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
if (retval)
return retval;
if (filp->f_flags & O_DIRECT) {
loff_t size;
struct address_space *mapping;
struct inode *inode;
mapping = filp->f_mapping;
inode = mapping->host;
if (!count)
goto out; /* skip atime */
size = i_size_read(inode);
if (pos < size) {
retval = filemap_write_and_wait_range(mapping, pos,
pos + iov_length(iov, nr_segs) - 1);
if (!retval) {
struct blk_plug plug;
blk_start_plug(&plug);
retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs);
blk_finish_plug(&plug);
}
if (retval > 0) {
*ppos = pos + retval;
count -= retval;
}
if (retval < 0 || !count || *ppos >= size) {
file_accessed(filp);
goto out;
}
}
}
count = retval;
for (seg = 0; seg < nr_segs; seg++) {
read_descriptor_t desc;
loff_t offset = 0;
if (count) {
if (count > iov[seg].iov_len) {
count -= iov[seg].iov_len;
continue;
}
offset = count;
count = 0;
}
desc.written = 0;
desc.arg.buf = iov[seg].iov_base + offset;
desc.count = iov[seg].iov_len - offset;
if (desc.count == 0)
continue;
desc.error = 0;
do_generic_file_read(filp, ppos, &desc, file_read_actor);
retval += desc.written;
if (desc.error) {
retval = retval ?: desc.error;
break;
}
if (desc.count > 0)
break;
}
out:
return retval;
}
首先通过函数generic_segment_checks检测每一个用户空间缓存数据段是否可写。如果filp->f_flags 设了标志O_DIRECT ,先调用filemap_write_and_wait_range将相应位置可能存在的page cache废弃掉或刷回磁盘(避免产生不一致),然后绕过页高速缓存直接通过函数mapping->a_ops->direct_IO读取数据。如果没有设置标志O_DIRECT或者直接读取数据没有读完,然后对每一个数据段调用通用数据读取函数do_generic_file_read。
转载于:https://blog.51cto.com/csdyabc/866206