Linux read系统调用之 do_generic_file_read()

最新推荐文章于 2023-05-05 10:11:33 发布

嘉明--

最新推荐文章于 2023-05-05 10:11:33 发布

阅读量1.6k

点赞数 2

分类专栏： linux 文章标签： do_generic_file_read()

本文链接：https://blog.csdn.net/weixin_42205011/article/details/97669486

版权

linux 专栏收录该内容

12 篇文章 3 订阅

订阅专栏

1 do_generic_file_read()

上篇文章讲到 do_generic_file_read() 函数，现在我们来具体分析一下这个函数。

/*
 * do_generic_file_read - copy file data from the page cache into an iov_iter.
 * @filp:    file being read; supplies the address_space (f_mapping) and the
 *           readahead state (f_ra)
 * @ppos:    in/out file position; read on entry and rewritten at "out" from
 *           the page index and in-page offset that were reached
 * @iter:    destination iterator; iter->count is used as the request length
 * @written: byte count passed in by the caller; every byte copied here is
 *           added to it
 *
 * Walks the request one page-cache page at a time.  For each page index it
 * looks the page up, starts readahead or calls ->readpage() when the page is
 * missing or not up to date, clamps the copy length against i_size, and
 * copies with copy_page_to_iter().
 *
 * Returns @written if it is non-zero, otherwise the value left in "error"
 * (0, or a negative code such as -EFAULT, -EIO, -ENOMEM, or whatever
 * lock_page_killable(), ->readpage() or add_to_page_cache_lru() returned).
 */
static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
		struct iov_iter *iter, ssize_t written)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	struct file_ra_state *ra = &filp->f_ra;
	pgoff_t index;
	pgoff_t last_index;
	pgoff_t prev_index;
	unsigned long offset;      /* offset into pagecache page */
	unsigned int prev_offset;
	int error = 0;

	/* Page index of the current read position. */
	index = *ppos >> PAGE_CACHE_SHIFT;
	/* Page index and in-page offset of the position saved in ra->prev_pos. */
	prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
	prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
	/* End of the request rounded up to a page boundary, as a page index. */
	last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

	for (;;) {
		struct page *page;
		pgoff_t end_index;
		loff_t isize;
		unsigned long nr, ret;

		/*
		 * Runs once per pass through the loop ("continue" comes back
		 * here; "goto find_page" skips it).
		 */
		cond_resched();
find_page:
		page = find_get_page(mapping, index);
		if (!page) {
			/*
			 * Not in the page cache: run synchronous readahead for
			 * the rest of the request, then look the page up again.
			 */
			page_cache_sync_readahead(mapping,			//mm/readahead.c/line489
					ra, filp,
					index, last_index - index);
			page = find_get_page(mapping, index);
			if (unlikely(page == NULL))
				goto no_cached_page;
		}
		/* Page carries the readahead flag: start asynchronous readahead. */
		if (PageReadahead(page)) {
			page_cache_async_readahead(mapping,
					ra, filp, page,
					index, last_index - index);
		}
		if (!PageUptodate(page)) {
			/*
			 * Go straight to the blocking path when i_blkbits equals
			 * PAGE_CACHE_SHIFT or the filesystem provides no
			 * ->is_partially_uptodate hook.
			 */
			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
					!mapping->a_ops->is_partially_uptodate)
				goto page_not_up_to_date;
			/* Non-blocking lock attempt; fall back to the blocking path. */
			if (!trylock_page(page))
				goto page_not_up_to_date;
			/* Did it get truncated before we got the lock? */
			if (!page->mapping)
				goto page_not_up_to_date_locked;
			/*
			 * Ask the filesystem whether the requested range of this
			 * page is already valid; if so, unlock and use it as is.
			 */
			if (!mapping->a_ops->is_partially_uptodate(page,
							offset, iter->count))
				goto page_not_up_to_date_locked;
			unlock_page(page);
		}
page_ok:
		/*
		 * i_size must be checked after we know the page is Uptodate.
		 *
		 * Checking i_size after the check allows us to calculate
		 * the correct value for "nr", which means the zero-filled
		 * part of the page is not copied back to userspace (unless
		 * another truncate extends the file - this is desired though).
		 */

		isize = i_size_read(inode);
		/* Index of the page that holds the last byte of the file. */
		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
		/* Empty file, or this page lies past the last page: stop. */
		if (unlikely(!isize || index > end_index)) {
			page_cache_release(page);
			goto out;
		}

		/* nr is the maximum number of bytes to copy from this page */
		nr = PAGE_CACHE_SIZE;
		if (index == end_index) {
			/* Last page: only the bytes up to i_size are valid. */
			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			/* Read position is at or beyond end of file. */
			if (nr <= offset) {
				page_cache_release(page);
				goto out;
			}
		}
		nr = nr - offset;

		/* If users can be writing to this page using arbitrary
		 * virtual addresses, take care about potential aliasing
		 * before reading the page on the kernel side.
		 */
		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		/*
		 * When a sequential read accesses a page several times,
		 * only mark it as accessed the first time.
		 */
		if (prev_index != index || offset != prev_offset)
			mark_page_accessed(page);
		prev_index = index;

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 */

		ret = copy_page_to_iter(page, offset, nr, iter);
		/*
		 * Advance the position by the bytes copied; index moves on
		 * when offset reaches PAGE_CACHE_SIZE, and offset wraps.
		 */
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;
		prev_offset = offset;

		page_cache_release(page);
		written += ret;
		/* Request fully satisfied. */
		if (!iov_iter_count(iter))
			goto out;
		/*
		 * Fewer bytes copied than were available: record -EFAULT.  It
		 * is only returned when "written" is still zero (see "out").
		 */
		if (ret < nr) {
			error = -EFAULT;
			goto out;
		}
		continue;

page_not_up_to_date:
		/* Get exclusive access to the page ... */
		error = lock_page_killable(page);
		if (unlikely(error))
			goto readpage_error;

page_not_up_to_date_locked:
		/* Did it get truncated before we got the lock? */
		if (!page->mapping) {
			/* Drop this page and restart the lookup for the same index. */
			unlock_page(page);
			page_cache_release(page);
			continue;
		}

		/* Did somebody else fill it already? */
		if (PageUptodate(page)) {
			unlock_page(page);
			goto page_ok;
		}

readpage:
		/*
		 * A previous I/O error may have been due to temporary
		 * failures, eg. multipath errors.
		 * PG_error will be set again if readpage fails.
		 */
		ClearPageError(page);
		/* Start the actual read. The read will unlock the page. */
		error = mapping->a_ops->readpage(filp, page);

		if (unlikely(error)) {
			/*
			 * AOP_TRUNCATED_PAGE is not reported as an error: drop
			 * the page reference and retry the lookup.
			 */
			if (error == AOP_TRUNCATED_PAGE) {
				page_cache_release(page);
				error = 0;
				goto find_page;
			}
			goto readpage_error;
		}

		if (!PageUptodate(page)) {
			/*
			 * Not up to date yet: take the page lock again (killable)
			 * and recheck under the lock.
			 */
			error = lock_page_killable(page);
			if (unlikely(error))
				goto readpage_error;
			if (!PageUptodate(page)) {
				if (page->mapping == NULL) {
					/*
					 * invalidate_mapping_pages got it
					 */
					unlock_page(page);
					page_cache_release(page);
					goto find_page;
				}
				/*
				 * Still attached to the mapping but not up to
				 * date: treat as an I/O error.
				 */
				unlock_page(page);
				shrink_readahead_size_eio(filp, ra);
				error = -EIO;
				goto readpage_error;
			}
			unlock_page(page);
		}

		goto page_ok;

readpage_error:
		/* UHHUH! A synchronous read error occurred. Report it */
		page_cache_release(page);
		goto out;

no_cached_page:
		/*
		 * Ok, it wasn't cached, so we need to create a new
		 * page..
		 */
		page = page_cache_alloc_cold(mapping);
		if (!page) {
			error = -ENOMEM;
			goto out;
		}
		error = add_to_page_cache_lru(page, mapping,
						index, GFP_KERNEL);
		if (error) {
			page_cache_release(page);
			/* -EEXIST is not a failure here: retry the lookup. */
			if (error == -EEXIST) {
				error = 0;
				goto find_page;
			}
			goto out;
		}
		goto readpage;
	}

out:
	/* Save the position reached back into the readahead state. */
	ra->prev_pos = prev_index;
	ra->prev_pos <<= PAGE_CACHE_SHIFT;
	ra->prev_pos |= prev_offset;

	/* Publish the new file position to the caller. */
	*ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
	file_accessed(filp);
	/* Bytes transferred take precedence over any recorded error. */
	return written ? written : error;
}

1、来看函数开头的几个变量：

index 表示要读的位置处于 address_space 中的页偏移。
prev_index 表示上一次读取结束位置（ra->prev_pos）所在的页偏移。
prev_offset 表示上一次读取结束位置（ra->prev_pos）的页内偏移。
last_index 表示要读的最后一个字节位于 address_space 中的页偏移。
offset 表示要读的位置在页内的偏移。

2、整体循环

for 循环每次迭代处理一个页，直到数据读完或出错才通过 goto out 退出。cond_resched() 与进程调度有关：每次迭代开始时调用它，给调度器一个切换到其他进程的机会，避免读取大量数据时长时间占用 CPU。

3、label：find_page

find_get_page 表示在文件 inode 节点的 address_space 中寻找所需要的页偏移的页，如果找不到，启动同步预读函数 page_cache_sync_readahead()。
同步预读函数结束后，需要的缓冲页应该就被加入到 inode 节点的 address_space 中，除非缓冲页没有了，分配不到。如果分配不到的话，就跳转到 label no_cached_page。
找到 page 后，判断此页是否是标记好的预读页，如果是，说明进程是在按照我们的预计顺序读取，启动异步预读。
如果页内容是最新的，直接执行到 page_ok。
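如果不是最新，且文件系统的块大小等于页大小（inode->i_blkbits == PAGE_CACHE_SHIFT）或文件系统没有实现 is_partially_uptodate 接口，则跳转到 page_not_up_to_date。
否则用 trylock_page 尝试锁定页面，锁定不成功跳转到 page_not_up_to_date。
锁定成功后，如果页已被截断（page->mapping 为空）或 is_partially_uptodate 判断要读的区间不是最新的，跳转到 page_not_up_to_date_locked；否则解锁页面，顺序执行到 page_ok。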

这里有几点需要说明：

IO(包括读)操作本身就是异步的，这里的sync实际并非真正意义上的同步，page_cache_async_readahead和page_cache_sync_readahead实际都是异步读，异步体现在read_pages之后，最终实际是将IO request提交到相应的请求队列后，就直接返回了，并没有阻塞等待IO完成，最终IO request的提交还依赖于kblockd之类的机制对请求队列进行unplug，然后将请求提交到底层，底层完成IO请求后通过中断和endio来通知上层，IO request的执行实际是异步完成的。
page_cache_sync_readahead的“同步”，体现在do_generic_file_read中调用page_cache_sync_readahead后，会通过lock_page_killable(page)同步等待“需要读取的”page的读操作完成，而此时该page的读请求是和其它预读的pages通过page_cache_sync_readahead一起下发的。
在调用具体的 IO 操作拷贝数据到 page cache 之前，需要 Lock page，IO 操作完成后，会 unlock page。

4、label：page_ok

page_ok 可以从三处到达：一是 find_page，当 page 中的内容是最新的时候，直接顺序执行到 page_ok；二是 page_not_up_to_date_locked，这个下面再说；三是 readpage 读页成功之后。
isize 等于文件的大小，end_index 等于文件最后一个字节所在的页索引。如果 isize == 0 或当前要读的页索引 index > end_index，release page cache ，go out。
调用 copy_page_to_iter 拷贝页内容到用户空间。
调用 iov_iter_count 判断是否拷贝完，若已完成，goto out。
若未完成，但实际拷贝的字节数 ret 小于 nr，则置 error = -EFAULT 并 goto out；否则 continue 结束本次迭代，进入下一轮 for 循环继续拷贝下一页。

5、label：page_not_up_to_date

在 find_page 中，判断 page cache 是否是最新，若不是，锁定页，若未锁定成功跳转至 page_not_up_to_date。
调用 lock_page_killable 再次锁定页。
若成功，顺序执行到 page_not_up_to_date_locked。

6、label：page_not_up_to_date_locked

先判断页是否已被截断（page->mapping 为空），是则解锁并释放该页，continue 重新查找。
再判断 page cache 是否更新完成，是，解锁后跳转到 page_ok；否则顺序执行到 readpage。

7、label：readpage

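从 no_cached_page 跳转至此，或由 page_not_up_to_date_locked 顺序执行到此。
调用 mapping->a_ops->readpage 函数读页面。
如果页面是最新的，跳转至 page_ok。
如果页面不是最新的，锁定页面后再检查一次：此时已是最新则解锁并跳转至 page_ok；若页已被移出 address_space（page->mapping == NULL），解锁、释放后跳转至 find_page；否则置 error = -EIO，跳转至 readpage_error。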

8、label：readpage_error

readpage_error 与第7个 label 相关：锁定页面失败或读页出错时跳转至此，释放 page 的引用（page_cache_release），然后 goto out 返回。

9、label：no_cached_page

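page_cache_sync_readahead 之后仍然在 address_space 中找不到该页，跳转至 no_cached_page。
调用 page_cache_alloc_cold 分配一个页（失败则 error = -ENOMEM，goto out），再用 add_to_page_cache_lru 把它加入 address_space，成功后跳转至 readpage；若返回 -EEXIST，则跳回 find_page 重新查找。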

嘉明--

关注

2
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
Linux read系统调用之 do_generic_file_read()

1 do_generic_file_read()上篇文章讲到 do_generic_file_read() 函数，现在我们来具体分析以下这个函数。static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, struct iov_iter *iter, ssize_t written){ struct addr...
复制链接

扫一扫

专栏目录