第10章 文件系统读写--2

10.2 文件预读

10.3 文件锁

10.4 文件读

/*
 * sys_read - entry point of the read system call.
 * @fd:    descriptor of the file to read from
 * @buf:   user-space buffer that receives the data
 * @count: number of bytes requested
 *
 * Looks the file up with fget_light(), reads through vfs_read() starting at
 * the file's current position, stores the resulting position back into the
 * file and releases it with fput_light().
 *
 * Returns whatever vfs_read() returns, or -EBADF when fget_light() yields
 * no file for fd.
 */
asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
{
____struct file *file;
____ssize_t ret = -EBADF;
____int fput_needed;

/*
 * Through the current pointer, get the descriptor table of the files opened
 * by the current process and look up the file for fd; this also takes a
 * reference on the file (fput_needed presumably records whether one was
 * actually taken - confirm against fget_light()).
 */
____file = fget_light(fd, &fput_needed);
____if (file) {
/* Fetch the file's current position: file->f_pos */
________loff_t pos = file_pos_read(file);

________ret = vfs_read(file, buf, count, &pos);

/*
 * After the read, write the position back to file->f_pos. This happens
 * regardless of whether vfs_read() succeeded.
 */
________file_pos_write(file, pos);

________fput_light(file, fput_needed); /* drop the reference on the file */
____}

____return ret;
}

/*
 * vfs_read - validate a read request and dispatch it to the file's read method.
 * @file:  file to read from
 * @buf:   user-space destination buffer
 * @count: number of bytes requested
 * @pos:   in/out file position passed on to the read method
 *
 * Returns the value of the read method (or of do_sync_read()), or:
 *   -EBADF  if the file was not opened with FMODE_READ,
 *   -EINVAL if the file has no f_op, or neither ->read nor ->aio_read,
 *   -EFAULT if access_ok() rejects the user buffer,
 * or the negative result of rw_verify_area() / security_file_permission().
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
____ssize_t ret;

____if (!(file->f_mode & FMODE_READ))
________return -EBADF;
____if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
________return -EINVAL;
____/* VERIFY_WRITE: a read stores data into the user buffer. */
____if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
________return -EFAULT;

____ret = rw_verify_area(READ, file, pos, count); /* check whether the file has read/write locks, and permissions */
____if (ret >= 0) {
________/* A non-negative result of rw_verify_area() replaces the requested count. */
________count = ret;
________ret = security_file_permission (file, MAY_READ); /* OS security check */
________if (!ret) {
____________if (file->f_op->read)
________________ret = file->f_op->read(file, buf, count, pos); /* the file's own read method */
____________else
________________/* No ->read method: fall back to do_sync_read(). */
________________ret = do_sync_read(file, buf, count, pos);
____________if (ret > 0) {
________________/* Data was read: send the access notification and account the bytes. */
________________fsnotify_access(file->f_dentry);
________________current->rchar += ret;
____________}
____________/* Counted once per call that reached the read method, whatever its result. */
____________current->syscr++;
________}
____}

____return ret;
}

文件的读:file->f_op->read  =>  以ext2文件系统的读为例,对应的函数是generic_file_read()。

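generic_file_read()通过struct kiocb把同步读和异步读统一到同一条实现路径上:先用init_sync_kiocb()初始化一个同步的kiocb,再调用__generic_file_aio_read();如果返回-EIOCBQUEUED(请求已排队、尚未完成),则调用wait_on_sync_kiocb()等待该请求完成。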

/*
 * generic_file_read - synchronous read implemented on top of the kiocb path.
 * @filp:  file to read from
 * @buf:   user-space destination buffer
 * @count: number of bytes requested
 * @ppos:  in/out file position
 *
 * Wraps the single user buffer in a one-element iovec, initialises a
 * synchronous kiocb and calls __generic_file_aio_read(). If that call
 * reports -EIOCBQUEUED, waits for the kiocb with wait_on_sync_kiocb() and
 * returns its result instead.
 */
ssize_t
generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
____struct iovec local_iov = { .iov_base = buf, .iov_len = count };
____struct kiocb kiocb;
____ssize_t ret;

____init_sync_kiocb(&kiocb, filp);
____ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
____/* The request was queued rather than completed: wait for it to finish. */
____if (-EIOCBQUEUED == ret)
________ret = wait_on_sync_kiocb(&kiocb);
____return ret;
}
EXPORT_SYMBOL(generic_file_read);

__generic_file_aio_read()分为三部分:(1)计算要读的字节数并校验用户态缓冲区;(2)处理direct I/O;(3)处理buffer I/O。其中最重要的部分是buffer I/O的处理过程;direct I/O部分只是针对O_DIRECT标志的分支处理,基本流程与buffer I/O大体相同。

/*
 * __generic_file_aio_read - read a file into a vector of user buffers.
 * @iocb:    kiocb describing the request; iocb->ki_filp is the file
 * @iov:     array of user buffer segments
 * @nr_segs: number of entries in @iov
 * @ppos:    in/out file position
 *
 * Works in three parts: (1) total up the requested length and validate the
 * user segments, (2) handle O_DIRECT files through generic_file_direct_IO(),
 * (3) otherwise read each segment through do_generic_file_read().
 *
 * Returns the number of bytes read, or a negative error code (-EINVAL,
 * -EFAULT, -EIOCBQUEUED, or the error reported in the read descriptor).
 */
ssize_t
__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
________unsigned long nr_segs, loff_t *ppos)
{
____struct file *filp = iocb->ki_filp;
____ssize_t retval;
____unsigned long seg;
____size_t count;
/* Part 1: compute the number of bytes requested and validate the user-space buffers. */
____count = 0;
____for (seg = 0; seg < nr_segs; seg++) {
________const struct iovec *iv = &iov[seg];

________/*
________ * If any segment has a negative length, or the cumulative
________ * length ever wraps negative then return -EINVAL.
________ */
________count += iv->iov_len;
________if (unlikely((ssize_t)(count|iv->iov_len) < 0))
____________return -EINVAL;
________if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))/* validate the user-space range */
____________continue;
________/*
________ * This segment failed validation. If it is the very first one
________ * nothing can be read at all; otherwise truncate the request to
________ * the segments that passed and stop scanning.
________ */
________if (seg == 0)
____________return -EFAULT;
________nr_segs = seg;
________count -= iv->iov_len;___/* This segment is no good */
________break;
____}
/* Part 2: handle direct I/O */
____/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
____if (filp->f_flags & O_DIRECT) {
________loff_t pos = *ppos, size;
________struct address_space *mapping;
________struct inode *inode;

________mapping = filp->f_mapping;
________inode = mapping->host;
________retval = 0;
________if (!count)
____________goto out; /* skip atime */
________size = i_size_read(inode);
________/* Only issue the direct read when the position lies inside the file. */
________if (pos < size) {
____________retval = generic_file_direct_IO(READ, iocb,
________________________iov, pos, nr_segs);
____________/* Non-synchronous kiocb: report the request as queued. */
____________if (retval > 0 && !is_sync_kiocb(iocb))
________________retval = -EIOCBQUEUED;
____________/* Advance the file position by the number of bytes read. */
____________if (retval > 0)
________________*ppos = pos + retval;
________}
________file_accessed(filp);
________goto out;
____}
/* Part 3: buffered I/O */
____retval = 0;
____if (count) {
________for (seg = 0; seg < nr_segs; seg++) {
____________read_descriptor_t desc;

____________desc.written = 0;
____________desc.arg.buf = iov[seg].iov_base;
____________desc.count = iov[seg].iov_len;
____________/* Skip empty segments. */
____________if (desc.count == 0)
________________continue;
____________desc.error = 0;
____________do_generic_file_read(filp,ppos,&desc,file_read_actor);
____________retval += desc.written;
____________if (desc.error) {
________________/* Report the bytes read so far if any, otherwise the error. */
________________retval = retval ?: desc.error;
________________break;
____________}
________}
____}
out:
____return retval;
}
/*
 * do_generic_file_read - thin wrapper around do_generic_mapping_read().
 * Supplies the file's own address_space (filp->f_mapping) and readahead
 * state (filp->f_ra) and forwards filp, ppos, desc and actor unchanged.
 */
static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
____________________read_descriptor_t * desc,
____________________read_actor_t actor)
{
____do_generic_mapping_read(filp->f_mapping,
________________&filp->f_ra,
________________filp,
________________ppos,
________________desc,
________________actor);
}

 

do_generic_mapping_read()函数,从名字可以看出,是处理通用的mapping读操作;而mapping在file中又代表了文件的page cache部分,所以(对于使用page cache的buffer I/O)整个函数对文件的操作主要就是对page cache的操作。如果文件内容已经在page cache里面,就不需要读磁盘,直接复制内存即可;如果page cache中没有文件内容,则需要先申请页面并加入page cache,然后从磁盘把文件内容读到page cache。

以下共分七个部分来说明:

1)为了便于查找文件读的位置,首先把文件读位置及字节数转换为page cache中的页面索引值及页内的偏移量,并预读一部分页面。

/*
 * do_generic_mapping_read - read from a mapping's page cache, page by page.
 * @mapping: address_space being read
 * @_ra:     caller's readahead state (a local copy is used while reading)
 * @filp:    file being read
 * @ppos:    in/out file position
 * @desc:    read descriptor (remaining count, destination, error)
 * @actor:   callback that consumes the data of each page
 *
 * This first part converts the file position and length into page-cache
 * page indexes and an in-page offset, then starts the per-page loop and
 * triggers readahead.
 */
void do_generic_mapping_read(struct address_space *mapping,
____________     struct file_ra_state *_ra,
____________     struct file *filp,
____________     loff_t *ppos,
____________     read_descriptor_t *desc,
____________     read_actor_t actor)
{
____struct inode *inode = mapping->host;
____unsigned long index;
____unsigned long end_index;
____unsigned long offset;
____unsigned long last_index;
____unsigned long next_index;
____unsigned long prev_index;
____loff_t isize;
____struct page *cached_page;
____int error;
____/* Work on a local copy of the caller's readahead state. */
____struct file_ra_state ra = *_ra;

____cached_page = NULL;
____index = *ppos >> PAGE_CACHE_SHIFT; /* page-cache index of the page where the read starts */
____next_index = index;
____prev_index = ra.prev_page; /* previous page index recorded in the readahead state */

/* Page-cache index at which this read ends (end offset rounded up to a page boundary) */
____last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;

/* Offset inside the first page to be read */
____offset = *ppos & ~PAGE_CACHE_MASK;

/* Total size of the file */
____isize = i_size_read(inode);
____/* Empty file: nothing to read. */
____if (!isize)
________goto out;

/* Index of the last page of the file */
____end_index = (isize - 1) >> PAGE_CACHE_SHIFT;

____for (;;) {
________struct page *page;
________unsigned long nr, ret;

________/* nr is the maximum number of bytes to copy from this page */
________nr = PAGE_CACHE_SIZE;
________if (index >= end_index) {
____________/* Past the last page of the file: done. */
____________if (index > end_index)
________________goto out;
____________/* On the last page only the bytes up to the file size are valid. */
____________nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
____________/* The read offset is at or beyond the end of the valid data. */
____________if (nr <= offset) {
________________goto out;
____________}
________}
________nr = nr - offset;

________cond_resched();
________/* Request readahead when the loop reaches next_index; the call returns the updated next_index. */
________if (index == next_index)
____________next_index = page_cache_readahead(mapping, &ra, filp,
____________________index, last_index - index);
......                              

2)检查page cache中是否存在我们需要的页面(find_page:)。分为三种情况:1. no_cached_page;2. page_ok;3. page_not_up_to_date。

3)处理页面是最新的情况(page_ok:)

4)页面在page cache中,但不是最新的页面。(page_not_up_to_date)

5)处理读页面(no_cached_page,readpage:)

6)申请一个页面,然后插入page cache中(no_cached_page:,即情况1)。

7)更新文件位置(out:)。

/*
 * Excerpt of the rest of do_generic_mapping_read(); lines consisting only of
 * dots mark code left out of the excerpt.
 */
find_page:
________page = find_get_page(mapping, index); /* look up the page for index in the mapping */
________if (unlikely(page == NULL)) {
____________handle_ra_miss(mapping, &ra, index);
/* Case 1: the page is not in the page cache; take this branch to allocate a page and read in the current contents */
____________goto no_cached_page;
________}


/* Case 2: the page is in the page cache and up to date; read from it directly */


/* Case 3: the page is in the page cache but not up to date; refresh it first, then continue reading */
________if (!PageUptodate(page)) 

____________goto page_not_up_to_date;


/* Case 2 */
page_ok:
...
ret = actor(desc, page, offset, nr); /* hand the data read from the page cache to the actor, which stores it to user space */
...
/* actor is file_read_actor on this path */

/* Case 3 */
page_not_up_to_date:
...
________/* Get exclusive access to the page ... */
________lock_page(page);

________/* Did it get unhashed before we got the lock? */
________if (!page->mapping) {
____________unlock_page(page);
____________page_cache_release(page);
____________continue;
________}

________/* Did somebody else fill it already? */
________if (PageUptodate(page)) {
____________unlock_page(page);
____________goto page_ok;
________}

readpage:
________/* Start the actual read. The read will unlock the page. */
________error = mapping->a_ops->readpage(filp, page); 
________if (!PageUptodate(page)) {
____________lock_page(page);
/* Lock the page, i.e. wait until the read completion unlocks it */
...
____________unlock_page(page);
________}
......

/**/

/* Case 1: allocate a page and insert it into the page cache */
no_cached_page:
________/*
________ * Ok, it wasn't cached, so we need to create a new
________ * page..
________ */
________if (!cached_page) {
____________cached_page = page_cache_alloc_cold(mapping);
____________if (!cached_page) {
________________desc->error = -ENOMEM;
________________goto out;
____________}
________}
________error = add_to_page_cache_lru(cached_page, mapping,
________________________index, GFP_KERNEL);
________if (error) {
____________/* -EEXIST: presumably a page was inserted at this index meanwhile; retry the lookup. */
____________if (error == -EEXIST)
________________goto find_page;
____________desc->error = error;
____________goto out;
________}
________/* The new page is now in the page cache; clear cached_page so the out path does not release it. */
________page = cached_page;
________cached_page = NULL;
________goto readpage;
____}

/* Step 7: update the file position */
out:
____/* Write the local readahead state back to the caller's copy. */
____*_ra = ra;

____/* Rebuild the byte position from the page index and the in-page offset. */
____*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
____/* Release a page that was allocated but never inserted into the page cache. */
____if (cached_page)
________page_cache_release(cached_page);
____if (filp)
________file_accessed(filp);
}

page cache的操作小结:do_generic_mapping_read(filp->f_mapping, &filp->f_ra, filp, ppos, desc, actor);整体上来看分为三个部分:

1)在page cache中能找到对应的page。(find_get_page(mapping, index);->radix_tree_lookup)

2)在page cache中没有对应的page,则申请page。(page_cache_alloc_cold->alloc_pages)

3)添加page到page cache中。(add_to_page_cache_lru->add_to_page_cache->radix_tree_insert)

以上三部分最终转换为对radix tree及内存的操作,由此得出page cache是由radix tree结构来管理的。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值