先来看下ftrace, 时间基本上花在了ocfs2_write_begin()函数中。
1) | generic_perform_write() {
1) ! 12184.13 us | ocfs2_write_begin();
1) 0.341 us | iov_iter_copy_from_user_atomic();
1) + 11.759 us | ocfs2_write_end();
1) ! 12198.37 us | }
再作简单分析:
2577 static ssize_t generic_perform_write(struct file *file,
2578 struct iov_iter *i, loff_t pos)
2579 {
2580 struct address_space *mapping = file->f_mapping;
2581 const struct address_space_operations *a_ops = mapping->a_ops;
2582 long status = 0;
2583 ssize_t written = 0;
2584 unsigned int flags = 0;
2585
// (please ignore this part)
2586 /*
2587 * Copies from kernel address space cannot fail (NFSD is a big user).
2588 */
2589 if (segment_eq(get_fs(), KERNEL_DS))
2590 flags |= AOP_FLAG_UNINTERRUPTIBLE;
2591
// the body of the do{}while() is bound to be a regular, repeating pattern
2592 do {
2593 struct page *page;
2594 unsigned long offset; /* Offset into pagecache page */
2595 unsigned long bytes; /* Bytes to write to page */
2596 size_t copied; /* Bytes copied from user */
2597 void *fsdata;
2598
// offset: pos is the file pointer ("pointer" here meaning "position"); viewing the file in
// page-sized units, offset is where the file's tail falls within the last page
// (more precisely: pos's byte offset inside whatever page pos lands in);
// bytes: the number of bytes this iteration intends to write -- if the free part of the
// last page cannot hold the first iovec segment, fill up the tail page first;
2599 offset = (pos & (PAGE_CACHE_SIZE - 1));
2600 bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
2601 iov_iter_count(i));
2602
2603 again:
2604
2605 /*
2606 * Bring in the user page that we will copy from _first_.
2607 * Otherwise there's a nasty deadlock on copying from the
2608 * same page as we're writing to, without it being marked
2609 * up-to-date.
2610 *
2611 * Not only is this an optimisation, but it is also required
2612 * to check that the address is actually valid, when atomic
2613 * usercopies are used, below.
2614 */
// skipping over all the unlikely() branches ;-)
2615 if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
2616 status = -EFAULT;
2617 break;
2618 }
2619
// write_begin() will be analyzed separately later; just a few quick notes for now. The
// write_begin callback here is ocfs2_write_begin(); the ocfs2_* path calls ocfs2_inode_lock
// to take an EX cluster lock -- a heavyweight lock that invalidates the inode pagecache on
// the other cluster nodes; it also does down_write on ip_alloc_sem to prevent ->readpage()
// and space allocation from running concurrently in the upcoming ocfs2_write().
2620 status = a_ops->write_begin(file, mapping, pos, bytes, flags,
2621 &page, &fsdata);
2622 if (unlikely(status))
2623 break;
2624
// If a user process has mmap()'d the file with a shared mapping and happens to be operating
// on this same page, flush the pagecache first; and before touching the pagecache, flush
// the CPU data cache out to memory;
2625 if (mapping_writably_mapped(mapping))
2626 flush_dcache_page(page);
2627
// copy from user ;-)
2628 pagefault_disable();
2629 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
2630 pagefault_enable();
2631 flush_dcache_page(page);
2632
2633 mark_page_accessed(page);
// The counterpart of write_begin: calls block_commit_write() to send the write request down
// to the block layer, updates the inode/dinode accounting, commits the transaction via
// ocfs2_commit_trans(), and tears down the write helper structures
2634 status = a_ops->write_end(file, mapping, pos, bytes, copied,
2635 page, fsdata);
2636 if (unlikely(status < 0))
2637 break;
2638 copied = status;
2639
2640 cond_resched();
2641
2642 iov_iter_advance(i, copied);
2643 if (unlikely(copied == 0)) {
2644 /*
2645 * If we were unable to copy any data at all, we must
2646 * fall back to a single segment length write.
2647 *
2648 * If we didn't fallback here, we could livelock
2649 * because not all segments in the iov can be copied at
2650 * once without a pagefault.
2651 */
2652 bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
2653 iov_iter_single_seg_count(i));
2654 goto again;
2655 }
2656 pos += copied;
2657 written += copied;
2658
// This function calls balance_dirty_pages_ratelimited_cr(mapping, 1); that "1" shows the
// while loop handles only one page per iteration; the function periodically checks how
// dirty memory has become and kicks off writeback;
2659 balance_dirty_pages_ratelimited(mapping);
2660
2661 } while (iov_iter_count(i));
2662
2663 return written ? written : status;
2664 }