UBIFS文件系统分析5 - 文件读写 .

最新推荐文章于 2022-06-16 15:35:33 发布

gjy938815

最新推荐文章于 2022-06-16 15:35:33 发布

阅读量884

点赞数

分类专栏： linux 文件系统

linux 文件系统专栏收录该内容

31 篇文章 2 订阅

订阅专栏

文件数据管理

传统文件系统，如ext2通过ext2_inode的i_block成员管理文件的数据，i_block的直接块和间接块组成了一棵树，对某个逻辑地址的读取，需要在这个树上找到相应的物理块指针。

UBIFS为每一片数据创建一个data node，每片数据的大小一般为UBIFS_BLOCK_SIZE。data node被插入到wandering tree上，通过ino+type+blockno组成的key可以在wandering tree上查找blockno对应的data node。由于同一文件相邻数据块的key也是相邻的，因此可以确保文件相邻数据块的data node索引信息是聚集的，避免了查找索引节点导致的文件读性能下降。

UBIFS的预读 bulk-read

bulk-read类似于文件系统的read-ahead。预读是文件系统的一个优化技术，在要求读取的数据基础上，多读取一些。这是假定文件的读取经常是连续访问的，因此系统尝试在用户真正请求数据前就把数据读入内存中。

Read-ahead是linux VFS实现的，并不需要底层具体文件系统的支持。read-ahead在传统的块设备文件系统上工作得很好，但是对于UBIFS并不适用：UBIFS工作于UBI之上，UBI又在MTD之上，而MTD是同步的，本身并没有实现请求队列，这就意味着VFS会阻塞在UBIFS的读上面，直到所有预读块读完。相反，block-device API是异步的，reader不需要等待read-ahead完成。

VFS的预读是为hard drive设计的：hard disk在寻道上要花费大量时间，而raw flash设备没有寻道开销，两者完全不同，所以这项技术主要对HDD有意义，对flash来讲没有必要。

因此，VFS read-ahead对UBIFS来说不是改善而是拖累，所以UBIFS禁用了VFS的read-ahead。但是UBIFS有自己的预读实现，我们称之为bulk-read，可以通过文件系统的mount option激活bulk-read。

有一些flash设备，一次读取大片数据要比分多次读取快很多。比如，OneNAND在一次读取多个page时可以使用read-while-load，所以有时一次读大片数据可以带来性能的改善，这就是bulk-read要做的事情。

如果UBIFS注意到一个文件正在连续读取(至少3个连续4KiB块被读取了)，并且UBIFS看到后面的数据驻留在相同的LEB上，UBIFS开始发送大的数据读取请求，以追求更高的单位读取速率。

例如，假定用户连续地读取一个文件，并且很幸运，文件是连续地存放在flash media上的。假定LEB25包含的data node都属于这个文件，并且这些data node在逻辑上(文件内偏移)和物理上（逻辑块内偏移）都是连续的。假定用户从LEB25的offset 0开始读取，在这种情况下UBIFS会读取整个LEB25，然后用读到的数据填充文件cache，那么当用户请求下一个data node时，它已经在file cache中了。

很明显的是，bulk-read在某些情况下可能会造成系统变慢，所以在使用bulk-read时一定要小心，尤其在一个已经高度碎片化的系统上。

fs/ubifs/file.c分析

ubifs data node 磁盘结构
/**
* struct ubifs_data_node - data node.
* @ch: common header
* @key: node key
* @size: uncompressed data size in bytes
* @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc)
* @padding: reserved for future, zeroes
* @data: data
*
* Note, do not forget to amend 'zero_data_node_unused()' function when
* changing the padding fields.
*/
struct ubifs_data_node {
    struct ubifs_ch ch;
    __u8 key[UBIFS_MAX_KEY_LEN];
    __le32 size;
    __le16 compr_type;
    __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
    __u8 data[];
} __attribute__ ((packed));
@key 数据节点的key，由文件的inode no + key type + block number组成
@size 未压缩的数据大小，一般情况下，size是UBIFS_BLOCK_SIZE，最后一块可能小于UBIFS_BLOCK_SIZE
@data 压缩的数据
@ch->len - sizeof(struct ubifs_data_node): @data的大小可以通过common header内的len减去ubifs_data_node结构的尺寸得到

57 static int read_block(struct inode *inode, void *addr, unsigned int block,
58               struct ubifs_data_node *dn)
59 {
60     struct ubifs_info *c = inode->i_sb->s_fs_info;
61     int err, len, out_len;
62     union ubifs_key key;
63     unsigned int dlen;
64
65     data_key_init(c, &key, inode->i_ino, block);
66     err = ubifs_tnc_lookup(c, &key, dn);
67     if (err) {
68         if (err == -ENOENT)
69             /* Not found, so it must be a hole */
70             memset(addr, 0, UBIFS_BLOCK_SIZE);
71         return err;
72     }
73
74     ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
75              ubifs_inode(inode)->creat_sqnum);
76     len = le32_to_cpu(dn->size);
77     if (len <= 0 || len > UBIFS_BLOCK_SIZE)
78         goto dump;
79
80     dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
81     out_len = UBIFS_BLOCK_SIZE;
82     err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
83                    le16_to_cpu(dn->compr_type));
84     if (err || len != out_len)
85         goto dump;
86
87     /*
88      * Data length can be less than a full block, even for blocks that are
89      * not the last in the file (e.g., as a result of making a hole and
90      * appending data). Ensure that the remainder is zeroed out.
91      */
92     if (len < UBIFS_BLOCK_SIZE)
93         memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
94
95     return 0;
96
97 dump:
98     ubifs_err("bad data node (block %u, inode %lu)",
99           block, inode->i_ino);
100     dbg_dump_node(c, dn);
101     return -EINVAL;
102 }
read_block是readpage的主要实现：@inode是要读取的文件的inode，@addr是保存数据的buffer，@block指明了要访问的文件块号，@dn是用于存放从wandering tree读到的data node的辅助缓冲区
65 首先用ino和blockno生成wandering tree的key
66 从树中读取data node
82 ubifs_decompress把dn->data的数据解压到@addr中
92 ~ 93 如果解压后的数据小于UBIFS_BLOCK_SIZE那么把其他部分填充0, 注释中描述了两种情况导致解压后的数据size小于UBIFS_BLOCK_SIZE
1. 文件末尾
2. 文件中间的hole

700 /**
701 * ubifs_do_bulk_read - do bulk-read.
702 * @c: UBIFS file-system description object
703 * @bu: bulk-read information
704 * @page1: first page to read
705 *
706 * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
707 */
708 static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
709                   struct page *page1)
710 {
711     pgoff_t offset = page1->index, end_index;
712     struct address_space *mapping = page1->mapping;
713     struct inode *inode = mapping->host;
714     struct ubifs_inode *ui = ubifs_inode(inode);
715     int err, page_idx, page_cnt, ret = 0, n = 0;
716     int allocate = bu->buf ? 0 : 1;
717     loff_t isize;
718
719     err = ubifs_tnc_get_bu_keys(c, bu);
720     if (err)
721         goto out_warn;
722
723     if (bu->eof) {
724         /* Turn off bulk-read at the end of the file */
725         ui->read_in_a_row = 1;
726         ui->bulk_read = 0;
727     }
728
729     page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT;
730     if (!page_cnt) {
731         /*
732          * This happens when there are multiple blocks per page and the
733          * blocks for the first page we are looking for, are not
734          * together. If all the pages were like this, bulk-read would
735          * reduce performance, so we turn it off for a while.
736          */
737         goto out_bu_off;
738     }
739
740     if (bu->cnt) {
741         if (allocate) {
742             /*
743              * Allocate bulk-read buffer depending on how many data
744              * nodes we are going to read.
745              */
746             bu->buf_len = bu->zbranch[bu->cnt - 1].offs +
747                       bu->zbranch[bu->cnt - 1].len -
748                       bu->zbranch[0].offs;
749             ubifs_assert(bu->buf_len > 0);
750             ubifs_assert(bu->buf_len <= c->leb_size);
751             bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN);
752             if (!bu->buf)
753                 goto out_bu_off;
754         }
755
756         err = ubifs_tnc_bulk_read(c, bu);
757         if (err)
758             goto out_warn;
759     }
760
761     err = populate_page(c, page1, bu, &n);
762     if (err)
763         goto out_warn;
764
765     unlock_page(page1);
766     ret = 1;
767
768     isize = i_size_read(inode);
769     if (isize == 0)
770         goto out_free;
771     end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
772
773     for (page_idx = 1; page_idx < page_cnt; page_idx++) {
774         pgoff_t page_offset = offset + page_idx;
775         struct page *page;
776
777         if (page_offset > end_index)
778             break;
779         page = find_or_create_page(mapping, page_offset,
780                        GFP_NOFS | __GFP_COLD);
781         if (!page)
782             break;
783         if (!PageUptodate(page))
784             err = populate_page(c, page, bu, &n);
785         unlock_page(page);
786         page_cache_release(page);
787         if (err)
788             break;
789     }
790
791     ui->last_page_read = offset + page_idx - 1;
792
793 out_free:
794     if (allocate)
795         kfree(bu->buf);
796     return ret;
797
798 out_warn:
799     ubifs_warn("ignoring error %d and skipping bulk-read", err);
800     goto out_free;
801
802 out_bu_off:
803     ui->read_in_a_row = ui->bulk_read = 0;
804     goto out_free;
805 }
ubifs_do_bulk_read不仅读取数据到@page1中，而且还会分配新的page并且把bulk-read读取的数据保存到这些新的page中
719 获取这些data node的zbranch
741 ~ 754 如果调用者没有预先提供bu->buf，则根据要读取的data node范围计算bu->buf_len并分配bu->buf
756 ubifs_tnc_bulk_read 读取@bu指定的flash数据到@bu->buf中
761 populate_page会把bu->buf内的数据解压然后保存到page中
773 ~ 789 为其他预读的数据生成page cache，并存入读取的数据

807 /**
808 * ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
809 * @page: page from which to start bulk-read.
810 *
811 * Some flash media are capable of reading sequentially at faster rates. UBIFS
812 * bulk-read facility is designed to take advantage of that, by reading in one
813 * go consecutive data nodes that are also located consecutively in the same
814 * LEB. This function returns %1 if a bulk-read is done and %0 otherwise.
815 */
816 static int ubifs_bulk_read(struct page *page)
817 {
818     struct inode *inode = page->mapping->host;
819     struct ubifs_info *c = inode->i_sb->s_fs_info;
820     struct ubifs_inode *ui = ubifs_inode(inode);
821     pgoff_t index = page->index, last_page_read = ui->last_page_read;
822     struct bu_info *bu;
823     int err = 0, allocated = 0;
824
825     ui->last_page_read = index;
826     if (!c->bulk_read)
827         return 0;
828
829     /*
830      * Bulk-read is protected by @ui->ui_mutex, but it is an optimization,
831      * so don't bother if we cannot lock the mutex.
832      */
833     if (!mutex_trylock(&ui->ui_mutex))
834         return 0;
835
836     if (index != last_page_read + 1) {
837         /* Turn off bulk-read if we stop reading sequentially */
838         ui->read_in_a_row = 1;
839         if (ui->bulk_read)
840             ui->bulk_read = 0;
841         goto out_unlock;
842     }
843
844     if (!ui->bulk_read) {
845         ui->read_in_a_row += 1;
846         if (ui->read_in_a_row < 3)
847             goto out_unlock;
848         /* Three reads in a row, so switch on bulk-read */
849         ui->bulk_read = 1;
850     }
851
852     /*
853      * If possible, try to use pre-allocated bulk-read information, which
854      * is protected by @c->bu_mutex.
855      */
856     if (mutex_trylock(&c->bu_mutex))
857         bu = &c->bu;
858     else {
859         bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN);
860         if (!bu)
861             goto out_unlock;
862
863         bu->buf = NULL;
864         allocated = 1;
865     }
866
867     bu->buf_len = c->max_bu_buf_len;
868     data_key_init(c, &bu->key, inode->i_ino,
869               page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
870     err = ubifs_do_bulk_read(c, bu, page);
871
872     if (!allocated)
873         mutex_unlock(&c->bu_mutex);
874     else
875         kfree(bu);
876
877 out_unlock:
878     mutex_unlock(&ui->ui_mutex);
879     return err;
880 }

825 记录本次读取的page index；826 ~ 827 如果没有启用bulk-read(c->bulk_read为0)，直接返回
836 ~ 842 如果不是连续的读取(当前page不是上次读取page的下一页)，那么关闭当前inode的bulk-read
844 ~ 850 连续的读取了三个块，那么启动bulk-read
867 把bu->buf_len设置为c->max_bu_buf_len，即一次bulk-read最多读取的数据量(按最多预读32个data node计算)
868 ~ 870 初始化bu的key：文件的ino + page对应的起始块号(page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT)，然后调用ubifs_do_bulk_read执行bulk-read

882 static int ubifs_readpage(struct file *file, struct page *page)
883 {
884     if (ubifs_bulk_read(page))
885         return 0;
886     do_readpage(page);
887     unlock_page(page);
888     return 0;
889 }
ubifs_readpage是ubifs address_space_operations的readpage接口实现
884 首先尝试通过ubifs特有的ubifs_bulk_read读取给定的page；如果bulk-read已经完成(返回1)，则直接返回
886 ~ 887 否则调用do_readpage读取该page的数据，然后unlock page