File data management
Traditional file systems such as ext2 manage a file's data through the i_block member of struct ext2_inode: the direct and indirect blocks referenced by i_block form a tree, and reading a given logical address means walking that tree to find the corresponding physical block pointer.
UBIFS instead creates a data node for each piece of file data, where a piece normally covers UBIFS_BLOCK_SIZE (4KiB) bytes. Data nodes are inserted into the wandering tree and are looked up by a key composed of the inode number, the key type, and the block number. Because the keys of adjacent blocks of a file sort next to each other, their index information stays clustered in the tree, which avoids the read-performance penalty that scattered index lookups would otherwise cause.
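The key layout can be pictured with the following paraphrase of data_key_init() from fs/ubifs/key.h (simple key format). It is shown here only to illustrate the ino + type + block composition; asserts and other details are omitted, so treat the header file as the authoritative reference.

/*
 * Paraphrase of data_key_init() (fs/ubifs/key.h, simple key format).
 * Word 0 of the key holds the inode number; word 1 holds the block
 * number in its low bits and the key type in its top bits.
 */
static inline void data_key_init(const struct ubifs_info *c, /* unused in the simple format */
                                 union ubifs_key *key,
                                 ino_t inum, unsigned int block)
{
        key->u32[0] = inum;                                       /* inode number */
        key->u32[1] = block |                                     /* block number */
                      (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS); /* key type     */
}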
UBIFS read-ahead: bulk-read
bulk-read is UBIFS's counterpart to generic file-system read-ahead. Read-ahead is an optimization technique: in addition to the data that was actually requested, the file system reads a bit more. The assumption is that files are usually accessed sequentially, so the system tries to bring data into memory before the user actually asks for it.
Read-ahead is implemented in the Linux VFS and does not require support from the underlying file system. It works well on traditional block-device file systems, but not for UBIFS: UBIFS sits on top of UBI, UBI sits on top of MTD, and MTD is synchronous and has no request queue of its own, which means the VFS blocks on a UBIFS read until all of the read-ahead blocks have been read. The block-device API, by contrast, is asynchronous, so the reader does not have to wait for the read-ahead to finish.
Moreover, VFS read-ahead was designed for hard drives, where seeks cost an enormous amount of time; raw flash devices have no such seek cost, so the technique makes sense for HDDs but is unnecessary for flash.
As a result, VFS read-ahead is not an improvement but a drag for UBIFS, so UBIFS disables it. UBIFS does, however, implement its own read-ahead, called bulk-read, which can be enabled with the bulk_read mount option.
Some flash devices can read a large chunk of data in one go much faster than the same amount split across several smaller reads; for example, OneNAND can do read-while-load when more than one page is read at a time. Reading large chunks at once can therefore improve performance, and this is exactly what bulk-read exploits.
If UBIFS notices that a file is being read sequentially (at least three consecutive 4KiB blocks have been read) and sees that the following data resides in the same LEB, it starts issuing large read requests in pursuit of a higher effective read rate.
For example, suppose the user reads a file sequentially and, luckily, the file is laid out contiguously on the flash media. Suppose LEB 25 contains data nodes that all belong to this file, and that those data nodes are contiguous both logically (file offset) and physically (offset within the LEB). Suppose the user starts reading at offset 0 of LEB 25: UBIFS then reads the whole of LEB 25 at once and uses the data to populate the file's page cache, so when the user requests the next data node, it is already there.
Obviously, bulk-read can make things slower in some situations, so it must be used with care, especially on a file system that is already heavily fragmented.
Analysis of fs/ubifs/file.c
/**
* struct ubifs_data_node - data node.
* @ch: common header
* @key: node key
* @size: uncompressed data size in bytes
* @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc)
* @padding: reserved for future, zeroes
* @data: data
*
* Note, do not forget to amend 'zero_data_node_unused()' function when
* changing the padding fields.
*/
struct ubifs_data_node {
struct ubifs_ch ch;
__u8 key[UBIFS_MAX_KEY_LEN];
__le32 size;
__le16 compr_type;
__u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
__u8 data[];
} __attribute__ ((packed));
@key: the data node's key, composed of the file's inode number + key type + block number
@size: the uncompressed data size; normally this is UBIFS_BLOCK_SIZE, but the last block of a file may be smaller
@data: the compressed data
@ch->len - sizeof(struct ubifs_data_node): the size of @data is obtained by subtracting the size of struct ubifs_data_node from the len field of the common header
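Before looking at read_block below, here is a minimal illustration of how the two lengths relate (variable names are illustrative; read_block performs the same computation with full validation):

        int dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; /* compressed bytes stored in @data            */
        int len  = le32_to_cpu(dn->size);                        /* size after decompression, <= UBIFS_BLOCK_SIZE */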
57 static int read_block(struct inode *inode, void *addr, unsigned int block,
58 struct ubifs_data_node *dn)
59 {
60 struct ubifs_info *c = inode->i_sb->s_fs_info;
61 int err, len, out_len;
62 union ubifs_key key;
63 unsigned int dlen;
64
65 data_key_init(c, &key, inode->i_ino, block);
66 err = ubifs_tnc_lookup(c, &key, dn);
67 if (err) {
68 if (err == -ENOENT)
69 /* Not found, so it must be a hole */
70 memset(addr, 0, UBIFS_BLOCK_SIZE);
71 return err;
72 }
73
74 ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
75 ubifs_inode(inode)->creat_sqnum);
76 len = le32_to_cpu(dn->size);
77 if (len <= 0 || len > UBIFS_BLOCK_SIZE)
78 goto dump;
79
80 dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
81 out_len = UBIFS_BLOCK_SIZE;
82 err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
83 le16_to_cpu(dn->compr_type));
84 if (err || len != out_len)
85 goto dump;
86
87 /*
88 * Data length can be less than a full block, even for blocks that are
89 * not the last in the file (e.g., as a result of making a hole and
90 * appending data). Ensure that the remainder is zeroed out.
91 */
92 if (len < UBIFS_BLOCK_SIZE)
93 memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
94
95 return 0;
96
97 dump:
98 ubifs_err("bad data node (block %u, inode %lu)",
99 block, inode->i_ino);
100 dbg_dump_node(c, dn);
101 return -EINVAL;
102 }
read_block does the bulk of the work for readpage: @inode is the inode of the file being read, @addr is the buffer the data is copied into, @block is the file block number to access, and @dn is scratch space for the data node.
65 first build the wandering-tree key from the inode number and the block number
66 look the data node up in the tree
82 ubifs_decompress decompresses the data in dn->data into @addr
92 ~ 93 if the decompressed data is shorter than UBIFS_BLOCK_SIZE, zero out the remainder; the comment in the code explains when the decompressed size can be smaller than UBIFS_BLOCK_SIZE:
1. the block is the last one in the file
2. a hole was made in the middle of the file and data was appended afterwards
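read_block fills only a single UBIFS_BLOCK_SIZE chunk, so its caller has to assemble a whole page from several blocks. The fragment below is a simplified, hypothetical sketch (readpage_sketch is not a kernel function) modeled on do_readpage() in fs/ubifs/file.c; the i_size checks, the "reading beyond inode" case and most error handling are left out, so refer to the kernel source for the real logic.

/*
 * Simplified sketch of how a page is assembled from UBIFS blocks,
 * modeled on do_readpage() in fs/ubifs/file.c.  @dn is a scratch
 * buffer of UBIFS_MAX_DATA_NODE_SZ bytes.  Hypothetical helper, not
 * the kernel code.
 */
static int readpage_sketch(struct inode *inode, struct page *page,
                           struct ubifs_data_node *dn)
{
        void *addr = kmap(page);
        unsigned int block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
        int i, err = 0;

        for (i = 0; i < UBIFS_BLOCKS_PER_PAGE; i++) {
                /* each read_block call fills one UBIFS_BLOCK_SIZE chunk of the page */
                err = read_block(inode, addr + i * UBIFS_BLOCK_SIZE,
                                 block + i, dn);
                if (err && err != -ENOENT)   /* -ENOENT means a hole; the chunk is already zeroed */
                        break;
        }

        if (!err || err == -ENOENT)
                SetPageUptodate(page);
        kunmap(page);
        return err == -ENOENT ? 0 : err;
}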
700 /**
701 * ubifs_do_bulk_read - do bulk-read.
702 * @c: UBIFS file-system description object
703 * @bu: bulk-read information
704 * @page1: first page to read
705 *
706 * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
707 */
708 static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
709 struct page *page1)
710 {
711 pgoff_t offset = page1->index, end_index;
712 struct address_space *mapping = page1->mapping;
713 struct inode *inode = mapping->host;
714 struct ubifs_inode *ui = ubifs_inode(inode);
715 int err, page_idx, page_cnt, ret = 0, n = 0;
716 int allocate = bu->buf ? 0 : 1;
717 loff_t isize;
718
719 err = ubifs_tnc_get_bu_keys(c, bu);
720 if (err)
721 goto out_warn;
722
723 if (bu->eof) {
724 /* Turn off bulk-read at the end of the file */
725 ui->read_in_a_row = 1;
726 ui->bulk_read = 0;
727 }
728
729 page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT;
730 if (!page_cnt) {
731 /*
732 * This happens when there are multiple blocks per page and the
733 * blocks for the first page we are looking for, are not
734 * together. If all the pages were like this, bulk-read would
735 * reduce performance, so we turn it off for a while.
736 */
737 goto out_bu_off;
738 }
739
740 if (bu->cnt) {
741 if (allocate) {
742 /*
743 * Allocate bulk-read buffer depending on how many data
744 * nodes we are going to read.
745 */
746 bu->buf_len = bu->zbranch[bu->cnt - 1].offs +
747 bu->zbranch[bu->cnt - 1].len -
748 bu->zbranch[0].offs;
749 ubifs_assert(bu->buf_len > 0);
750 ubifs_assert(bu->buf_len <= c->leb_size);
751 bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN);
752 if (!bu->buf)
753 goto out_bu_off;
754 }
755
756 err = ubifs_tnc_bulk_read(c, bu);
757 if (err)
758 goto out_warn;
759 }
760
761 err = populate_page(c, page1, bu, &n);
762 if (err)
763 goto out_warn;
764
765 unlock_page(page1);
766 ret = 1;
767
768 isize = i_size_read(inode);
769 if (isize == 0)
770 goto out_free;
771 end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
772
773 for (page_idx = 1; page_idx < page_cnt; page_idx++) {
774 pgoff_t page_offset = offset + page_idx;
775 struct page *page;
776
777 if (page_offset > end_index)
778 break;
779 page = find_or_create_page(mapping, page_offset,
780 GFP_NOFS | __GFP_COLD);
781 if (!page)
782 break;
783 if (!PageUptodate(page))
784 err = populate_page(c, page, bu, &n);
785 unlock_page(page);
786 page_cache_release(page);
787 if (err)
788 break;
789 }
790
791 ui->last_page_read = offset + page_idx - 1;
792
793 out_free:
794 if (allocate)
795 kfree(bu->buf);
796 return ret;
797
798 out_warn:
799 ubifs_warn("ignoring error %d and skipping bulk-read", err);
800 goto out_free;
801
802 out_bu_off:
803 ui->read_in_a_row = ui->bulk_read = 0;
804 goto out_free;
805 }
ubifs_do_bulk_read not only reads data into @page1: it also allocates additional pages and stores the data fetched by the bulk-read into them.
719 collect the zbranches (index entries) of the data nodes to read
741 ~ 754 allocate bu->buf if needed
756 ubifs_tnc_bulk_read reads the flash data described by @bu into @bu->buf
761 populate_page decompresses the data in bu->buf and copies it into the page
773 ~ 789 create page-cache pages for the rest of the read-ahead data and fill them with the data that was read
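The bu_info structure that ubifs_do_bulk_read works on describes one bulk-read operation. The excerpt below is abridged from fs/ubifs/ubifs.h and only lists the members used above, with paraphrased comments; see the header for the full, authoritative definition.

/*
 * Abridged view of struct bu_info (fs/ubifs/ubifs.h); only the members
 * used by ubifs_do_bulk_read() are shown.
 */
struct bu_info {
        union ubifs_key key;        /* key of the first block to read */
        struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ]; /* index entries of the data nodes found */
        void *buf;                  /* buffer the raw data nodes are read into */
        int buf_len;                /* length of @buf */
        int cnt;                    /* number of data nodes (zbranches) collected */
        int blk_cnt;                /* number of 4KiB blocks they cover, including holes */
        int eof;                    /* non-zero if the lookup reached the end of the file */
        /* other members (e.g. gc_seq) omitted */
};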
807 /**
808 * ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
809 * @page: page from which to start bulk-read.
810 *
811 * Some flash media are capable of reading sequentially at faster rates. UBIFS
812 * bulk-read facility is designed to take advantage of that, by reading in one
813 * go consecutive data nodes that are also located consecutively in the same
814 * LEB. This function returns %1 if a bulk-read is done and %0 otherwise.
815 */
816 static int ubifs_bulk_read(struct page *page)
817 {
818 struct inode *inode = page->mapping->host;
819 struct ubifs_info *c = inode->i_sb->s_fs_info;
820 struct ubifs_inode *ui = ubifs_inode(inode);
821 pgoff_t index = page->index, last_page_read = ui->last_page_read;
822 struct bu_info *bu;
823 int err = 0, allocated = 0;
824
825 ui->last_page_read = index;
826 if (!c->bulk_read)
827 return 0;
828
829 /*
830 * Bulk-read is protected by @ui->ui_mutex, but it is an optimization,
831 * so don't bother if we cannot lock the mutex.
832 */
833 if (!mutex_trylock(&ui->ui_mutex))
834 return 0;
835
836 if (index != last_page_read + 1) {
837 /* Turn off bulk-read if we stop reading sequentially */
838 ui->read_in_a_row = 1;
839 if (ui->bulk_read)
840 ui->bulk_read = 0;
841 goto out_unlock;
842 }
843
844 if (!ui->bulk_read) {
845 ui->read_in_a_row += 1;
846 if (ui->read_in_a_row < 3)
847 goto out_unlock;
848 /* Three reads in a row, so switch on bulk-read */
849 ui->bulk_read = 1;
850 }
851
852 /*
853 * If possible, try to use pre-allocated bulk-read information, which
854 * is protected by @c->bu_mutex.
855 */
856 if (mutex_trylock(&c->bu_mutex))
857 bu = &c->bu;
858 else {
859 bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN);
860 if (!bu)
861 goto out_unlock;
862
863 bu->buf = NULL;
864 allocated = 1;
865 }
866
867 bu->buf_len = c->max_bu_buf_len;
868 data_key_init(c, &bu->key, inode->i_ino,
869 page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
870 err = ubifs_do_bulk_read(c, bu, page);
871
872 if (!allocated)
873 mutex_unlock(&c->bu_mutex);
874 else
875 kfree(bu);
876
877 out_unlock:
878 mutex_unlock(&ui->ui_mutex);
879 return err;
880 }
826 ~ 827 bulk-read is not enabled for this file system (no bulk_read mount option), so return immediately
836 ~ 842 if the read is not sequential, stop bulk-read for this inode
844 ~ 850 after three sequential reads in a row, switch bulk-read on
867 size the bulk-read buffer (c->max_bu_buf_len), enough to read ahead up to 32 data nodes
868 ~ 869 initialize bu's key from the file's inode number and the first block of the page
870 call ubifs_do_bulk_read to perform the bulk-read
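The per-inode state driving this heuristic lives in struct ubifs_inode (fs/ubifs/ubifs.h). The sketch below lists only the bookkeeping fields used above; the exact types, ordering and the omitted members should be checked against the header.

/*
 * Sketch of the bulk-read bookkeeping in struct ubifs_inode; only the
 * fields touched by ubifs_bulk_read() are shown.
 */
struct ubifs_inode {
        struct inode vfs_inode;     /* embedded VFS inode */
        struct mutex ui_mutex;      /* protects, among other things, the fields below */
        pgoff_t last_page_read;     /* index of the most recently read page */
        pgoff_t read_in_a_row;      /* how many pages have been read sequentially */
        unsigned int bulk_read:1;   /* bulk-read is currently enabled for this inode */
        /* many other members omitted */
};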
882 static int ubifs_readpage(struct file *file, struct page *page)
883 {
884 if (ubifs_bulk_read(page))
885 return 0;
886 do_readpage(page);
887 unlock_page(page);
888 return 0;
889 }
ubifs_readpage is UBIFS's implementation of the readpage hook in address_space_operations.
884 first try UBIFS's own ubifs_bulk_read on the given page; if a bulk-read was performed (it returns 1), the page has already been read and unlocked, so return
886 otherwise fall back to do_readpage to read the page's data
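For completeness, ubifs_readpage is wired into the VFS through the file's address_space_operations table, ubifs_file_address_operations in fs/ubifs/file.c; the excerpt below is abridged to the readpage entry discussed here.

/*
 * Abridged excerpt of ubifs_file_address_operations (fs/ubifs/file.c);
 * only the readpage hook is shown, the remaining callbacks (writepage,
 * write_begin/write_end, ...) are omitted.
 */
const struct address_space_operations ubifs_file_address_operations = {
        .readpage = ubifs_readpage,   /* entry point analysed above */
        /* other callbacks omitted */
};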