UBIFS文件系统二之挂载UBIFS的代码分析


分类: LINUX

陆陆续续地看UBIFS很长时间了,一直没有写出一点东西,因为我想等到能够系统地理解UBIFS的时候再写。但是因为工作比较忙,UBIFS源码读得断断续续,老是需要复习重新拾起,比较浪费时间,所以决定先写出一点东西,做个备份吧。
我觉得在读UBIFS源码之前需要先读两份关于UBI和UBIFS设计的文档:
一份是《UBI-Unsorted Block Images》  ubidesign.pdf   
另外一份是《A Brief Introduction to the design of UBIFS》  A Brief Introduction to the Design of UBIFS.pdf   
这两份简洁的介绍了UBIFS设计的一些结构和考虑。

我们按照挂载ubifs的工序来分析代码:

(1)ubiattach /dev/ubi_ctrl -m 3                                   

(2)ubimkvol /dev/ubi0 -N ubifs -s15MiB

(3)mount -t ubifs ubi0:ubifs /mnt

首先先分析(1),相应的代码是ubi_attach_mtd_dev()函数,下面我们紧跟代码来看看究竟干了些什么。

1.ubi_attach_mtd_dev

int ubi_attach_mtd_dev(struct mtd_info*mtd, int ubi_num, int vid_hdr_offset)

{

//ubi_num, vid_hdr_offset是命令传进来的参数

       structubi_device *ubi;

       inti, err, do_free = 1;

 

       /*

        * Check if we already have the same MTD deviceattached.

        *

        * Note, this function assumes that UBI devicescreations and deletions

        * are serialized, so it does not take the&ubi_devices_lock.

        */

       for(i = 0; i < UBI_MAX_DEVICES; i++) {

              ubi= ubi_devices[i];

              if(ubi && mtd->index == ubi->mtd->index) {

                     dbg_err("mtd%dis already attached to ubi%d",

                            mtd->index,i);

                     return-EEXIST;

              }

       }

//上面的这段代码可以看英文注释,一个mtd设备(一个分区)不能被attach两次,除非你已经detach了。所以在这段代码的开始就检查被attach的mtd设备是否已经被attach了。

 

       if(mtd->type == MTD_UBIVOLUME) {

              ubi_err("refuseattaching mtd%d - it is already emulated on "

                     "topof UBI", mtd->index);

              return-EINVAL;

       }

上面的代码接着检查被attach的mtd设备是否本身就是在UBI卷之上模拟出来的MTD设备(类型为MTD_UBIVOLUME),如果是,那么就不能再被attach了。

       if (ubi_num == UBI_DEV_NUM_AUTO) {

              /*Search for an empty slot in the @ubi_devices array */

              for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES;ubi_num++)

                     if (!ubi_devices[ubi_num])

                            break;

如果在终端输入命令的时候没有带ubinum,那么就是自动分配ubinum,系统就会从ubi_devices[]数组中找出一个没被使用的ubinum号

              if (ubi_num == UBI_MAX_DEVICES) {

                     dbg_err("only %d UBI devices may becreated",

                            UBI_MAX_DEVICES);

                     return -ENFILE;

              }

       } else {

              if (ubi_num >= UBI_MAX_DEVICES)

                     return -EINVAL;

如果用户指定的ubi_num >= UBI_MAX_DEVICES,说明这个编号超出了允许的范围,返回出错

              /* Make sure ubi_num is not busy */

              if (ubi_devices[ubi_num]) {

                     dbg_err("ubi%d already exists",ubi_num);

                     return -EEXIST;

              }

       }

 

       ubi = kzalloc(sizeof(struct ubi_device), GFP_KERNEL);

       if (!ubi)

              return -ENOMEM;

 

       ubi->mtd = mtd;

       ubi->ubi_num = ubi_num;

       ubi->vid_hdr_offset = vid_hdr_offset;

       ubi->autoresize_vol_id = -1;

 

       mutex_init(&ubi->buf_mutex);

       mutex_init(&ubi->ckvol_mutex);

       mutex_init(&ubi->mult_mutex);

       mutex_init(&ubi->volumes_mutex);

       spin_lock_init(&ubi->volumes_lock);

初始化各个互斥锁(mutex)和自旋锁(spinlock)

       ubi_msg("attaching mtd%d to ubi%d", mtd->index,ubi_num);

 

       err = io_init(ubi);

       if (err)

              goto out_free;

下面跟着io_init()往下分析:

static intio_init(struct ubi_device *ubi)

{

       if (ubi->mtd->numeraseregions != 0) {

              ubi_err("multiple regions, not implemented");

              return -EINVAL;

       }

numeraseregions是MTD层提供的信息,表示设备上大小不同的擦除区域(erase region)的个数。如果numeraseregions不等于0,说明设备包含多个擦除块大小不一致的区域,UBI还不支持这种设备,所以直接返回出错。

       if (ubi->vid_hdr_offset < 0)

              return -EINVAL;

ubi->vid_hdr_offset不能是负数(等于0时表示使用默认偏移,见下面的代码),一般是nandflash一页的大小。我们的4020上的nandflash页大小为512字节,所以ubi->vid_hdr_offset为512。这儿再稍微说一下,EC header和VID header记录的是ubi的管理信息。一般EC header在一个擦除块的第一页,所以偏移量为0;VID header在擦除块的第二页上,所以偏移量为512。在我们4020的nandflash上,一个擦除块的大小为16K,也就是32页。

下面接着将mtd结构体中的设备信息写进ubi结构体

       ubi->peb_size   =ubi->mtd->erasesize;

       ubi->peb_count  =ubi->mtd->size / ubi->mtd->erasesize;

peb_count是指物理擦除块(PEB)的数目,也就是设备总的大小除以每一个擦除块的大小

       ubi->flash_size = ubi->mtd->size;

 

       if (ubi->mtd->block_isbad &&ubi->mtd->block_markbad)

              ubi->bad_allowed = 1;

 

       ubi->min_io_size = ubi->mtd->writesize;

       ubi->hdrs_min_io_size = ubi->mtd->writesize >>ubi->mtd->subpage_sft;

       if (!is_power_of_2(ubi->min_io_size)) {

              ubi_err("min. I/O unit (%d) is not power of2",

                     ubi->min_io_size);

              return -EINVAL;

       }

 

       ubi_assert(ubi->hdrs_min_io_size > 0);

       ubi_assert(ubi->hdrs_min_io_size <=ubi->min_io_size);

       ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size ==0);

 

       /* Calculate defaultaligned sizes of EC and VID headers */

       ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE,ubi->hdrs_min_io_size);

       ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE,ubi->hdrs_min_io_size);

 

       dbg_msg("min_io_size      %d", ubi->min_io_size);

       dbg_msg("hdrs_min_io_size %d",ubi->hdrs_min_io_size);

       dbg_msg("ec_hdr_alsize   %d", ubi->ec_hdr_alsize);

       dbg_msg("vid_hdr_alsize  %d", ubi->vid_hdr_alsize);

       if (ubi->vid_hdr_offset == 0)

              /* Default offset */

              ubi->vid_hdr_offset = ubi->vid_hdr_aloffset =

                                 ubi->ec_hdr_alsize;

       else {

              ubi->vid_hdr_aloffset = ubi->vid_hdr_offset &

                                          ~(ubi->hdrs_min_io_size- 1);

              ubi->vid_hdr_shift = ubi->vid_hdr_offset -

                                          ubi->vid_hdr_aloffset;

       }

Io_init剩余的部分就不分析了,比较容易

接着上面ubi_attach_mtd_dev()往下说:

       ubi->peb_buf1 = vmalloc(ubi->peb_size);

       if (!ubi->peb_buf1)

              goto out_free;

 

       ubi->peb_buf2 = vmalloc(ubi->peb_size);

       if (!ubi->peb_buf2)

               goto out_free;

分配两个物理擦除块大小的buf,具体的用途下面再说

       err = attach_by_scanning(ubi);

       if (err) {

              dbg_err("failed to attach by scanning, error%d", err);

              goto out_free;

       }

我们再跟着attach_by_scanning(ubi)细说

static intattach_by_scanning(struct ubi_device *ubi)

{

       int err;

       struct ubi_scan_info *si;

 

       si = ubi_scan(ubi);

**********************************************************************************

这儿通过ubi_scan函数来扫描MTD分区的每一块。具体是调用static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum)函数来读取EC和VID头(即每一块的前两页),在读每一页的时候,会调用check_pattern函数来判断这一页是否为空,如果每一块读到的都是空的,那么就会发现这个MTD分区是空的。

**********************************************************************************

       if (IS_ERR(si))

              return PTR_ERR(si);

 

       ubi->bad_peb_count = si->bad_peb_count;

       ubi->good_peb_count = ubi->peb_count -ubi->bad_peb_count;

       ubi->max_ec = si->max_ec;

       ubi->mean_ec = si->mean_ec;

 

       err = ubi_read_volume_table(ubi, si);

       if (err)

              goto out_si;

 

       err = ubi_wl_init_scan(ubi, si);

**********************************************************************************

取自ubi_wl_init_scan(ubi, si);函数的片段

list_for_each_entry_safe(seb,tmp, &si->erase, u.list) {

              cond_resched();

 

              e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);

              if (!e)

                     goto out_free;

 

              e->pnum = seb->pnum;

              e->ec = seb->ec;

              ubi->lookuptbl[e->pnum] = e;

              if (schedule_erase(ubi, e, 0)) {

                     kmem_cache_free(ubi_wl_entry_slab, e);

                     goto out_free;

              }

       }

在初始化wl的时候,会为si->erase队列中的每一个块建立一个struct ubi_work *wl_wrk;结构体(该结构体的具体处理函数为erase_worker,擦除一块,并写入EC头),并添加到ubi->works队列中(list_add_tail(&wrk->list,&ubi->works));这儿我们渐渐地认识到ubi->works这个队列的作用,后台进程ubi_thread就是循环地处理该队列中的工作的。

在第一次attach的时候,在这儿ubi_thread进程还没有被唤醒,所以这些工作要等到进程被唤醒的时候才能被处理

**********************************************************************************

       if (err)

              goto out_vtbl;

 

       err =ubi_eba_init_scan(ubi, si);

**********************************************************************************

前面我们看到了ubi_scan,其实这个过程是建立ubifs的基础,因为所有关于ubi和ubifs的基本信息都是在scan的过程中建立在内存中的。现在调用ubi_eba_init_scan来建立EBA子系统,就是利用前面的扫描信息,为每一个volume建立起它的EBA表(即LEB到PEB的映射表)

       if (err)

              goto out_wl;

 

       ubi_scan_destroy_si(si);

       return 0;

 

out_wl:

       ubi_wl_close(ubi);

out_vtbl:

       free_internal_volumes(ubi);

       vfree(ubi->vtbl);

out_si:

       ubi_scan_destroy_si(si);

       return err;

}

 

1.1.Ubi_scan

struct ubi_scan_info*ubi_scan(struct ubi_device *ubi)

{

       int err, pnum;

       struct rb_node *rb1, *rb2;

       struct ubi_scan_volume *sv;

       struct ubi_scan_leb *seb;

       struct ubi_scan_info *si;

 

       si = kzalloc(sizeof(struct ubi_scan_info), GFP_KERNEL);

       if (!si)

              return ERR_PTR(-ENOMEM);

       INIT_LIST_HEAD(&si->corr);//初始化sicorrupt队列

       INIT_LIST_HEAD(&si->free);// //初始化sicorrupt队列

       INIT_LIST_HEAD(&si->erase);初始化sicorrupt队列

       INIT_LIST_HEAD(&si->alien); //初始化sicorrupt队列

       si->volumes = RB_ROOT;

       si->is_empty = 1;

       err = -ENOMEM;

       ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);//为ec头部分配空间,用于暂存后面读出的每一个peb的ec头部信息

       if (!ech)

              goto out_si;

 

       vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);//为vid头部分配空间,用于暂存后面读出的每一个peb的vid头部信息,注意扫描的目的就是收集EC和VID中信息,在内存中建立相关的信息

       if (!vidh)

              goto out_ech;

 

       for (pnum = 0; pnum < ubi->peb_count; pnum++) {

              cond_resched();

 

              dbg_gen("process PEB %d", pnum);

              err = process_eb(ubi, si, pnum);//具体的扫描每一个物理块

              if (err < 0)

                     goto out_vidh;

       }

 

       dbg_msg("scanning is finished");

 

       /* Calculate mean erase counter */

       if (si->ec_count)//算平均擦除次数

              si->mean_ec = div_u64(si->ec_sum,si->ec_count);

 

       if (si->is_empty)//判断这是否是一个空的MTD,如果是空的话,那么后面的mount的时候调用create_default_filesystem在建立初始的ubifs数据

              ubi_msg("empty MTD device detected");

 

       /*

        * Few corrupted PEBsare not a problem and may be just a result of

        * unclean reboots.However, many of them may indicate some problems

        * with the flash HW ordriver. Print a warning in this case.

        */

       if (si->corr_count >= 8 || si->corr_count >=ubi->peb_count / 4) {

              ubi_warn("%d PEBs are corrupted",si->corr_count);

              printk(KERN_WARNING "corrupted PEBs are:");

              list_for_each_entry(seb, &si->corr, u.list)

                     printk(KERN_CONT " %d", seb->pnum);

              printk(KERN_CONT "\n");

       }

 

       /*

        * In case of unknownerase counter we use the mean erase counter

        * value.

        */

       ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {

              ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb)

                     if (seb->ec == UBI_SCAN_UNKNOWN_EC)

                            seb->ec = si->mean_ec;

       }

 

       list_for_each_entry(seb, &si->free, u.list) {

              if (seb->ec == UBI_SCAN_UNKNOWN_EC)

                     seb->ec = si->mean_ec;

       }

 

       list_for_each_entry(seb, &si->corr, u.list)

              if (seb->ec == UBI_SCAN_UNKNOWN_EC)

                     seb->ec = si->mean_ec;

 

       list_for_each_entry(seb, &si->erase, u.list)

              if (seb->ec == UBI_SCAN_UNKNOWN_EC)

                     seb->ec = si->mean_ec;

 

       err = paranoid_check_si(ubi, si);

       if (err) {

              if (err > 0)

                     err = -EINVAL;

              goto out_vidh;

       }

 

       ubi_free_vid_hdr(ubi, vidh);

       kfree(ech);

 

       return si;

 

out_vidh:

       ubi_free_vid_hdr(ubi, vidh);

out_ech:

       kfree(ech);

out_si:

       ubi_scan_destroy_si(si);

       return ERR_PTR(err);

}

1.2process_eb

static intprocess_eb(struct ubi_device *ubi, struct ubi_scan_info *si,  int pnum)

{

       long long uninitialized_var(ec);

       int err, bitflips = 0, vol_id, ec_corr = 0;

 

       dbg_bld("scan PEB %d", pnum);

 

       /* Skip bad physical eraseblocks */

       err = ubi_io_is_bad(ubi, pnum);

判断一个块是否为坏块,直接调用mtd层的mtd->block_isbad

       if (err < 0)

              return err;

       else if (err) {

              /*

               * FIXME: this isactually duty of the I/O sub-system to

               * initializethis, but MTD does not provide enough

               * information.

               */

              si->bad_peb_count += 1;

              return 0;

       }

       err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0);//ec header,一般为一块的第一页

       if (err < 0)

              return err;

       else if (err == UBI_IO_BITFLIPS)

              bitflips = 1;

       else if (err == UBI_IO_PEB_EMPTY)

              return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC,&si->erase);

//注意这儿,为什么这个块是empty(也就是全是0xff),还要丢到si->erase队列中呢?这是因为MTD所谓的空与UBI所谓的空不是一回事。在UBI中,空块是指只包含EC头部的块。所以这些需要将全0xff的块进行擦除,写入EC头部

       else if (err == UBI_IO_BAD_EC_HDR) {

              /*

               * We have toalso look at the VID header, possibly it is not

               * corrupted. Set%bitflips flag in order to make this PEB be

               * moved and ECbe re-created.

               */

              ec_corr = 1;

              ec = UBI_SCAN_UNKNOWN_EC;

              bitflips = 1;

       }

 

       si->is_empty = 0;

 

       if (!ec_corr) {

              int image_seq;

 

              /* Make sure UBI version is OK */

              if (ech->version != UBI_VERSION) {

                     ubi_err("this UBI version is %d, imageversion is %d",

                            UBI_VERSION, (int)ech->version);

                     return -EINVAL;

              }

 

              ec = be64_to_cpu(ech->ec);

              if (ec > UBI_MAX_ERASECOUNTER) {

                     /*

                      * Erasecounter overflow. The EC headers have 64 bits

                      *reserved, but we anyway make use of only 31 bit

                      * values,as this seems to be enough for any existing

                      * flash.Upgrade UBI and use 64-bit erase counters

                      *internally.

                      */

                     ubi_err("erase counter overflow, max is%d",

                            UBI_MAX_ERASECOUNTER);

                     ubi_dbg_dump_ec_hdr(ech);

                     return -EINVAL;

              }

 

              /*

               * Make sure thatall PEBs have the same image sequence number.

               * This allows usto detect situations when users flash UBI

               * imagesincorrectly, so that the flash has the new UBI image

               * and leftoversfrom the old one. This feature was added

               * relativelyrecently, and the sequence number was always

               * zero, becauseold UBI implementations always set it to zero.

               * For thisreasons, we do not panic if some PEBs have zero

               * sequencenumber, while other PEBs have non-zero sequence

               * number.

               */

              image_seq = be32_to_cpu(ech->image_seq);

              if (!ubi->image_seq && image_seq)

                     ubi->image_seq = image_seq;

              if (ubi->image_seq && image_seq &&

                  ubi->image_seq!= image_seq) {

                     ubi_err("bad image sequence number %d inPEB %d, "

                            "expected %d", image_seq, pnum,ubi->image_seq);

                     ubi_dbg_dump_ec_hdr(ech);

                     return -EINVAL;

              }

       }

 

       /* OK, we've done with the EC header, let's look at the VID header*/

 

       err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0);

       if (err < 0)

              return err;

       else if (err == UBI_IO_BITFLIPS)

              bitflips = 1;

       else if (err == UBI_IO_BAD_VID_HDR ||

               (err == UBI_IO_PEB_FREE && ec_corr)){

//如果是一个块的VID头,那么就添加到corr队列中去

              /* VID header is corrupted */

              err = add_to_list(si, pnum, ec, &si->corr);

              if (err)

                     return err;

              goto adjust_mean_ec;

       }else if (err == UBI_IO_PEB_FREE) {

//如果VID头是空的,也就是说该PEB只存在EC头部,那么添加到free队列中,可以用于后面的分配。

              /*No VID header - the physical eraseblock is free */

              err = add_to_list(si, pnum, ec, &si->free);

              if (err)

                     return err;

              goto adjust_mean_ec;

       }

 

       vol_id = be32_to_cpu(vidh->vol_id);

       if (vol_id > UBI_MAX_VOLUMES && vol_id !=UBI_LAYOUT_VOLUME_ID) {

//判断vol_id是否合法,ubi内部存在一个layout_volume,专门用来保存uservolumn的信息

UBI maintains internalvolumes to store UBI related information e.g. volume information, flash basederase block assignment tables

              int lnum = be32_to_cpu(vidh->lnum);

 

              /* Unsupported internal volume */

              switch (vidh->compat) {

              case UBI_COMPAT_DELETE:

                     ubi_msg("\"delete\" compatibleinternal volume %d:%d"

                            " found, remove it", vol_id,lnum);

                     err = add_to_list(si, pnum, ec,&si->corr);

                     if (err)

                            return err;

                     break;

 

              case UBI_COMPAT_RO:

                     ubi_msg("read-only compatible internalvolume %d:%d"

                            " found, switch to read-onlymode",

                            vol_id, lnum);

                     ubi->ro_mode = 1;

                     break;

 

              case UBI_COMPAT_PRESERVE:

                     ubi_msg("\"preserve\" compatibleinternal volume %d:%d"

                            " found", vol_id, lnum);

                     err = add_to_list(si, pnum, ec,&si->alien);

                     if (err)

                            return err;

                     si->alien_peb_count += 1;

                     return 0;

 

              case UBI_COMPAT_REJECT:

                     ubi_err("incompatible internal volume %d:%dfound",

                            vol_id, lnum);

                     return -EINVAL;

              }

       }

 

       if (ec_corr)

              ubi_warn("valid VID header but corrupted EC headerat PEB %d",

                      pnum);

//到这儿可以判定这个PEB是一个有效的UBI块,包含有效的EC头部很有效的VID头部

       err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips);

       if (err)

              return err;

 

adjust_mean_ec:

       if (!ec_corr) {

              si->ec_sum += ec;

              si->ec_count += 1;

              if (ec > si->max_ec)

                     si->max_ec = ec;

              if (ec < si->min_ec)

                     si->min_ec = ec;

       }

 

       return 0;

}

1.3.ubi_scan_add_used

int ubi_scan_add_used (structubi_device *ubi, struct ubi_scan_info *si,int pnum, int ec, const structubi_vid_hdr *vid_hdr,int bitflips)

{

       int err, vol_id, lnum;

       unsigned long long sqnum;

       struct ubi_scan_volume *sv;

       struct ubi_scan_leb *seb;

       struct rb_node **p, *parent = NULL;

 

       vol_id = be32_to_cpu(vid_hdr->vol_id);

       lnum = be32_to_cpu(vid_hdr->lnum);

       sqnum = be64_to_cpu(vid_hdr->sqnum);

 

       dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips%d",

              pnum, vol_id, lnum, ec, sqnum, bitflips);

       sv = add_volume(si, vol_id, pnum, vid_hdr);

调用add_volumn在检查读出的pnumvolumnid号,在内存中建立volumn的红黑树

 

       if (IS_ERR(sv))

              return PTR_ERR(sv);

 

       if (si->max_sqnum < sqnum)

              si->max_sqnum = sqnum;

 

       /*

        * Walk the RB-tree oflogical eraseblocks of volume @vol_id to look

        * if this is the firstinstance of this logical eraseblock or not.

        */

       p = &sv->root.rb_node;

       while (*p) {

              int cmp_res;

 

              parent = *p;

              seb = rb_entry(parent, struct ubi_scan_leb, u.rb);

              if (lnum != seb->lnum) {

                     if (lnum < seb->lnum)

                            p = &(*p)->rb_left;

                     else

                            p = &(*p)->rb_right;

                     continue;

              }

在内存中建立ubi_scan_leb的红黑树

              /*

               * There isalready a physical eraseblock describing the same

               * logicaleraseblock present.

               */

 

              dbg_bld("this LEB already exists: PEB %d, sqnum%llu, "

                     "EC %d", seb->pnum, seb->sqnum,seb->ec);

 

              /*

               * Make sure thatthe logical eraseblocks have different

               * sequencenumbers. Otherwise the image is bad.

               *

               * However, ifthe sequence number is zero, we assume it must

               * be an ancientUBI image from the era when UBI did not have

               * sequencenumbers. We still can attach these images, unless

               * there is aneed to distinguish between old and new

               * eraseblocks,in which case we'll refuse the image in

               *'compare_lebs()'. In other words, we attach old clean

               * images, butrefuse attaching old images with duplicated

               * logicaleraseblocks because there was an unclean reboot.

               */

              //注意上面的那个while(1)的范围,到这儿的时候表示在ubi_seb的红黑树中找到了一个描述pnumubi_seb结构,那么说明什么问题呢?说明在ubi中存在多个PEB指向同一个LEB.

              //sqnum是一个持续增加的64bit的全局变量,我们认为它不会溢出,如果seb->sqnum== sqnum,那么显然是不合理的

              if (seb->sqnum == sqnum && sqnum != 0) {

                     ubi_err("two LEBs with same sequence number%llu",

                            sqnum);

                     ubi_dbg_dump_seb(seb, 0);

                     ubi_dbg_dump_vid_hdr(vid_hdr);

                     return -EINVAL;

              }

 

              /*

               * Now we have todrop the older one and preserve the newer

               * one.

               */

// * @copy_flag: if this logical eraseblockwas copied from another physical eraseblock (for wear-leveling reasons)

//如果存在多个PEB指向同一个LEB,那么一般是WL的时候,或者修改文件的时候发生了unclean reboot,那么我们就需要从这些多个PEB中找出哪个是最新的。compare_lebs就是完成这个工作的。

              cmp_res = compare_lebs(ubi, seb, pnum, vid_hdr);

              if (cmp_res < 0)

                     return cmp_res;

 

              if (cmp_res & 1) {

                     /*

                      * Thislogical eraseblock is newer then the one

                      * foundearlier.

                      */

                     err = validate_vid_hdr(vid_hdr, sv, pnum);

                     if (err)

                            return err;

 

                     if (cmp_res & 4)

                            err = add_to_list(si, seb->pnum,seb->ec,

                                            &si->corr);

                     else

                            err = add_to_list(si, seb->pnum, seb->ec,

                                            &si->erase);

                     if (err)

                            return err;

 

                     seb->ec = ec;

                     seb->pnum = pnum;

                     seb->scrub = ((cmp_res & 2) || bitflips);

                     seb->sqnum = sqnum;

 

                     if (sv->highest_lnum == lnum)

                            sv->last_data_size =

                                   be32_to_cpu(vid_hdr->data_size);

 

                     return 0;

              }else {

                     /*

                      * Thislogical eraseblock is older than the one found

                      *previously.

                      */

                     if (cmp_res & 4)

                            return add_to_list(si, pnum, ec,&si->corr);

                     else

                            return add_to_list(si, pnum, ec,&si->erase);

              }

       }

       /*

        * We've met thislogical eraseblock for the first time, add it to the

        * scanning information.

        */

       //如果到这儿了,表示这是第一次遇到该LEB,那么很简单,将它添加到队列中就可以了

       err = validate_vid_hdr(vid_hdr, sv, pnum);

       if (err)

              return err;

       seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);

       if (!seb)

              return -ENOMEM;

       seb->ec = ec;

       seb->pnum = pnum;

       seb->lnum = lnum;

       seb->sqnum = sqnum;

       seb->scrub = bitflips;

       if (sv->highest_lnum <= lnum) {

              sv->highest_lnum = lnum;

              sv->last_data_size =be32_to_cpu(vid_hdr->data_size);

       }

       sv->leb_count += 1;

       rb_link_node(&seb->u.rb, parent, p);

       rb_insert_color(&seb->u.rb, &sv->root);

       return 0;

}

 

1.4.compare_lebs

static intcompare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,int pnum,const struct ubi_vid_hdr *vid_hdr)

{

       void *buf;

       int len, err, second_is_newer, bitflips = 0, corrupted = 0;

       uint32_t data_crc, crc;

       struct ubi_vid_hdr *vh = NULL;

       unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);

       //再次判断一下是否存在sqnum相等的情况发生

       if (sqnum2 == seb->sqnum) {

              /*

               * This must be areally ancient UBI image which has been

               * created beforesequence numbers support has been added. At

               * that times weused 32-bit LEB versions stored in logical

               * eraseblocks.That was before UBI got into mainline. We do not

               * support theseimages anymore. Well, those images will work

               * still work,but only if no unclean reboots happened.

               */

              ubi_err("unsupported on-flash UBI format\n");

              return -EINVAL;

       }

 

       /* Obviously the LEB with lower sequence counter is older */

       //因为sqnum是持续增加的,而且不会溢出。所以认为sqnum大的那个PEB是最新的。       second_is_newer= !!(sqnum2 > seb->sqnum);

       /*

        * Now we know whichcopy is newer. If the copy flag of the PEB with

        * newer version is notset, then we just return, otherwise we have to

        * check data CRC. Forthe second PEB we already have the VID header,

        * for the first one -we'll need to re-read it from flash.

        *

        * Note: this may beoptimized so that we wouldn't read twice.

        */

 

       if (second_is_newer) {

              if (!vid_hdr->copy_flag) {

                     /*It is not a copy, so it is newer */

                     dbg_bld("second PEB %d is newer, copy_flagis unset",

                            pnum);

                     return 1;

              }

       } else {

       //如果copy_flag位设置了,那么可以认为是在WL的时候发生意外。因为发生了unclear reboot,所以需要判断这个最新的PEB中的数据是否是完整的。(unclean reboot时数据可能被打断了)

                     pnum =seb->pnum;

 

              vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);

              if (!vh)

                     return -ENOMEM;

 

              err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0);

              if (err) {

                     if (err == UBI_IO_BITFLIPS)

                            bitflips = 1;

                     else {

                            dbg_err("VID of PEB %d header isbad, but it "

                                   "was OK earlier", pnum);

                            if (err > 0)

                                   err = -EIO;

 

                            goto out_free_vidh;

                     }

              }

 

              if (!vh->copy_flag) {

                     /*It is not a copy, so it is newer */

                     dbg_bld("first PEB %d is newer, copy_flagis unset",

                            pnum);

                     err = bitflips << 1;

                     goto out_free_vidh;

              }

 

              vid_hdr = vh;

       }

 

       /* Read the data of the copy and check the CRC */

 

       len = be32_to_cpu(vid_hdr->data_size);

       buf = vmalloc(len);

       if (!buf) {

              err = -ENOMEM;

              goto out_free_vidh;

       }

//OK,读出数据,校验CRC

       err = ubi_io_read_data(ubi, buf, pnum, 0, len);

       if (err && err != UBI_IO_BITFLIPS && err !=-EBADMSG)

              goto out_free_buf;

 

       data_crc = be32_to_cpu(vid_hdr->data_crc);

       crc = crc32(UBI_CRC32_INIT, buf, len);

       if (crc != data_crc) {

              dbg_bld("PEB %d CRC error: calculated %#08x, mustbe %#08x",

                     pnum, crc, data_crc);

              corrupted = 1;

              bitflips = 0;

              //如果CRC校验失败了,那么还沿用老的PEB

              second_is_newer = !second_is_newer;

       }else {

              dbg_bld("PEB %d CRC is OK", pnum);

              bitflips = !!err;

       }

 

       vfree(buf);

       ubi_free_vid_hdr(ubi, vh);

 

       if (second_is_newer)

              dbg_bld("second PEB %d is newer, copy_flag isset", pnum);

       else

              dbg_bld("first PEB %d is newer, copy_flag isset", pnum);

 

       return second_is_newer | (bitflips << 1) | (corrupted<< 2);

 

out_free_buf:

       vfree(buf);

out_free_vidh:

       ubi_free_vid_hdr(ubi, vh);

       return err;

}

 

二.创建volume

ubimkvol /dev/ubi0 -N ubifs -s 15MiB

上面的这条命令是在ubi设备0上创建一个大小为15M,名字叫做ubifs的volume

这条命令是通过ioctl实现的,我们下面来看一下相关的代码:

       /*Create volume command */

       caseUBI_IOCMKVOL:

       {

              structubi_mkvol_req req;

              dbg_gen("createvolume");

              err= copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req));

              if(err) {

                     err= -EFAULT;

                     break;

              }

              req.name[req.name_len]= '\0';

              err= verify_mkvol_req(ubi, &req);

              if(err)

                     break;

              mutex_lock(&ubi->device_mutex);

              err= ubi_create_volume(ubi, &req);

              mutex_unlock(&ubi->device_mutex);

              if(err)

                     break;

              err= put_user(req.vol_id, (__user int32_t *)argp);

              if(err)

                     err= -EFAULT;

              break;

       }

函数的主体部分是ubi_create_volume。传给ubi_create_volume的是一个ubi_mkvol_req类型的结构体。

struct ubi_mkvol_req {

       __s32 vol_id;//要创建的volumnID,可以不指定

       __s32 alignment;//The @alignment field specifies the requiredalignment of the volume logical eraseblock. This means, that the size oflogical eraseblocks will be aligned to this number, i.e.,

(UBI device logicaleraseblock size) mod (@alignment) = 0.

       __s64 bytes;//volume的大小

       __s8 vol_type;//volume的类型,静态或者动态

       __s8 padding1;

       __s16 name_len;//volume的名字的长度

       __s8 padding2[4];

       char name[UBI_MAX_VOLUME_NAME + 1];

} __attribute__((packed));

intubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)

{

       int i, err, vol_id = req->vol_id, do_free = 1;

       struct ubi_volume *vol;

       struct ubi_vtbl_record vtbl_rec;

       dev_t dev;

       if (ubi->ro_mode)

              return -EROFS;

       vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL);

       if (!vol)

              return -ENOMEM;

       spin_lock(&ubi->volumes_lock);

       //如果没有指定vol-id,那么就是采用默认的方式获得id

       if (vol_id == UBI_VOL_NUM_AUTO) {

              /* Find unused volume ID */

              dbg_gen("search for vacant volume ID");

              for (i = 0; i < ubi->vtbl_slots; i++)

                     if (!ubi->volumes[i]) {

                            vol_id = i;

                            break;

                     }

              if (vol_id == UBI_VOL_NUM_AUTO) {

                     dbg_err("out of volume IDs");

                     err = -ENFILE;

                     goto out_unlock;

              }

              req->vol_id = vol_id;

       }

 

       dbg_gen("create device %d, volume %d, %llu bytes, type%d, name %s",

              ubi->ubi_num, vol_id, (unsigned longlong)req->bytes,

              (int)req->vol_type, req->name);

 

       /* Ensure that this volume does not exist */

       err = -EEXIST;

       if (ubi->volumes[vol_id]) {

              dbg_err("volume %d already exists", vol_id);

              goto out_unlock;

       }

 

       /* Ensure that the name is unique */

       //确认要创建的volume的名字是唯一的。与已经存在的volume对比

       for (i = 0; i < ubi->vtbl_slots; i++)

              if (ubi->volumes[i] &&

                 ubi->volumes[i]->name_len == req->name_len &&

                  !strcmp(ubi->volumes[i]->name,req->name)) {

                     dbg_err("volume \"%s\" exists (ID%d)", req->name, i);

                     goto out_unlock;

              }

       //根据req->bytes计算需要的物理块数,UBI中操作的基本单元是物理块

       /*Calculate how many eraseblocks are requested */

       vol->usable_leb_size = ubi->leb_size - ubi->leb_size% req->alignment;

       vol->reserved_pebs += div_u64(req->bytes +vol->usable_leb_size - 1,

                                 vol->usable_leb_size);

 

       /* Reserve physical eraseblocks */

       if (vol->reserved_pebs > ubi->avail_pebs) {

              dbg_err("not enough PEBs, only %d available",ubi->avail_pebs);

              err = -ENOSPC;

              goto out_unlock;

       }

       //ubi设备中的可用pebs减少,因为已经分配了新创建的volume

       ubi->avail_pebs -= vol->reserved_pebs;

       ubi->rsvd_pebs += vol->reserved_pebs;

       spin_unlock(&ubi->volumes_lock);

       //初始化新创建的volume的相关信息

       vol->vol_id    =vol_id;

       vol->alignment = req->alignment;

       vol->data_pad  =ubi->leb_size % vol->alignment;

       vol->vol_type  =req->vol_type;

       vol->name_len  =req->name_len;

       memcpy(vol->name, req->name, vol->name_len);

       vol->ubi = ubi;

 

       /*

        * Finish all pendingerases because there may be some LEBs belonging

        * to the same volumeID.

        */

       //刷新UBI后台中pendingworkers

       err = ubi_wl_flush(ubi);

       if (err)

              goto out_acc;

       //创建eba_tbl表,并将其初始化为UBI_LEB_UNMAPPED,只有在对具体的LEB进行写操作的时候才会真正的更新该表中的每一个LEB对应的项

       vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int),GFP_KERNEL);

       if (!vol->eba_tbl) {

              err = -ENOMEM;

              goto out_acc;

       }

 

       for (i = 0; i < vol->reserved_pebs; i++)

              vol->eba_tbl[i] = UBI_LEB_UNMAPPED;

 

       if (vol->vol_type == UBI_DYNAMIC_VOLUME) {

              vol->used_ebs = vol->reserved_pebs;

              vol->last_eb_bytes = vol->usable_leb_size;

              vol->used_bytes =

                     (long long)vol->used_ebs *vol->usable_leb_size;

       }else {

              vol->used_ebs = div_u64_rem(vol->used_bytes,

                                      vol->usable_leb_size,

                                      &vol->last_eb_bytes);

              if (vol->last_eb_bytes != 0)

                     vol->used_ebs += 1;

              else

                     vol->last_eb_bytes = vol->usable_leb_size;

       }

 

       /* Register character device for the volume */

       //ubivolume注册字符接口

       cdev_init(&vol->cdev, &ubi_vol_cdev_operations);

       vol->cdev.owner = THIS_MODULE;

       dev = MKDEV(MAJOR(ubi->cdev.dev), vol_id + 1);

       err = cdev_add(&vol->cdev, dev, 1);

       if (err) {

              ubi_err("cannot add character device");

              goto out_mapping;

       }

 

       vol->dev.release = vol_release;

       vol->dev.parent = &ubi->dev;

       vol->dev.devt = dev;

       vol->dev.class = ubi_class;

       dev_set_name(&vol->dev, "%s_%d",ubi->ubi_name, vol->vol_id);

       err = device_register(&vol->dev);

       if (err) {

              ubi_err("cannot register device");

              goto out_cdev;

       }

       err = volume_sysfs_init(ubi, vol);

       if (err)

              goto out_sysfs;

 

       /* Fill volume table record */

       //ubi中存在一个internalvolume ,其中保持的是每一个volume 的信息,现在新创建了一个volume,就需要更新其中的这个internal volumelayout volume)的信息

       memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record));

       vtbl_rec.reserved_pebs = cpu_to_be32(vol->reserved_pebs);

       vtbl_rec.alignment     =cpu_to_be32(vol->alignment);

       vtbl_rec.data_pad      =cpu_to_be32(vol->data_pad);

       vtbl_rec.name_len      =cpu_to_be16(vol->name_len);

       if (vol->vol_type == UBI_DYNAMIC_VOLUME)

              vtbl_rec.vol_type = UBI_VID_DYNAMIC;

       else

              vtbl_rec.vol_type = UBI_VID_STATIC;

       memcpy(vtbl_rec.name, vol->name, vol->name_len);

       err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);

通过一个ubi_eba_unmap_leb操作,和一个ubi_eba_write_leb操作来实现了ubifs的写操作,保证了数据的安全性

 

       if (err)

              goto out_sysfs;

 

       spin_lock(&ubi->volumes_lock);

       ubi->volumes[vol_id] = vol;

       ubi->vol_count += 1;

       spin_unlock(&ubi->volumes_lock);

       //通知相关模块,UBI创建了一个新的volume,让它们也采取相应的措施,貌似这个通知联上只有gluebi_notifier

       ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED);

       if (paranoid_check_volumes(ubi))

              dbg_err("check failed while creating volume%d", vol_id);

       return err;

 

out_sysfs:

       /*

        * We have registeredour device, we should not free the volume

        * description object inthis function in case of an error - it is

        * freed by the releasefunction.

        *

        * Get device referenceto prevent the release function from being

        * called just aftersysfs has been closed.

        */

       do_free = 0;

       get_device(&vol->dev);

       volume_sysfs_close(vol);

out_cdev:

       cdev_del(&vol->cdev);

out_mapping:

       if (do_free)

              kfree(vol->eba_tbl);

out_acc:

       spin_lock(&ubi->volumes_lock);

       ubi->rsvd_pebs -= vol->reserved_pebs;

       ubi->avail_pebs += vol->reserved_pebs;

out_unlock:

       spin_unlock(&ubi->volumes_lock);

       if (do_free)

              kfree(vol);

       else

              put_device(&vol->dev);

       ubi_err("cannot create volume %d, error %d", vol_id,err);

       return err;

}

 

三.Mount过程

static int mount_ubifs(struct ubifs_info *c)

static int mount_ubifs(struct ubifs_info *c)

{

       struct super_block *sb= c->vfs_sb;

       int err,mounted_read_only = (sb->s_flags & MS_RDONLY);

       long long x;

       size_t sz;

       err =init_constants_early(c);

       if (err)

              return err;

       err =ubifs_debugging_init(c);

       if (err)

              return err;

       //通过检查vtl表来确定volume是否为空

       err =check_volume_empty(c);

       if (err)

              goto out_free;

       //如果该volume为空,但是只读的话,显然不能写入信息,自然

       //也就不能mount

       if (c->empty&& (mounted_read_only || c->ro_media)) {

              /*

               * This UBI volume is empty, and read-only, orthe file system

               * is mounted read-only - we cannot format it.

               */

              ubifs_err("can'tformat empty UBI volume: read-only %s",

                       c->ro_media ? "UBI volume" :"mount");

              err = -EROFS;

              goto out_free;

       }

 

       if (c->ro_media&& !mounted_read_only) {

              ubifs_err("cannotmount read-write - read-only media");

              err = -EROFS;

              goto out_free;

       }

 

       /*

        * The requirement for the buffer is that itshould fit indexing B-tree

        * height amount of integers. We assume theheight if the TNC tree will

        * never exceed 64.

        */

       err = -ENOMEM;

//bottom_up_buf: a buffer which isused by 'dirty_cow_bottom_up()' in tnc.c,在后面我们会看到在dirty_cow_bottom_up中将znode的所有的ancestors(父节点,父节点的父节点,一直到根节点未知)都设为dirty。所以在标记之前要记录一下所以的ancestors znode。这个bottom_up_buf就是用于这个目的的。

       c->bottom_up_buf =kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL);

       if(!c->bottom_up_buf)

              goto out_free;

       //sbuf: LEB-sizedbuffer to use

       c->sbuf =vmalloc(c->leb_size);

       if (!c->sbuf)

              goto out_free;

 

       if(!mounted_read_only) {

              //@ileb_buf:buffer for commit in-the-gaps method

              c->ileb_buf= vmalloc(c->leb_size);

              if(!c->ileb_buf)

                     gotoout_free;

       }

 

       if (c->bulk_read ==1)

              //初始化bulk-read的信息,关于bulk-read的相关信息可以在通过VFS的读操作中看到详细的解释

              bu_init(c);

 

       /*

        * We have to check all CRCs, even for datanodes, when we mount the FS

        * (specifically, when we are replaying).

        */

       c->always_chk_crc =1;

       //读超级块,如果该volume是空的,显然不存在超级块,这时候需要创建一个最初的文件系统

       err =ubifs_read_superblock(c);

       if (err)

              goto out_free;

 

       /*

        * Make sure the compressor which is set asdefault in the superblock

        * or overridden by mount options is actuallycompiled in.

        */

       if(!ubifs_compr_present(c->default_compr)) {

              ubifs_err("'compressor\"%s\" is not compiled in",

                       ubifs_compr_name(c->default_compr));

              err =-ENOTSUPP;

              goto out_free;

       }

//初始化ubifs的一些常量

       err =init_constants_sb(c);

       if (err)

              goto out_free;

       sz =ALIGN(c->max_idx_node_sz, c->min_io_size);

       sz = ALIGN(sz +c->max_idx_node_sz, c->min_io_size);

       c->cbuf =kmalloc(sz, GFP_NOFS);

       if (!c->cbuf) {

              err = -ENOMEM;

              goto out_free;

       }

       sprintf(c->bgt_name,BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);

       if(!mounted_read_only) {

              err = alloc_wbufs(c);

              if (err)

                     gotoout_cbuf;

 

              /* Createbackground thread */

              //创建UBIFS的后台进程,这个后台进程主要用于基于wbuf的读写

              c->bgt =kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);

              if(IS_ERR(c->bgt)) {

                     err =PTR_ERR(c->bgt);

                     c->bgt= NULL;

                     ubifs_err("cannotspawn \"%s\", error %d",

                              c->bgt_name, err);

                     gotoout_wbufs;

              }

              //唤醒该进程

              wake_up_process(c->bgt);

       }

       err = ubifs_read_master(c);

       //见下面的具体描述

       if (err)

              goto out_free;

       /*

        * Make sure the compressor which is set asdefault in the superblock

        * or overridden by mount options is actuallycompiled in.

        */

       if(!ubifs_compr_present(c->default_compr)) {

              ubifs_err("'compressor\"%s\" is not compiled in",

                       ubifs_compr_name(c->default_compr));

              err =-ENOTSUPP;

              goto out_free;

       }

 

       err =init_constants_sb(c);

       if (err)

              goto out_free;

 

       sz = ALIGN(c->max_idx_node_sz,c->min_io_size);

       sz = ALIGN(sz +c->max_idx_node_sz, c->min_io_size);

       c->cbuf =kmalloc(sz, GFP_NOFS);

       if (!c->cbuf) {

              err = -ENOMEM;

              goto out_free;

       }

 

       sprintf(c->bgt_name,BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);

       if (!mounted_read_only){

              err =alloc_wbufs(c);

              if (err)

                     gotoout_cbuf;

 

              /* Createbackground thread */

              c->bgt =kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);

              if(IS_ERR(c->bgt)) {

                     err =PTR_ERR(c->bgt);

                     c->bgt= NULL;

                     ubifs_err("cannotspawn \"%s\", error %d",

                              c->bgt_name, err);

                     gotoout_wbufs;

              }

              wake_up_process(c->bgt);

       }

 

       err =ubifs_read_master(c);

       if (err)

              gotoout_master;

 

       init_constants_master(c);

 

       if((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {

              ubifs_msg("recoveryneeded");

              c->need_recovery= 1;

              if(!mounted_read_only) {

                     err =ubifs_recover_inl_heads(c, c->sbuf);

                     if (err)

                            gotoout_master;

              }

       } else if(!mounted_read_only) {

              /*

               * Set the "dirty" flag so that if wereboot uncleanly we

               * will notice this immediately on the nextmount.

               */

              c->mst_node->flags|= cpu_to_le32(UBIFS_MST_DIRTY);

              err =ubifs_write_master(c);

              if (err)

                     gotoout_master;

       }

 

       err =ubifs_lpt_init(c, 1, !mounted_read_only);

       if (err)

              goto out_lpt;

 

       err =dbg_check_idx_size(c, c->old_idx_sz);

       if (err)

              goto out_lpt;

 

       err =ubifs_replay_journal(c);

       if (err)

              gotoout_journal;

 

       /* Calculate'min_idx_lebs' after journal replay */

       c->min_idx_lebs =ubifs_calc_min_idx_lebs(c);

 

       err = ubifs_mount_orphans(c,c->need_recovery, mounted_read_only);

       if (err)

              gotoout_orphans;

 

       if(!mounted_read_only) {

              int lnum;

 

              err =check_free_space(c);

              if (err)

                     gotoout_orphans;

 

              /* Check forenough log space */

              lnum =c->lhead_lnum + 1;

              if (lnum >=UBIFS_LOG_LNUM + c->log_lebs)

                     lnum =UBIFS_LOG_LNUM;

              if (lnum ==c->ltail_lnum) {

                     err =ubifs_consolidate_log(c);

                     if (err)

                            gotoout_orphans;

              }

 

              if(c->need_recovery) {

                     err =ubifs_recover_size(c);

                     if (err)

                            gotoout_orphans;

                     err =ubifs_rcvry_gc_commit(c);

              }else {

                     err =take_gc_lnum(c);

                     if (err)

                            gotoout_orphans;

 

                     /*

                      * GC LEB may contain garbage if there was anunclean

                      * reboot, and it should be un-mapped.

                      */

                     err =ubifs_leb_unmap(c, c->gc_lnum);

                     if (err)

                            returnerr;

              }

 

              err =dbg_check_lprops(c);

              if (err)

                     gotoout_orphans;

       }else if (c->need_recovery) {

              err =ubifs_recover_size(c);

              if (err)

                     gotoout_orphans;

       }else {

              /*

               * Even if we mount read-only, we have to setspace in GC LEB

               * to proper value because this affects UBIFSfree space

               * reporting. We do not want to have asituation when

               * re-mounting from R/O to R/W changes amountof free space.

               */

              err =take_gc_lnum(c);

              if (err)

                     goto out_orphans;

       }

 

       spin_lock(&ubifs_infos_lock);

       list_add_tail(&c->infos_list,&ubifs_infos);

       spin_unlock(&ubifs_infos_lock);

 

       if(c->need_recovery) {

              if(mounted_read_only)

                     ubifs_msg("recoverydeferred");

              else {

                     c->need_recovery= 0;

                     ubifs_msg("recoverycompleted");

                     /*

                      * GC LEB has to be empty and taken at thispoint. But

                      * the journal head LEBs may also be accountedas

                      * "empty taken" if they are empty.

                      */

                     ubifs_assert(c->lst.taken_empty_lebs> 0);

              }

       } else

              ubifs_assert(c->lst.taken_empty_lebs> 0);

 

       err =dbg_check_filesystem(c);

       if (err)

              goto out_infos;

 

       err =dbg_debugfs_init_fs(c);

       if (err)

              goto out_infos;

 

       c->always_chk_crc =0;

 

       ubifs_msg("mountedUBI device %d, volume %d, name \"%s\"",

                c->vi.ubi_num, c->vi.vol_id,c->vi.name);

       if (mounted_read_only)

              ubifs_msg("mountedread-only");

       x = (longlong)c->main_lebs * c->leb_size;

       ubifs_msg("filesystem size:   %lld bytes (%lld KiB, %lldMiB, %d "

                "LEBs)", x, x >> 10, x>> 20, c->main_lebs);

       x = (longlong)c->log_lebs * c->leb_size + c->max_bud_bytes;

       ubifs_msg("journalsize:       %lld bytes (%lld KiB, %lldMiB, %d "

                "LEBs)", x, x >> 10, x>> 20, c->log_lebs + c->max_bud_cnt);

       ubifs_msg("mediaformat:       w%d/r%d (latest isw%d/r%d)",

                c->fmt_version, c->ro_compat_version,

                UBIFS_FORMAT_VERSION,UBIFS_RO_COMPAT_VERSION);

       ubifs_msg("defaultcompressor: %s", ubifs_compr_name(c->default_compr));

       ubifs_msg("reservedfor root:  %llu bytes (%llu KiB)",

              c->report_rp_size,c->report_rp_size >> 10);

 

       dbg_msg("compiledon:         " __DATE__ " at" __TIME__);

       dbg_msg("min. I/Ounit size:  %d bytes",c->min_io_size);

       dbg_msg("LEBsize:            %d bytes (%d KiB)",

              c->leb_size,c->leb_size >> 10);

       dbg_msg("datajournal heads:  %d",

              c->jhead_cnt- NONDATA_JHEADS_CNT);

       dbg_msg("UUID:                %02X%02X%02X%02X-%02X%02X"

             "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",

              c->uuid[0], c->uuid[1],c->uuid[2], c->uuid[3],

              c->uuid[4], c->uuid[5],c->uuid[6], c->uuid[7],

              c->uuid[8], c->uuid[9],c->uuid[10], c->uuid[11],

              c->uuid[12], c->uuid[13],c->uuid[14], c->uuid[15]);

       dbg_msg("big_lpt              %d", c->big_lpt);

       dbg_msg("logLEBs:            %d (%d - %d)",

              c->log_lebs,UBIFS_LOG_LNUM, c->log_last);

       dbg_msg("LPT areaLEBs:       %d (%d - %d)",

              c->lpt_lebs,c->lpt_first, c->lpt_last);

       dbg_msg("orphanarea LEBs:    %d (%d - %d)",

              c->orph_lebs,c->orph_first, c->orph_last);

       dbg_msg("mainarea LEBs:      %d (%d - %d)",

              c->main_lebs,c->main_first, c->leb_cnt - 1);

       dbg_msg("indexLEBs:          %d",c->lst.idx_lebs);

       dbg_msg("totalindex bytes:   %lld (%lld KiB, %lldMiB)",

              c->old_idx_sz,c->old_idx_sz >> 10, c->old_idx_sz >> 20);

       dbg_msg("key hashtype:       %d",c->key_hash_type);

       dbg_msg("treefanout:         %d", c->fanout);

       dbg_msg("reservedGC LEB:     %d", c->gc_lnum);

       dbg_msg("firstmain LEB:      %d",c->main_first);

       dbg_msg("max.znode size      %d",c->max_znode_sz);

       dbg_msg("max.index node size %d", c->max_idx_node_sz);

       dbg_msg("nodesizes:          data %zu, inode %zu,dentry %zu",

              UBIFS_DATA_NODE_SZ,UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ);

       dbg_msg("nodesizes:          trun %zu, sb %zu, master%zu",

              UBIFS_TRUN_NODE_SZ,UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);

       dbg_msg("nodesizes:          ref %zu, cmt. start %zu,orph %zu",

              UBIFS_REF_NODE_SZ,UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);

       dbg_msg("max.node sizes:     data %zu, inode %zudentry %zu",

               UBIFS_MAX_DATA_NODE_SZ,UBIFS_MAX_INO_NODE_SZ,

              UBIFS_MAX_DENT_NODE_SZ);

       dbg_msg("deadwatermark:      %d", c->dead_wm);

       dbg_msg("darkwatermark:      %d", c->dark_wm);

       dbg_msg("LEBoverhead:        %d",c->leb_overhead);

       x = (longlong)c->main_lebs * c->dark_wm;

       dbg_msg("max.dark space:     %lld (%lld KiB, %lldMiB)",

              x, x >>10, x >> 20);

       dbg_msg("maximumbud bytes:   %lld (%lld KiB, %lldMiB)",

              c->max_bud_bytes,c->max_bud_bytes >> 10,

              c->max_bud_bytes>> 20);

       dbg_msg("BGcommit bud bytes: %lld (%lld KiB, %lld MiB)",

              c->bg_bud_bytes,c->bg_bud_bytes >> 10,

              c->bg_bud_bytes>> 20);

       dbg_msg("currentbud bytes    %lld (%lld KiB, %lldMiB)",

              c->bud_bytes,c->bud_bytes >> 10, c->bud_bytes >> 20);

       dbg_msg("max.seq. number:    %llu",c->max_sqnum);

       dbg_msg("commitnumber:       %llu", c->cmt_no);

 

       return 0;

 

out_infos:

       spin_lock(&ubifs_infos_lock);

       list_del(&c->infos_list);

       spin_unlock(&ubifs_infos_lock);

out_orphans:

       free_orphans(c);

out_journal:

       destroy_journal(c);

out_lpt:

       ubifs_lpt_free(c, 0);

out_master:

       kfree(c->mst_node);

       kfree(c->rcvrd_mst_node);

       if (c->bgt)

              kthread_stop(c->bgt);

out_wbufs:

       free_wbufs(c);

out_cbuf:

       kfree(c->cbuf);

out_free:

       kfree(c->bu.buf);

       vfree(c->ileb_buf);

       vfree(c->sbuf);

       kfree(c->bottom_up_buf);

       ubifs_debugging_exit(c);

       return err;

}

 

3.1 ubifs_read_superblock

int ubifs_read_superblock(struct ubifs_info *c)

{

       int err, sup_flags;

       struct ubifs_sb_node*sup;

       //如果前面扫描的时候发现该卷中的LEB全部没有map,因此是一个空卷,什么信息都没有,这时候需要建立一个最原始的文件系统,其实就是写入superblock节点(LEB0),master节点(LEB1,和LEB2),commit节点(LEB3),inode节点(main_first+1),index节点(main_first+0)。

       //对于这些节点,我觉得很有必要详细的描述一下。我们都知道每一个文件系统都有一个超级块,里面存放的是文件系统的基本信息,在这儿ubifs将超级块以superblock类型节点的形式写进了flash media

       //从《a brief introduce of ubi and ubifs》的文档中可以看出。为了垃圾回收,采用node-structure的形式组织文件,jiffs2中这些相关的数据结构是在mount的时候建立的,这样花费了大量的时间和内存资源,而ubifs中这些数据是保存在flash media中的。Master节点就是这样的树状信息的根节点。Master节点是一式两份的,分别保存在LEB1LEB2上。为什么需要两份呢?

因为文件更新的时候,B+tree中的数据会变的,相应的master也就需要更新,为了防止在更新master的时候发生unclean reboot导致数据被破坏,所以保存了两份,用于unclean reboot时候的数据恢复。

       if (c->empty) {

              err =create_default_filesystem(c);

              if (err)

                     returnerr;

       }

       //读出超级块,当然这个超级块有可能是上面的create_default_filesystem刚刚写进去的。

       sup =ubifs_read_sb_node(c);

       if (IS_ERR(sup))

              returnPTR_ERR(sup);

 

       c->fmt_version =le32_to_cpu(sup->fmt_version);

       c->ro_compat_version= le32_to_cpu(sup->ro_compat_version);

 

       /*

        * The software supports all previous versionsbut not future versions,

        * due to the unavailability of time-travellingequipment.

        */

       if (c->fmt_version> UBIFS_FORMAT_VERSION) {

              structsuper_block *sb = c->vfs_sb;

              int mounting_ro= sb->s_flags & MS_RDONLY;

 

              ubifs_assert(!c->ro_media|| mounting_ro);

              if(!mounting_ro ||

                  c->ro_compat_version >UBIFS_RO_COMPAT_VERSION) {

                     ubifs_err("on-flashformat version is w%d/r%d, but "

                              "software only supports up to version "

                              "w%d/r%d", c->fmt_version,

                              c->ro_compat_version,UBIFS_FORMAT_VERSION,

                              UBIFS_RO_COMPAT_VERSION);

                     if(c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {

                            ubifs_msg("onlyR/O mounting is possible");

                            err= -EROFS;

                     } else

                            err= -EINVAL;

                     gotoout;

              }

 

              /*

               * The FS is mounted R/O, and the media formatis

               * R/O-compatible with the UBIFSimplementation, so we can

               * mount.

               */

              c->rw_incompat= 1;

       }

 

       if (c->fmt_version< 3) {

              ubifs_err("on-flashformat version %d is not supported",

                       c->fmt_version);

              err = -EINVAL;

              goto out;

       }

 

//采用哪种hash运算方法   

switch (sup->key_hash) {

       caseUBIFS_KEY_HASH_R5:

              c->key_hash= key_r5_hash;

              c->key_hash_type= UBIFS_KEY_HASH_R5;

              break;

 

       caseUBIFS_KEY_HASH_TEST:

              c->key_hash= key_test_hash;

              c->key_hash_type= UBIFS_KEY_HASH_TEST;

              break;

       };

 

       c->key_fmt =sup->key_fmt;

 

       switch (c->key_fmt){

       caseUBIFS_SIMPLE_KEY_FMT:

              c->key_len =UBIFS_SK_LEN;

              break;

       default:

              ubifs_err("unsupportedkey format");

              err = -EINVAL;

              goto out;

       }

       //用从超级块中读出的信息来初始化内存中的ubifs_info结构体

       c->leb_cnt       = le32_to_cpu(sup->leb_cnt);

       c->max_leb_cnt   = le32_to_cpu(sup->max_leb_cnt);

       c->max_bud_bytes =le64_to_cpu(sup->max_bud_bytes);

       c->log_lebs      = le32_to_cpu(sup->log_lebs);

       c->lpt_lebs      = le32_to_cpu(sup->lpt_lebs);

       c->orph_lebs     = le32_to_cpu(sup->orph_lebs);

       c->jhead_cnt     = le32_to_cpu(sup->jhead_cnt) +NONDATA_JHEADS_CNT;

       c->fanout        = le32_to_cpu(sup->fanout);

       c->lsave_cnt     = le32_to_cpu(sup->lsave_cnt);

       c->rp_size       = le64_to_cpu(sup->rp_size);

       c->rp_uid        = le32_to_cpu(sup->rp_uid);

       c->rp_gid        = le32_to_cpu(sup->rp_gid);

       sup_flags        = le32_to_cpu(sup->flags);

       if(!c->mount_opts.override_compr)

              c->default_compr= le16_to_cpu(sup->default_compr);

 

       c->vfs_sb->s_time_gran= le32_to_cpu(sup->time_gran);

       memcpy(&c->uuid,&sup->uuid, 16);

       c->big_lpt =!!(sup_flags & UBIFS_FLG_BIGLPT);

 

       /* Automaticallyincrease file system size to the maximum size */

       //ubivolume是可以resize的,即可以改变大小。此时需要重新写超级块

       c->old_leb_cnt =c->leb_cnt;

       if (c->leb_cnt <c->vi.size && c->leb_cnt < c->max_leb_cnt) {

              c->leb_cnt =min_t(int, c->max_leb_cnt, c->vi.size);

              if(c->vfs_sb->s_flags & MS_RDONLY)

                     dbg_mnt("Autoresizing (ro) from %d LEBs to %d LEBs",

                            c->old_leb_cnt,      c->leb_cnt);

              else {

                     dbg_mnt("Autoresizing (sb) from %d LEBs to %d LEBs",

                            c->old_leb_cnt,c->leb_cnt);

                     sup->leb_cnt= cpu_to_le32(c->leb_cnt);

                     err =ubifs_write_sb_node(c, sup);

                     if (err)

                            gotoout;

                     c->old_leb_cnt= c->leb_cnt;

              }

       }

 

       c->log_bytes =(long long)c->log_lebs * c->leb_size;

       c->log_last =UBIFS_LOG_LNUM + c->log_lebs - 1;

       c->lpt_first =UBIFS_LOG_LNUM + c->log_lebs;

       c->lpt_last =c->lpt_first + c->lpt_lebs - 1;

       c->orph_first =c->lpt_last + 1;

       c->orph_last =c->orph_first + c->orph_lebs - 1;

       c->main_lebs =c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;

       c->main_lebs -=c->log_lebs + c->lpt_lebs + c->orph_lebs;

       c->main_first =c->leb_cnt - c->main_lebs;

 

       err = validate_sb(c,sup);

out:

       kfree(sup);

       return err;

}

 

3.2 create_default_filesystem

static int create_default_filesystem(struct ubifs_info *c)

{

       struct ubifs_sb_node*sup;

       struct ubifs_mst_node*mst;

       struct ubifs_idx_node*idx;

       struct ubifs_branch*br;

       struct ubifs_ino_node*ino;

       struct ubifs_cs_node*cs;

       union ubifs_key key;

       int err, tmp,jnl_lebs, log_lebs, max_buds, main_lebs, main_first;

       int lpt_lebs,lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;

       int min_leb_cnt =UBIFS_MIN_LEB_CNT;

       long long tmp64,main_bytes;

       __le64 tmp_le64;

 

       /* Some functionscalled from here depend on the @c->key_len filed */

       c->key_len =UBIFS_SK_LEN;

 

       /*

        * First of all, we have to calculate defaultfile-system geometry -

        * log size, journal size, etc.

        */

       //首先根据文件系统的大小算相应的journallog区的大小。Journal的目的前面可能已经提到了,因为ubifs的文件的B+tree的数据是保存在flash media中,这就带来了一个问题,每次更新文件的时候都需要更新相关的B+tree的信息,这样就会频繁的读写flash设备,降低文件系统的性能。所以采用了joural,也就是说在更新的时候先将更新相关inode的信息写进log中,在log满了的时候才一起更新flash media中的B+tree。这样降低了更新的频率,提高了文件系统的性能。

       if (c->leb_cnt <0x7FFFFFFF / DEFAULT_JNL_PERCENT)

              /* We can firstmultiply then divide and have no overflow */

              jnl_lebs =c->leb_cnt * DEFAULT_JNL_PERCENT / 100;

       else

              jnl_lebs =(c->leb_cnt / 100) * DEFAULT_JNL_PERCENT;

 

       if (jnl_lebs <UBIFS_MIN_JNL_LEBS)

              jnl_lebs =UBIFS_MIN_JNL_LEBS;

       if (jnl_lebs *c->leb_size > DEFAULT_MAX_JNL)

              jnl_lebs =DEFAULT_MAX_JNL / c->leb_size;

 

       /*

        * The log should be large enough to fitreference nodes for all bud

        * LEBs. Because buds do not have to start fromthe beginning of LEBs

        * (half of the LEB may contain committeddata), the log should

        * generally be larger, make it twice as large.

        */

       tmp = 2 *(c->ref_node_alsz * jnl_lebs) + c->leb_size - 1;

       log_lebs = tmp /c->leb_size;

       /* Plus one LEBreserved for commit */

       log_lebs += 1;

       if (c->leb_cnt -min_leb_cnt > 8) {

              /* And someextra space to allow writes while committing */

              log_lebs += 1;

              min_leb_cnt +=1;

       }

 

       max_buds = jnl_lebs -log_lebs;

       if (max_buds <UBIFS_MIN_BUD_LEBS)

              max_buds =UBIFS_MIN_BUD_LEBS;

 

       /*

        * Orphan nodes are stored in a separate area.One node can store a lot

        * of orphan inode numbers, but when new orphancomes we just add a new

        * orphan node. At some point the nodes areconsolidated into one

        * orphan node.

        */

       //An orphan is an inodenumber whose inode node has been committed to the index with a link count ofzero. That happens when an open file is deleted (unlinked) and then a commit isrun

       // The orphan area is afixed number of LEBs situated between the LPT area and the main area

       // orphan 顾名思义是指牺牲者,在ubifs中的当一inode的引用为零的时候,这个文件需要被删除,为了防止在删除的时候发生unclean rebootubifs将这些需要删除的文件信息写在orphan area中,这样在发生unclean reboot的时候文件系统可以清楚的知道哪些文件需要被删除,而不是去扫描整个分区。文件系统在没有空余空间的时候也可以通过GC子系统来回收这些空间。关于orphan 的相关信息就保存在orphan area中,The orphan area is a fixed numberof LEBs situated between the LPT area and the main area

       orph_lebs =UBIFS_MIN_ORPH_LEBS;

#ifdef CONFIG_UBIFS_FS_DEBUG

       if (c->leb_cnt -min_leb_cnt > 1)

              /*

               * For debugging purposes it is better to haveat least 2

               * orphan LEBs, because the orphan subsystemwould need to do

               * consolidations and would be stressed more.

               */

              orph_lebs += 1;

#endif

 

       main_lebs =c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs;

       main_lebs -=orph_lebs;

       //上面提到了,orphan区处于LPT区和main area之间。什么是LPT?LPT = LEB Properties Tree。

       lpt_first =UBIFS_LOG_LNUM + log_lebs;

       c->lsave_cnt =DEFAULT_LSAVE_CNT;

       c->max_leb_cnt =c->leb_cnt;

       err = ubifs_create_dflt_lpt(c, &main_lebs,lpt_first, &lpt_lebs,

                                &big_lpt);

*********************************************************************************

ubifs_create_dflt_lpt算出LPT需要占用几块LEB。LPT描述的是ubifs中每一个LEB的空闲(free)字节数和脏(dirty)字节数。(这儿的“脏”好像并不是指被修改的意思:从代码pnode->lprops[0].dirty = iopos - node_sz;来看,大体的意思是没有被写、但是别人也不能用的空间。因为flash操作的基本单元是page,如果在某一页中只写了一半的数据,那么另外一半就是脏的,虽然没有写东西,但是别人也用不了。Dirty space is the number of bytes taken up by obsolete nodes and padding, that can potentially be reclaimed by garbage collection.)因为LPT区自己也占用了LEB,所以需要建立LPT自己的表,这就像内核在启动的过程中建立自己的页表一样。

a)        为根index节点和根inode节点所占的LEB创建LEB properties

b)       为其余所有的pnode节点建立信息,同时将信息写入flash media

 

**********************************************************************************

if (err)

              return err;

       dbg_gen("LEBProperties Tree created (LEBs %d-%d)", lpt_first,

              lpt_first +lpt_lebs - 1);

 

       main_first =c->leb_cnt - main_lebs;

 

       /* Create defaultsuperblock */

       tmp =ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);

       sup = kzalloc(tmp,GFP_KERNEL);

       if (!sup)

              return -ENOMEM;

 

       tmp64 = (longlong)max_buds * c->leb_size;

       if (big_lpt)

              sup_flags |=UBIFS_FLG_BIGLPT;

       //初始化superblock节点

       sup->ch.node_type  = UBIFS_SB_NODE;

       sup->key_hash      = UBIFS_KEY_HASH_R5;

       sup->flags         = cpu_to_le32(sup_flags);

       sup->min_io_size   = cpu_to_le32(c->min_io_size);

       sup->leb_size      = cpu_to_le32(c->leb_size);

       sup->leb_cnt       = cpu_to_le32(c->leb_cnt);

       sup->max_leb_cnt   = cpu_to_le32(c->max_leb_cnt);

       sup->max_bud_bytes= cpu_to_le64(tmp64);

       sup->log_lebs      = cpu_to_le32(log_lebs);

       sup->lpt_lebs      = cpu_to_le32(lpt_lebs);

       sup->orph_lebs     = cpu_to_le32(orph_lebs);

       sup->jhead_cnt     = cpu_to_le32(DEFAULT_JHEADS_CNT);

       sup->fanout        = cpu_to_le32(DEFAULT_FANOUT);

       sup->lsave_cnt     = cpu_to_le32(c->lsave_cnt);

       sup->fmt_version   = cpu_to_le32(UBIFS_FORMAT_VERSION);

       sup->time_gran     = cpu_to_le32(DEFAULT_TIME_GRAN);

       if(c->mount_opts.override_compr)

              sup->default_compr= cpu_to_le16(c->mount_opts.compr_type);

       else

              sup->default_compr= cpu_to_le16(UBIFS_COMPR_LZO);

 

       generate_random_uuid(sup->uuid);

 

       main_bytes = (longlong)main_lebs * c->leb_size;

       tmp64 = div_u64(main_bytes* DEFAULT_RP_PERCENT, 100);

       if (tmp64 >DEFAULT_MAX_RP_SIZE)

              tmp64 =DEFAULT_MAX_RP_SIZE;

       sup->rp_size =cpu_to_le64(tmp64);

       sup->ro_compat_version= cpu_to_le32(UBIFS_RO_COMPAT_VERSION);

       //写入superblock 节点到LEB0

       err =ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);

       kfree(sup);

       if (err)

              return err;

 

       dbg_gen("defaultsuperblock created at LEB 0:0");

 

       /* Create defaultmaster node */

       mst =kzalloc(c->mst_node_alsz, GFP_KERNEL);

       if (!mst)

              return -ENOMEM;

       //初始化master节点

       mst->ch.node_type =UBIFS_MST_NODE;

       mst->log_lnum     = cpu_to_le32(UBIFS_LOG_LNUM);

       mst->highest_inum =cpu_to_le64(UBIFS_FIRST_INO);

       mst->cmt_no       = 0;

       mst->root_lnum    = cpu_to_le32(main_first +DEFAULT_IDX_LEB);

       mst->root_offs    = 0;

       tmp =ubifs_idx_node_sz(c, 1);

       mst->root_len     = cpu_to_le32(tmp);

       mst->gc_lnum      = cpu_to_le32(main_first +DEFAULT_GC_LEB);

       mst->ihead_lnum   = cpu_to_le32(main_first + DEFAULT_IDX_LEB);

       mst->ihead_offs   = cpu_to_le32(ALIGN(tmp,c->min_io_size));

       mst->index_size   = cpu_to_le64(ALIGN(tmp, 8));

       mst->lpt_lnum     = cpu_to_le32(c->lpt_lnum);

       mst->lpt_offs     = cpu_to_le32(c->lpt_offs);

       mst->nhead_lnum   = cpu_to_le32(c->nhead_lnum);

       mst->nhead_offs   = cpu_to_le32(c->nhead_offs);

       mst->ltab_lnum    = cpu_to_le32(c->ltab_lnum);

       mst->ltab_offs    = cpu_to_le32(c->ltab_offs);

       mst->lsave_lnum   = cpu_to_le32(c->lsave_lnum);

       mst->lsave_offs   = cpu_to_le32(c->lsave_offs);

       mst->lscan_lnum   = cpu_to_le32(main_first);

       mst->empty_lebs   = cpu_to_le32(main_lebs - 2);

       mst->idx_lebs     = cpu_to_le32(1);

       mst->leb_cnt      = cpu_to_le32(c->leb_cnt);

 

       /* Calculate lpropsstatistics */

       tmp64 = main_bytes;

       tmp64 -=ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);

       tmp64 -=ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);

       mst->total_free =cpu_to_le64(tmp64);

 

       tmp64 =ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);

       ino_waste =ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) -

                       UBIFS_INO_NODE_SZ;

       tmp64 += ino_waste;

       tmp64 -=ALIGN(ubifs_idx_node_sz(c, 1), 8);

       mst->total_dirty =cpu_to_le64(tmp64);

 

       /*  The indexing LEB does not contribute to darkspace */

       tmp64 =(c->main_lebs - 1) * c->dark_wm;

       mst->total_dark =cpu_to_le64(tmp64);

 

       mst->total_used =cpu_to_le64(UBIFS_INO_NODE_SZ);

        //master节点一式两份

       err = ubifs_write_node(c,mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0,

                            UBI_UNKNOWN);

       if (err) {

              kfree(mst);

              return err;

       }

       err =ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0,

                            UBI_UNKNOWN);

       kfree(mst);

       if (err)

              return err;

 

       dbg_gen("defaultmaster node created at LEB %d:0", UBIFS_MST_LNUM);

 

       /* Create the rootindexing node */

       tmp =ubifs_idx_node_sz(c, 1);

       // idx节点:从tnc.c中的描述、idx的成员zbranch以及make_idx_node函数看来,idx节点是用来在flash media中保存TNC树的。

       // 内核用struct ubifs_znode结构体来代表flash中的一个idx节点。idx节点的孩子代表真正的数据,当然这些孩子本身可以是一个idx节点,也可以是原始的数据节点。

       // 这儿初始化的是TNC的根节点。

       // 《A Brief Introduction to the Design of UBIFS》中说inode节点和它的数据是分开的,上面的idx节点其实是用来索引数据的。那么struct ubifs_ino_node类型的节点是存放inode的吗?(yes)

       // In UBIFS, inodes have a corresponding inode node which records the number of directory entry links, more simply known as the link count.

       // An inode node is a node that holds the metadata for an inode. Every inode has

       // exactly one (non-obsolete) inode node.

       idx =kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);

       if (!idx)

              return -ENOMEM;

 

       c->key_fmt =UBIFS_SIMPLE_KEY_FMT;

       c->key_hash =key_r5_hash;

 

       idx->ch.node_type =UBIFS_IDX_NODE;

       idx->child_cnt =cpu_to_le16(1);

       ino_key_init(c,&key, UBIFS_ROOT_INO);

       br =ubifs_idx_branch(c, idx, 0);

       key_write_idx(c,&key, &br->key);

       br->lnum =cpu_to_le32(main_first + DEFAULT_DATA_LEB);

       br->len  = cpu_to_le32(UBIFS_INO_NODE_SZ);

       err =ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0,

                            UBI_UNKNOWN);

       kfree(idx);

       if (err)

              return err;

 

       dbg_gen("defaultroot indexing node created LEB %d:0",

              main_first +DEFAULT_IDX_LEB);

 

       /* Create default rootinode */

       tmp =ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);

       ino = kzalloc(tmp,GFP_KERNEL);

       if (!ino)

              return -ENOMEM;

 

       ino_key_init_flash(c,&ino->key, UBIFS_ROOT_INO);

       ino->ch.node_type =UBIFS_INO_NODE;

       ino->creat_sqnum =cpu_to_le64(++c->max_sqnum);

       ino->nlink =cpu_to_le32(2);

       tmp_le64 =cpu_to_le64(CURRENT_TIME_SEC.tv_sec);

       ino->atime_sec   = tmp_le64;

       ino->ctime_sec   = tmp_le64;

       ino->mtime_sec   = tmp_le64;

       ino->atime_nsec  = 0;

       ino->ctime_nsec  = 0;

       ino->mtime_nsec  = 0;

       ino->mode =cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO);

       ino->size =cpu_to_le64(UBIFS_INO_NODE_SZ);

 

       /* Set compressionenabled by default */

       ino->flags =cpu_to_le32(UBIFS_COMPR_FL);

 

       err =ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,

                                   main_first + DEFAULT_DATA_LEB, 0,

                            UBI_UNKNOWN);

       kfree(ino);

       if (err)

              return err;

 

       dbg_gen("rootinode created at LEB %d:0",

              main_first +DEFAULT_DATA_LEB);

 

       /*

        * The first node in the log has to be thecommit start node. This is

        * always the case during normal file-systemoperation. Write a fake

        * commit start node to the log.

        */

       tmp =ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size);

       cs = kzalloc(tmp,GFP_KERNEL);

       if (!cs)

              return -ENOMEM;

 

       cs->ch.node_type =UBIFS_CS_NODE;

       //向log区域写入一个commit start node。每一次commit的时候会向log区域写入两种类型的节点:一种是commit start类型的节点,表示一次commit的开始;另外一种是reference节点,里面记录了相应的日志需要操作的leb和offset。

       err =ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM,

                            0, UBI_UNKNOWN);

       kfree(cs);

       ubifs_msg("defaultfile-system created");

       return 0;

}

 

3.3 ubifs_read_master

本节分析ubifs文件系统master节点的读取。我们前面提到master节点是一式两份的,因为它里面保存的是索引(idx)最基本的信息,不容有失。而且两份master节点不能同时写,以防止unclean reboot使得两份数据同时被破坏。

int ubifs_read_master(struct ubifs_info *c)

{

       int err, old_leb_cnt;

       c->mst_node =kzalloc(c->mst_node_alsz, GFP_KERNEL);

       if (!c->mst_node)

              return -ENOMEM;

       //检查两份master节点,看是master中的数据是否被破坏。

       err =scan_for_master(c);

       if (err) {

              if (err ==-EUCLEAN)

                     //如果被破坏,那么就需要恢复

                     err =ubifs_recover_master_node(c);

              if (err)

                     /*

                      * Note, we do not free 'c->mst_node' herebecause the

                      * unmount routine will take care of this.

                      */

                     returnerr;

       }

 

       /* Make sure that therecovery flag is clear */

       //master节点来初始化ubifs_info结构体中的信息

       c->mst_node->flags&= cpu_to_le32(~UBIFS_MST_RCVRY);

       c->max_sqnum       =le64_to_cpu(c->mst_node->ch.sqnum);

       c->highest_inum    =le64_to_cpu(c->mst_node->highest_inum);

       c->cmt_no          = le64_to_cpu(c->mst_node->cmt_no);

       c->zroot.lnum      =le32_to_cpu(c->mst_node->root_lnum);

       c->zroot.offs      =le32_to_cpu(c->mst_node->root_offs);

       c->zroot.len       =le32_to_cpu(c->mst_node->root_len);

       c->lhead_lnum      =le32_to_cpu(c->mst_node->log_lnum);

       c->gc_lnum         =le32_to_cpu(c->mst_node->gc_lnum);

       c->ihead_lnum      =le32_to_cpu(c->mst_node->ihead_lnum);

       c->ihead_offs      =le32_to_cpu(c->mst_node->ihead_offs);

       c->old_idx_sz      =le64_to_cpu(c->mst_node->index_size);

       c->lpt_lnum        =le32_to_cpu(c->mst_node->lpt_lnum);

       c->lpt_offs        =le32_to_cpu(c->mst_node->lpt_offs);

       c->nhead_lnum      =le32_to_cpu(c->mst_node->nhead_lnum);

       c->nhead_offs      =le32_to_cpu(c->mst_node->nhead_offs);

       c->ltab_lnum       = le32_to_cpu(c->mst_node->ltab_lnum);

       c->ltab_offs       =le32_to_cpu(c->mst_node->ltab_offs);

       c->lsave_lnum      =le32_to_cpu(c->mst_node->lsave_lnum);

       c->lsave_offs      =le32_to_cpu(c->mst_node->lsave_offs);

       c->lscan_lnum      =le32_to_cpu(c->mst_node->lscan_lnum);

       c->lst.empty_lebs  = le32_to_cpu(c->mst_node->empty_lebs);

       c->lst.idx_lebs    = le32_to_cpu(c->mst_node->idx_lebs);

       old_leb_cnt        =le32_to_cpu(c->mst_node->leb_cnt);

       c->lst.total_free  = le64_to_cpu(c->mst_node->total_free);

       c->lst.total_dirty= le64_to_cpu(c->mst_node->total_dirty);

       c->lst.total_used  = le64_to_cpu(c->mst_node->total_used);

       c->lst.total_dead  = le64_to_cpu(c->mst_node->total_dead);

       c->lst.total_dark  = le64_to_cpu(c->mst_node->total_dark);

 

       c->calc_idx_sz =c->old_idx_sz;

 

       if(c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))

              c->no_orphs= 1;

 

       if (old_leb_cnt !=c->leb_cnt) {

              /* The filesystem has been resized */

              int growth =c->leb_cnt - old_leb_cnt;

 

              if(c->leb_cnt < old_leb_cnt ||

                  c->leb_cnt < UBIFS_MIN_LEB_CNT) {

                     ubifs_err("badleb_cnt on master node");

                     dbg_dump_node(c,c->mst_node);

                     return-EINVAL;

              }

              dbg_mnt("Autoresizing (master) from %d LEBs to %d LEBs",

                     old_leb_cnt,c->leb_cnt);

              c->lst.empty_lebs+= growth;

              c->lst.total_free+= growth * (long long)c->leb_size;

              c->lst.total_dark+= growth * (long long)c->dark_wm;

              /*

               * Reflect changes back onto the master node.N.B. the master

               * node gets written immediately whenevermounting (or

               * remounting) in read-write mode, so we do notneed to write it

               * here.

               */

              c->mst_node->leb_cnt= cpu_to_le32(c->leb_cnt);

              c->mst_node->empty_lebs= cpu_to_le32(c->lst.empty_lebs);

              c->mst_node->total_free= cpu_to_le64(c->lst.total_free);

              c->mst_node->total_dark= cpu_to_le64(c->lst.total_dark);

       }

       err =validate_master(c);

       if (err)

              return err;

       err =dbg_old_index_check_init(c, &c->zroot);

       return err;

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值