Annotated implementation of the sys_umount system call (Linux 2.4 kernel)

May 20, 2011
/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 */
/* The system call that unmounts a filesystem. */
asmlinkage long sys_umount(char * name, int flags)
{
    struct nameidata nd;
    char *kname;
    int retval;

    lock_kernel();
    kname = getname(name);
    retval = PTR_ERR(kname);
    if (IS_ERR(kname))
        goto out;
    retval = 0;
    /* kulv: get the nameidata of the directory being unmounted; it carries
     * the directory's dentry and inode. */
    if (path_init(kname, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd))
        retval = path_walk(kname, &nd);
    putname(kname);
    if (retval)
        goto out;
    retval = -EINVAL;
    /* nd.mnt is the vfsmount that represents this mount; its mnt_root points
     * to the root dentry of the mounted device. If the dentry we looked up is
     * not that root dentry, something is wrong. */
    if (nd.dentry != nd.mnt->mnt_root)
        goto dput_and_out;
    retval = -EPERM;
    if (!capable(CAP_SYS_ADMIN) && current->uid != nd.mnt->mnt_owner)
        goto dput_and_out;  /* neither an administrator nor the owner of the mount */

    dput(nd.dentry);  /* drop the reference */
    /* puts nd.mnt */
    down(&mount_sem);
    retval = do_umount(nd.mnt, 0, flags);  /* this is where the real work happens */
    up(&mount_sem);
    goto out;
dput_and_out:
    path_release(&nd);
out:
    unlock_kernel();
    return retval;
}
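For reference, here is a minimal user-space sketch (not part of the kernel listing) of how the flags argument above is supplied. The mount point "/mnt/usb" is only a placeholder; umount2() and MNT_FORCE are the standard glibc interface to this system call.

#include <stdio.h>
#include <sys/mount.h>      /* umount2(), MNT_FORCE */

int main(void)
{
    /* A plain umount(2) reaches sys_umount() with flags == 0; MNT_FORCE sets
     * the bit that makes do_umount() call the filesystem's umount_begin()
     * hook before tearing the mount down. */
    if (umount2("/mnt/usb", MNT_FORCE) < 0) {
        perror("umount2");  /* e.g. EBUSY while the mount is still in use, EPERM without privilege */
        return 1;
    }
    return 0;
}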
/* kulv: mnt is the vfsmount of this mount point; it ties together the mounted
 * directory, the superblock and the mount-tree bookkeeping.
 * umount_root is 0 here; flags is passed down from sys_umount(). */
static int do_umount(struct vfsmount *mnt, int umount_root, int flags)
{
    struct super_block * sb = mnt->mnt_sb;  /* get the superblock */

    /*
     * No sense to grab the lock for this test, but test itself looks
     * somewhat bogus. Suggestions for better replacement?
     * Ho-hum... In principle, we might treat that as umount + switch
     * to rootfs. GC would eventually take care of the old vfsmount.
     * The problem being: we have to implement rootfs and GC for that ;-)
     * Actually it makes sense, especially if rootfs would contain a
     * /reboot - static binary that would close all descriptors and
     * call reboot(9). Then init(8) could umount root and exec /reboot.
     */
    if (mnt == current->fs->rootmnt && !umount_root) {
        int retval = 0;  /* the target is the current process' root directory */
        /*
         * Special case for "unmounting" root ...
         * we just try to remount it readonly.
         */
        mntput(mnt);
        /* flags arrives here as 0 from sys_umount, so the remount below is the
         * most a user process can do -- it cannot actually unmount its own root. */
        if (!(sb->s_flags & MS_RDONLY))
            retval = do_remount_sb(sb, MS_RDONLY, 0);
        return retval;
    }

    spin_lock(&dcache_lock);

    /* If the device is mounted more than once, i.e. the superblock has more
     * than one vfsmount hanging off it, then ... */
    if (mnt->mnt_instances.next != mnt->mnt_instances.prev) {
        if (atomic_read(&mnt->mnt_count) > 2) {
            /* a count above 2 means someone else still holds this vfsmount */
            spin_unlock(&dcache_lock);
            mntput(mnt);  /* just drop our reference */
            return -EBUSY;
        }
        if (sb->s_type->fs_flags & FS_SINGLE)
            /* FS_SINGLE: the superblock is shared by every mount of this fs type */
            put_filesystem(sb->s_type);
        /* We hold two references, so mntput() is safe */
        mntput(mnt);
        /* The superblock (the device) is mounted elsewhere as well, so we must
         * not tear the device down; we only undo this one mount instance.
         * remove_vfsmnt() below merely unlinks the vfsmount from the various
         * lists and drops the related counts, so it can no longer be reached. */
        remove_vfsmnt(mnt);
        return 0;
    }
    spin_unlock(&dcache_lock);

    /* From here on this is the device's only mount, which takes more work. */

    /*
     * Before checking whether the filesystem is still busy,
     * make sure the kernel doesn't hold any quota files open
     * on the device. If the umount fails, too bad -- there
     * are no quotas running any more. Just turn them on again.
     */
    DQUOT_OFF(sb);  /* turn off disk quotas for this device */
    acct_auto_close(sb->s_dev);

    /*
     * If we may have to abort operations to get out of this
     * mount, and they will themselves hold resources we must
     * allow the fs to do things. In the Unix tradition of
     * 'Gee thats tricky lets do it in userspace' the umount_begin
     * might fail to complete on the first run through as other tasks
     * must return, and the like. Thats for the mount program to worry
     * about for the moment.
     */
    if ((flags & MNT_FORCE) && sb->s_op->umount_begin)
        sb->s_op->umount_begin(sb);  /* let the filesystem do its own unmount preparation;
                                      * struct super_operations is defined in include/linux/fs.h */

    /*
     * Shrink dcache, then fsync. This guarantees that if the
     * filesystem is quiescent at this point, then (a) only the
     * root entry should be in use and (b) that root entry is
     * clean.
     */
    shrink_dcache_sb(sb);   /* drop this device's dentries cached on the dentry_unused list */
    fsync_dev(sb->s_dev);   /* flush data cached in memory but not yet written back to the device */

    if (sb->s_root->d_inode->i_state) {
        mntput(mnt);
        return -EBUSY;
    }

    /* Something might grab it again - redo checks */
    spin_lock(&dcache_lock);
    if (atomic_read(&mnt->mnt_count) > 2) {
        spin_unlock(&dcache_lock);
        mntput(mnt);
        return -EBUSY;
    }

    /* OK, that's the point of no return */
    mntput(mnt);
    /* The device's buffer cache, inodes, dirty pages, locked pages and sleeping
     * waiters have all been dealt with, so unlink the vfsmount from the various
     * lists and drop the related counts; those lists are exactly what wires a
     * mount point into the tree, so after this it can no longer be reached. */
    remove_vfsmnt(mnt);
    /* kill_super() winds up the superblock: it calls the super_operations to
     * notify the filesystem, frees the associated caches, and then returns. */
    kill_super(sb, umount_root);
    return 0;
}

/*
 * Called with spinlock held, releases it.
 */
static void remove_vfsmnt(struct vfsmount *mnt)
{
    /* This only undoes one of the (possibly several) mounts of the device. */
    /* First of all, remove it from all lists */
    list_del(&mnt->mnt_instances);  /* leave the superblock's list of mounts (it hangs on sb->s_mounts) */
    list_del(&mnt->mnt_clash);      /* several filesystems can be mounted on one mount point;
                                     * mnt_clash hangs on the mount-point dentry's d_vfsmnt list */
    list_del(&mnt->mnt_list);       /* leave the global vfsmntlist that records every vfsmount in the system */
    list_del(&mnt->mnt_child);      /* leave the parent mount; our own mnt_mounts likewise lists the mounts below us */
    spin_unlock(&dcache_lock);

    /* Now we can work safely */
    if (mnt->mnt_parent != mnt)  /* if the parent is ourselves, this is the root mount */
        mntput(mnt->mnt_parent);

    dput(mnt->mnt_mountpoint);  /* the dentry of the mount point */
    dput(mnt->mnt_root);        /* the dentry of the device's root directory */
    if (mnt->mnt_devname)
        kfree(mnt->mnt_devname);
    kfree(mnt);
}
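The mnt_instances test at the top of do_umount() relies on a property of the kernel's circular doubly linked lists: a node's next and prev pointers coincide only when the list contains just that node and its head. The standalone sketch below is not kernel code; the names s_mounts, mnt_a and mnt_b merely mirror the kernel fields for illustration.

#include <stdio.h>

struct node { struct node *next, *prev; };

static void list_init(struct node *head) { head->next = head->prev = head; }

static void list_add(struct node *item, struct node *head)
{
    /* insert right after the head, as the kernel's list_add() does */
    item->next = head->next;
    item->prev = head;
    head->next->prev = item;
    head->next = item;
}

int main(void)
{
    struct node s_mounts, mnt_a, mnt_b;   /* illustrative stand-ins, not real vfsmounts */

    list_init(&s_mounts);
    list_add(&mnt_a, &s_mounts);
    printf("one mount:  next %s prev\n", mnt_a.next == mnt_a.prev ? "==" : "!=");

    list_add(&mnt_b, &s_mounts);
    printf("two mounts: next %s prev\n", mnt_a.next == mnt_a.prev ? "==" : "!=");
    return 0;
}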
/*
 * Shrink the dcache for the specified super block.
 * This allows us to unmount a device without disturbing
 * the dcache for the other devices.
 *
 * This implementation makes just two traversals of the
 * unused list. On the first pass we move the selected
 * dentries to the most recent end, and on the second
 * pass we free them. The second pass must restart after
 * each dput(), but since the target dentries are all at
 * the end, it's really just a single traversal.
 */

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This
 * is used to free the dcache before unmounting a file
 * system
 */
/* Drop every cached dentry that belongs to the given superblock (device). */
void shrink_dcache_sb(struct super_block * sb)
{
    struct list_head *tmp, *next;
    struct dentry *dentry;

    /*
     * Pass one ... move the dentries for the specified
     * superblock to the most recent end of the unused list.
     */
    spin_lock(&dcache_lock);
    /* dentry_unused is the global list that caches dentries which are
     * currently unused and candidates for reclaim. */
    next = dentry_unused.next;
    while (next != &dentry_unused) {
        tmp = next;
        next = tmp->next;
        dentry = list_entry(tmp, struct dentry, d_lru);
        if (dentry->d_sb != sb)  /* skip dentries that are not ours */
            continue;
        list_del(tmp);                   /* take it off dentry_unused ... */
        list_add(tmp, &dentry_unused);   /* ... and re-insert it right at the head */
    }
    /* After this pass every matching dentry sits, in reverse order, at the
     * front of dentry_unused. */

    /*
     * Pass two ... free the dentries for this superblock.
     */
repeat:
    next = dentry_unused.next;
    while (next != &dentry_unused) {  /* walk the list once more */
        tmp = next;
        next = tmp->next;
        dentry = list_entry(tmp, struct dentry, d_lru);
        if (dentry->d_sb != sb)
            continue;  /* is this needed? we could simply stop here, since pass one
                        * already moved every matching dentry to the front */
        if (atomic_read(&dentry->d_count))
            continue;
        dentry_stat.nr_unused--;  /* one fewer unused dentry */
        list_del(tmp);            /* now it is really removed */
        INIT_LIST_HEAD(tmp);      /* is this... actually necessary? */
        prune_one_dentry(dentry); /* finish the dentry off: unlink its hash chain and other pointers */
        goto repeat;
    }
    spin_unlock(&dcache_lock);
}

static inline void prune_one_dentry(struct dentry * dentry)
{
    struct dentry * parent;

    list_del_init(&dentry->d_hash);
    list_del(&dentry->d_child);
    dentry_iput(dentry);
    parent = dentry->d_parent;
    d_free(dentry);
    if (parent != dentry)  /* if this test fails, does that mean the dentry is the root? */
        dput(parent);
    spin_lock(&dcache_lock);
}

/* KULV: dev is the device number. */
int fsync_dev(kdev_t dev)
{
    /* First call to sync_buffers(); the 0 means do not wait: dirty buffers are
     * flushed, but locked ones are walked past without stopping. */
    sync_buffers(dev, 0);

    lock_kernel();
    /* Handle the superblocks too: walk the global super_blocks list for the
     * matching device number and, if the filesystem provides
     * sb->s_op->write_super(sb), call it (for ext2 this mostly refreshes
     * LRU information and timestamps). */
    sync_supers(dev);
    sync_inodes(dev);
    DQUOT_SYNC(dev);
    unlock_kernel();

    /* Final call: if a buffer is locked we now wait for it, which may put the
     * process to sleep and trigger a reschedule. */
    return sync_buffers(dev, 1);
}

/* Call sync_buffers with wait!=0 to ensure that the call does not
 * return until all buffer writes have completed. Sync() may return
 * before the writes have finished; fsync() may not.
 */

/* Godamity-damn. Some buffers (bitmaps for filesystems)
 * spontaneously dirty themselves without ever brelse being called.
 * We will ultimately want to put these in a separate list, but for
 * now we search all of the lists for dirty buffers.
 */
/* wait is 0 on the first call (do not wait) and 1 on the last (wait). */
static int sync_buffers(kdev_t dev, int wait)
{
    int i, retry, pass = 0, err = 0;
    struct buffer_head * bh, *next;

    /* One pass for no-wait, three for wait:
     * 0) write out all dirty, unlocked buffers;
     * 1) write out all dirty buffers, waiting if locked;
     * 2) wait for completion by waiting for all buffers to unlock.
     */
    do {
        retry = 0;

        /* We search all lists as a failsafe mechanism, not because we expect
         * there to be dirty buffers on any of the other lists.
         */
repeat:
        spin_lock(&lru_list_lock);
        /* Head of the dirty list; buffers migrate to other lists, e.g. the
         * locked list, as they are written. */
        bh = lru_list[BUF_DIRTY];
        if (!bh)
            goto repeat2;

        /* nr_buffers_type[] holds the number of buffers on each list. */
        for (i = nr_buffers_type[BUF_DIRTY]*2 ; i-- > 0 ; bh = next) {
            next = bh->b_next_free;

            if (!lru_list[BUF_DIRTY])  /* can this happen? bh was already checked above... */
                break;
            if (dev && bh->b_dev != dev)  /* skip buffers of other devices (is the leading "dev &&" still needed?) */
                continue;
            if (buffer_locked(bh)) {  /* the buffer is locked, handle that case here */
                /* Buffer is locked; skip it unless wait is
                 * requested AND pass > 0. */
                if (!wait || !pass) {
                    retry = 1;  /* on the no-wait pass just skip it and remember to retry */
                    continue;
                }
                atomic_inc(&bh->b_count);
                spin_unlock(&lru_list_lock);
                wait_on_buffer (bh);
                atomic_dec(&bh->b_count);
                goto repeat;
            }

            /* From here on the buffer is not locked. */

            /* If an unlocked buffer is not uptodate, there has
             * been an IO error. Skip it.
             */
            if (wait && buffer_req(bh) && !buffer_locked(bh) &&
                !buffer_dirty(bh) && !buffer_uptodate(bh)) {
                err = -EIO;
                continue;
            }

            /* Don't write clean buffers. Don't write ANY buffers
             * on the third pass.
             */
            if (!buffer_dirty(bh) || pass >= 2)
                continue;

            atomic_inc(&bh->b_count);
            spin_unlock(&lru_list_lock);
            ll_rw_block(WRITE, 1, &bh);  /* push it out to the disk */
            atomic_dec(&bh->b_count);
            retry = 1;
            goto repeat;  /* why restart after one successful write? presumably because
                           * the list may have changed once the lock was dropped */
        }

repeat2:
        bh = lru_list[BUF_LOCKED];
        if (!bh) {
            spin_unlock(&lru_list_lock);
            break;
        }
        for (i = nr_buffers_type[BUF_LOCKED]*2 ; i-- > 0 ; bh = next) {
            next = bh->b_next_free;

            if (!lru_list[BUF_LOCKED])
                break;
            if (dev && bh->b_dev != dev)
                continue;
            if (buffer_locked(bh)) {
                /* Buffer is locked; skip it unless wait is
                 * requested AND pass > 0. */
                if (!wait || !pass) {
                    retry = 1;
                    continue;
                }
                atomic_inc(&bh->b_count);
                spin_unlock(&lru_list_lock);
                wait_on_buffer (bh);  /* wait for the buffer to be unlocked; this may reschedule */
                spin_lock(&lru_list_lock);
                atomic_dec(&bh->b_count);
                goto repeat2;  /* restart because the list may have changed while we slept? */
            }
        }
        spin_unlock(&lru_list_lock);

        /* If we are waiting for the sync to succeed, and if any dirty
         * blocks were written, then repeat; on the second pass, only
         * wait for buffers being written (do not pass to write any
         * more buffers on the second pass).
         */
    } while (wait && retry && ++pass <= 2);
    return err;
}
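To make the do/while condition at the bottom of sync_buffers() concrete: with wait == 0 the body runs exactly once (pass 0), with wait == 1 it runs at most three times (passes 0, 1 and 2), matching the "one pass for no-wait, three for wait" comment. The standalone sketch below is not kernel code; retry is forced to 1 purely to exercise the loop.

#include <stdio.h>

static void show_passes(int wait)
{
    int retry, pass = 0;

    do {
        retry = 1;              /* pretend some buffer still needs attention */
        printf("wait=%d: running pass %d\n", wait, pass);
    } while (wait && retry && ++pass <= 2);
}

int main(void)
{
    show_passes(0);  /* prints one line: pass 0 */
    show_passes(1);  /* prints three lines: passes 0, 1, 2 */
    return 0;
}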
void sync_supers(kdev_t dev)
{
    struct super_block * sb;

    for (sb = sb_entry(super_blocks.next);
         sb != sb_entry(&super_blocks);
         sb = sb_entry(sb->s_list.next)) {
        if (!sb->s_dev)
            continue;
        if (dev && sb->s_dev != dev)  /* not ours, skip it */
            continue;
        if (!sb->s_dirt)
            continue;
        lock_super(sb);
        if (sb->s_dev && sb->s_dirt && (!dev || dev == sb->s_dev))
            if (sb->s_op && sb->s_op->write_super)
                sb->s_op->write_super(sb);
        /* If the filesystem provides a write_super method, call it. For ext2
         * this is ext2_write_super(), registered as shown below; ext2 does not
         * do much there beyond refreshing LRU information and timestamps.
         *
         * static struct super_operations ext2_sops = {
         *     read_inode:   ext2_read_inode,
         *     write_inode:  ext2_write_inode,
         *     put_inode:    ext2_put_inode,
         *     delete_inode: ext2_delete_inode,
         *     put_super:    ext2_put_super,
         *     write_super:  ext2_write_super,
         *     statfs:       ext2_statfs,
         *     remount_fs:   ext2_remount,
         * };
         */
        unlock_super(sb);
    }
}

/**
 * sync_inodes
 * @dev: device to sync the inodes from.
 *
 * sync_inodes goes through the super block's dirty list,
 * writes them out, and puts them back on the normal list.
 */
void sync_inodes(kdev_t dev)
{
    struct super_block * sb = sb_entry(super_blocks.next);

    /*
     * Search the super_blocks array for the device(s) to sync.
     */
    spin_lock(&inode_lock);
    for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
        if (!sb->s_dev)
            continue;
        if (dev && sb->s_dev != dev)
            continue;
        /* Found a superblock that belongs to the device: s_dirty lists this
         * superblock's (device's) dirty inodes, write them all back. */
        sync_list(&sb->s_dirty);
        if (dev)
            break;
    }
    spin_unlock(&inode_lock);
}

static inline void sync_list(struct list_head *head)
{
    struct list_head * tmp;

    /* Simply take each inode in turn and flush it. sync_one() pushes all of
     * the inode's dirty and locked pages out to disk and then wakes up every
     * process waiting on the inode. */
    while ((tmp = head->prev) != head)
        sync_one(list_entry(tmp, struct inode, i_list), 0);
}

static inline void sync_one(struct inode *inode, int sync)
{
    if (inode->i_state & I_LOCK) {  /* not obvious: can a locked inode not be dirty? */
        __iget(inode);              /* it is locked, so take a reference first lest someone else grab it meanwhile */
        spin_unlock(&inode_lock);
        __wait_on_inode(inode);     /* nothing else for it, we have to wait */
        iput(inode);                /* drop the reference */
        spin_lock(&inode_lock);
    } else {
        unsigned dirty;

        list_del(&inode->i_list);   /* take it off the dirty list ... */
        list_add(&inode->i_list, atomic_read(&inode->i_count)
                                 ? &inode_in_use
                                 : &inode_unused);  /* ... and move it to in-use or unused; either way it stays cached */
        /* Set I_LOCK, reset I_DIRTY */
        dirty = inode->i_state & I_DIRTY;  /* remember whether it was dirty */
        inode->i_state |= I_LOCK;          /* locked now, hands off */
        inode->i_state &= ~I_DIRTY;        /* clear the dirty bits */
        spin_unlock(&inode_lock);

        /* i_mapping is the page-cache mapping, and there is a lot behind it:
         * this call writes mapping->dirty_pages, i.e. all of the inode's
         * cached dirty pages, out to disk. */
        filemap_fdatasync(inode->i_mapping);

        /* The dirty pages have just been written; if the inode itself was
         * dirty, tell the filesystem about it. */
        /* Don't write the inode if only I_DIRTY_PAGES was set */
        if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
            write_inode(inode, sync);  /* this simply calls the filesystem's
                                        * inode->i_sb->s_op->write_inode(inode, sync)
                                        * to write the inode itself */

        /* This time the locked_pages are flushed as well, waiting on each page
         * via ___wait_on_page(page). */
        filemap_fdatawait(inode->i_mapping);

        spin_lock(&inode_lock);
        inode->i_state &= ~I_LOCK;
        wake_up(&inode->i_wait);  /* whoever is sleeping on this inode, wake up */
    }
}
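As a rough user-space analogue of the ordering sync_one() enforces (dirty data pages first, then the inode itself, then wait for the writes), the sketch below uses fdatasync() and fsync() on an ordinary file. The file name example.dat is only a placeholder, and the mapping to the kernel paths is approximate: fdatasync() loosely corresponds to flushing the data pages, while fsync() also writes the inode metadata (compare the I_DIRTY_SYNC check above).

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    int fd = open("example.dat", O_WRONLY | O_CREAT, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (write(fd, "hello\n", 6) != 6)
        perror("write");

    if (fdatasync(fd) < 0)   /* flush the file's data pages */
        perror("fdatasync");
    if (fsync(fd) < 0)       /* flush data plus the inode (size, timestamps, ...) */
        perror("fsync");

    close(fd);
    return 0;
}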