直接来看下ftrace打印出来的路径:
3) | vfs_write() {
3) | do_sync_write() {
3) 2.805 us | ocfs2_file_aio_write();
3) 3.285 us | }
3) 4.392 us | }
vfs_write()代码逻辑很简单:检查参数,确认用户态buf可读,回调具体文件系统实现的write方法(没有.write时退回到do_sync_write()),写入成功(ret > 0)后触发fsnotify,
然后更新进程IO记账:写入成功时累加写字节数(add_wchar),并且只要rw_verify_area()通过就自增写系统调用数(inc_syscw)。
404 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
405 {
406 ssize_t ret;
407
408 if (!(file->f_mode & FMODE_WRITE))
409 return -EBADF;
410 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
411 return -EINVAL;
412 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
413 return -EFAULT;
414
415 ret = rw_verify_area(WRITE, file, pos, count);
416 if (ret >= 0) {
417 count = ret;
418 if (file->f_op->write)
419 ret = file->f_op->write(file, buf, count, pos);
420 else
421 ret = do_sync_write(file, buf, count, pos);
422 if (ret > 0) {
423 fsnotify_modify(file);
424 add_wchar(current, ret);
425 }
426 inc_syscw(current);
427 }
428
429 return ret;
430 }
ocfs2的文件操作表,并没有直接将.write和ocfs2_file_aio_write()挂钩,fs/ocfs2/file.c:
2656 const struct file_operations ocfs2_fops = {
2657 .llseek = generic_file_llseek,
2658 .read = do_sync_read,
2659 .write = do_sync_write,
2660 .mmap = ocfs2_mmap,
2661 .fsync = ocfs2_sync_file,
2662 .release = ocfs2_file_release,
2663 .open = ocfs2_file_open,
2664 .aio_read = ocfs2_file_aio_read,
2665 .aio_write = ocfs2_file_aio_write,
2666 .unlocked_ioctl = ocfs2_ioctl,
2667 #ifdef CONFIG_COMPAT
2668 .compat_ioctl = ocfs2_compat_ioctl,
2669 #endif
2670 .lock = ocfs2_lock,
2671 .flock = ocfs2_flock,
2672 .splice_read = ocfs2_file_splice_read,
2673 .splice_write = ocfs2_file_splice_write,
2674 .fallocate = ocfs2_fallocate,
2675 };
2676
2677 const struct file_operations ocfs2_dops = {
2678 .llseek = generic_file_llseek,
2679 .read = generic_read_dir,
2680 .readdir = ocfs2_readdir,
2681 .fsync = ocfs2_sync_file,
2682 .release = ocfs2_dir_release,
2683 .open = ocfs2_dir_open,
2684 .unlocked_ioctl = ocfs2_ioctl,
2685 #ifdef CONFIG_COMPAT
2686 .compat_ioctl = ocfs2_compat_ioctl,
2687 #endif
2688 .lock = ocfs2_lock,
2689 .flock = ocfs2_flock,
2690 };
do_sync_write()是vfs层的出口,也是ocfs2文件系统层的入口。具体文件系统接下来是要跟块设备打交道的,是时候甩掉vfs层的东西了:从参数上来看,就是把file结构体、用户态buf、文件指针ppos,转换成kiocb和iovec结构体。kiocb的很多字段是从file结构体mirror过来的,还有字段指向当前进程。
我测试了下,读写同一个文件的情况下wait_on_retry_sync_kiocb()和wait_on_sync_kiocb()都没有被调用。从下面的代码可以看出,只有aio_write()返回-EIOCBRETRY时才会调用前者,返回-EIOCBQUEUED时才会调用后者;至于具体什么场景下会返回这两个值,还不清楚。
fs/read_write.c:
378 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
379 {
380 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
381 struct kiocb kiocb;
382 ssize_t ret;
383
384 init_sync_kiocb(&kiocb, filp);
385 kiocb.ki_pos = *ppos;
386 kiocb.ki_left = len;
387 kiocb.ki_nbytes = len;
388
389 for (;;) {
390 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
391 if (ret != -EIOCBRETRY)
392 break;
393 wait_on_retry_sync_kiocb(&kiocb);
394 }
395
396 if (-EIOCBQUEUED == ret)
397 ret = wait_on_sync_kiocb(&kiocb);
398 *ppos = kiocb.ki_pos;
399 return ret;
400 }