文件系统的系统调用过程

最新推荐文章于 2023-12-29 18:14:24 发布

lcw_202

最新推荐文章于 2023-12-29 18:14:24 发布

阅读量1.5k

点赞数

分类专栏： Linux kernel VFS 文章标签： struct buffer file path symlink ext

Linux kernel VFS 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

与文件系统有关的系统调用有好几个，这里分析其中的open和read两个系统调用（分别由sys_open和sys_read实现），其它系统调用请参阅内核源代码。

系统调用open

系统调用open由函数sys_open（fs/open.c）实现。函数sys_open最终通过具体文件系统节点的lookup函数，用文件路径名查找到对应的dentry结构，再通过dentry得到对应的inode结构；同时分配一个文件描述符fd，以fd作为file结构的索引，并用dentry结构中的数据填充这个file结构。以后对这个文件的访问就通过文件描述符来进行。

函数sys_open的调用关系结构如图所示：

图7.1 open操作函数调用图

函数sys_open分析如下（在fs/open.c中）：

/*
 * sys_open - entry point of the open() system call (fs/open.c).
 *
 * @filename: user-space pointer to the path name
 * @flags:    open flags, handed unchanged to filp_open()
 * @mode:     creation mode, handed unchanged to filp_open()
 *
 * Returns the new file descriptor on success.  On failure it returns the
 * negative error value produced by getname(), get_unused_fd() or
 * filp_open().
 */
asmlinkage long sys_open(const char *filename, int flags, int mode)
{
	char *tmp;
	int fd, error;

	/* …… (code elided in the original listing) */

	/* Copy the file name from user space into kernel space. */
	tmp = getname(filename);

	/*
	 * Seed fd with the getname() status so that a failed copy returns
	 * that error instead of an uninitialised value.
	 */
	fd = PTR_ERR(tmp);
	if (!IS_ERR(tmp)) {
		/*
		 * Find the first unused entry in the fd array of the current
		 * process' files_struct; its index becomes the descriptor.
		 */
		fd = get_unused_fd();
		if (fd >= 0) {
			struct file *f = filp_open(tmp, flags, mode);

			error = PTR_ERR(f);
			if (IS_ERR(f))
				goto out_error;

			/*
			 * Attach the file to the current process, i.e.
			 * files->fd[fd] = f; from now on the file is reached
			 * through fd only.
			 */
			fd_install(fd, f);
		}
out:
		/* Release the kernel copy of the name. */
		putname(tmp);
	}
	return fd;

out_error:
	/* filp_open() failed: give the reserved descriptor back. */
	put_unused_fd(fd);
	fd = error;
	goto out;
}


 

/*
 * filp_open - resolve a path name and open the object it names.
 *
 * @filename: kernel-space path name
 * @flags:    open flags as supplied by the caller
 * @mode:     creation mode, forwarded to open_namei()
 *
 * Returns the struct file produced by dentry_open(), or ERR_PTR(error)
 * when open_namei() reports a non-zero error.
 */
struct file *filp_open(const char *filename, int flags, int mode)
{
	struct nameidata nd;
	int lookup_mode = flags;
	int rc;

	/*
	 * Build the flag word used for the name lookup.  The caller's
	 * original flags are passed to dentry_open() untouched.
	 */
	if ((lookup_mode + 1) & O_ACCMODE)
		lookup_mode += 1;
	if (lookup_mode & O_TRUNC)
		lookup_mode |= 2;

	/* Walk the path and fill nd with the resulting dentry and mount. */
	rc = open_namei(filename, lookup_mode, mode, &nd);
	if (rc)
		return ERR_PTR(rc);

	return dentry_open(nd.dentry, nd.mnt, flags);
}

函数filp_open中用到了下述结构：

/* Lookup state handed between the path-resolution functions. */
struct nameidata {
	struct dentry *dentry;	/* dentry corresponding to the path name */
	struct vfsmount *mnt;	/* presumably the mount that dentry belongs to -- confirm */
	struct qstr last;	/* NOTE(review): looks like the final path component -- confirm */
	unsigned int flags;	/* lookup mode flags */
	int last_type;		/* NOTE(review): presumably a LAST_* classification of `last` -- confirm */
};

每个目录的头两项是标准目录项"."和".."，分别代表当前目录和父目录。函数dentry_open根据查询到的nameidata结构填写file结构，从而完成open的操作。函数dentry_open分析如下（在fs/open.c中）：

/*
 * dentry_open - build a struct file for an already resolved dentry.
 *
 * @dentry: dentry of the object being opened
 * @mnt:    mount the dentry was found on
 * @flags:  open flags; stored in f_flags and used to derive f_mode
 *
 * Returns the initialised file on success.  The error paths jump to
 * cleanup labels whose bodies are elided in this listing.
 */
struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
{
	struct file * f;
	struct inode *inode;
	static LIST_HEAD(kill_list);
	int error;

	error = -ENFILE;
	f = get_empty_filp();	/* allocate the file structure f */
	if (!f)
		goto cleanup_dentry;
	f->f_flags = flags;
	f->f_mode = (flags+1) & O_ACCMODE;
	inode = dentry->d_inode;
	if (f->f_mode & FMODE_WRITE) {
		error = get_write_access(inode);	/* obtain write permission on the inode */
		if (error)
			goto cleanup_file;
	}

	f->f_dentry = dentry;
	f->f_vfsmnt = mnt;
	f->f_pos = 0;
	f->f_reada = 0;
	f->f_op = fops_get(inode->i_fop);	/* take over the inode's file operations */
	file_move(f, &inode->i_sb->s_files);	/* move f onto the superblock's s_files list */

	/* preallocate kiobuf for O_DIRECT */
	f->f_iobuf = NULL;
	f->f_iobuf_lock = 0;
	if (f->f_flags & O_DIRECT) {
		error = alloc_kiovec(1, &f->f_iobuf);
		if (error)
			goto cleanup_all;
	}

	if (f->f_op && f->f_op->open) {
		error = f->f_op->open(inode,f);	/* call the open method of the file operations */
		if (error)
			goto cleanup_all;
	}
	/* These flags only matter while opening; drop them from f_flags. */
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

	return f;

cleanup_all:
	……
}

open_namei()函数实际上是打开操作中的主要函数，它的功能是通过路径名得到相应的nameidata结构，它通过path_walk()轮流调用real_lookup()函数，real_lookup再调用各文件系统自己的inode_operations->lookup()，得到给定路径名相应的dentry和vfsmount结构。

函数open_namei的参数flag为00时表示不需要许可，为01 时表示需要读许可，为10时表示需要写许可，为11 时表示需要读/写许可。

open_namei()函数的分析如下（在fs/namei.c中）：

int
 open_namei(
const
 char
 *
 pathname,
 int
 flag,
 int
 mode,
 struct
 nameidata *
nd)


{


	int
 acc_mode,
 error =
 0
;


	struct
 inode *
inode;


	struct
 dentry *
dentry;


	struct
 dentry *
dir;


	int
 count =
 0
;


	//如果不是creat选项，则仅是查找


	 if
 (
!
(
flag &
 O_CREAT)
)
 {


		error =
 path_lookup(
pathname,
 lookup_flags(
flag)
,
 nd)
;


 

		dentry =
 nd->
dentry;


		goto
 ok;


	}


 

	//对于create选项来说，需要知道父目录


	error =
 path_lookup(
pathname,
 LOOKUP_PARENT,
 nd)
;


 

	/*

	 * We have the parent and last component. First of all, check

	 * that we are not asked to creat(2) an obvious directory - that

	 * will not do.

	 */


	error =
 -
EISDIR;


	if
 (
nd->
last_type !=
 LAST_NORM ||
 nd->
last.name
[
nd->
last.len
]
)


		goto
 exit;


 

	dir =
 nd->
dentry;


	down(
&
dir->
d_inode->
i_sem)
;


	dentry =
 lookup_hash(
&
nd->
last,
 nd->
dentry)
;


……

do_last:


	error =
 PTR_ERR(
dentry)
;


	if
 (
IS_ERR(
dentry)
)
 {


		up(
&
dir->
d_inode->
i_sem)
;


		goto
 exit;


	}


	//如果是错误的 dentry,创建file


	if
 (
!
dentry->
d_inode)
 {


		……

        //调用节点操作函数集的inode->i_op->create函数创建dentry


		error =
 vfs_create(
dir->
d_inode,
 dentry,
 mode)
;


		up(
&
dir->
d_inode->
i_sem)
;


		dput(
nd->
dentry)
;


		nd->
dentry =
 dentry;


		……

		goto
 ok;


	}


    对节点进行保护性处理

	……

ok:


	 /* FIFO, sockets 和device文件是特殊文件，它们并不实际存在于文件系统，因而，即使文件系统是可读的，它们也能进行写操作。接着对特殊结点进行判断及处理　　 进行有关link的处理*/


	error =
 may_open(
nd,
 acc_mode,
 flag)
;


	if
 (
error)


		goto
 exit;


	return
 0
;


……

}


 

/*
 * path_lookup - choose the starting point for a lookup and walk the path.
 *
 * @name:  path name to resolve
 * @flags: lookup flags, stored in nd->flags
 * @nd:    lookup state; receives the starting mount/dentry and is then
 *         handed to link_path_walk()
 *
 * Returns 0 if __emul_lookup_dentry() succeeds for the alternate root,
 * otherwise the return value of link_path_walk().
 */
int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
{
	int retval;

	nd->last_type = LAST_ROOT; /* if there are only slashes... */
	nd->flags = flags;
	nd->depth = 0;

	read_lock(&current->fs->lock);
	if (*name=='/') {	/* the path name begins with '/' */
		/* Try the alternate root first, unless LOOKUP_NOALT is set. */
		if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
			nd->mnt = mntget(current->fs->altrootmnt);
			nd->dentry = dget(current->fs->altroot);
			/* The lock is dropped around the alternate-root lookup. */
			read_unlock(&current->fs->lock);
			if (__emul_lookup_dentry(name,nd))
				return 0;
			read_lock(&current->fs->lock);
		}
		nd->mnt = mntget(current->fs->rootmnt);
		nd->dentry = dget(current->fs->root);
	} else {
		nd->mnt = mntget(current->fs->pwdmnt);	/* mount of the current working directory */
		nd->dentry = dget(current->fs->pwd);	/* current working directory */
	}
	read_unlock(&current->fs->lock);
	current->total_link_count = 0;
	retval = link_path_walk(name, nd);	/* resolve the path to its dentry */
	if (unlikely(current->audit_context
		     && nd && nd->dentry && nd->dentry->d_inode))
		audit_inode(name,
			    nd->dentry->d_inode->i_ino,
			    nd->dentry->d_inode->i_rdev);
	return retval;
}

　　函数link_path_walk是基本的路径解析函数，它由路径名查找到最终的dentry结构，将信息保存在结构变量nd中返回。函数link_path_walk分析如下（在fs/namei.c中）：

int
 link_path_walk(
const
 char
 *
 name,
 struct
 nameidata *
nd)


{


	struct
 dentry *
dentry;


	struct
 inode *
inode;


	int
 err;


	unsigned
 int
 lookup_flags =
 nd->
flags;


 

	while
 (
*
name==
'/'
)
//跳过开始的“/”，如/test/test2变为*name=test/test2


		name++;


	if
 (
!*
name)


		goto
 return_reval;


 

	inode =
 nd->
dentry->
d_inode;


	if
 (
current->
link_count)


		lookup_flags =
 LOOKUP_FOLLOW;
//设置查找方式，


 

	//这里，我们有一个真实的路径　


	for
(
;;
)
 {


		unsigned
 long
 hash;


		struct
 qstr this;


		unsigned
 int
 c;


 

		err =
 permission(
inode,
 MAY_EXEC)
;


		dentry =
 ERR_PTR(
err)
;


		if
 (
err)





			break
;


 

		this.name
 =
 name;


		c =
 *
(
const
 unsigned
 char
 *
)
name;
//取目录名中的第一个字母


 

		hash =
 init_name_hash(
)
;
//即hash=0


		//以“/”为分界符循环处理路径名


do
 {
//处理开始到“/”的名字，如test/test2中处理test


			name++;


			hash =
 partial_name_hash(
c,
 hash)
;
//由名得到hash值


			c =
 *
(
const
 unsigned
 char
 *
)
name;


		}
 while
 (
c &&
 (
c !=
 '/'
)
)
;


　　　　得到名字长度，如test长度


		this.len
 =
 name -
 (
const
 char
 *
)
 this.name
;


		this.hash
 =
 end_name_hash(
hash)
;
//即return (unsigned int) hash;


 

		if
 (
!
c)


			goto
 last_component;
//去处理路径名中最后一个路径名


		while
 (
*++
name ==
 '/'
)
;
//跳过“/”，如：由/test2得到*name=test2


		if
 (
!*
name)


			goto
 last_with_slashes;
//处理最后是“/”的情况


 

		//进入当前目录或父目录，进入当前目录是不处理


		if
 (
this.name
[
0
]
 ==
 '.'
)
 switch
 (
this.len
)
 {


			default
:


				break
;


			case
 2
:
	

				if
 (
this.name
[
1
]
 !=
 '.'
)


					break
;


　　　　　　　　//将nd的父目录赋给nd->dentry，即进入父目录


				follow_dotdot(
nd)
;


				inode =
 nd->
dentry->
d_inode;


				/* fallthrough */


			case
 1
:
//当前目录，继续


				continue
;


		}


		// 调用低层自己文件系统的d_hash函数，如果出错，则跳出循环


		if
 (
nd->
dentry->
d_op &&
 nd->
dentry->
d_op->
d_hash)
 {


			err =
 nd->
dentry->
d_op->
d_hash(
nd->
dentry,
 &
this)
;


			if
 (
err <
 0
)


				break
;


		}


 

		nd->
flags |=
 LOOKUP_CONTINUE;


		// 利用得到的hash值进行实际上的查找 


		err =
 do_lookup(
nd,
 &
this,
 &
next)
;


		if
 (
err)


			break
;


		//检查挂接点，检查&next.dentry处是否是挂接点，若是得到挂接点的dentry


		follow_mount(
&
next.mnt
,
 &
next.dentry
)
;


 

　　　　

			err =
 -
ENOENT;


		inode =
 next.dentry
->
d_inode;


		if
 (
!
inode)


			goto
 out_dput;


		err =
 -
ENOTDIR;
 

		if
 (
!
inode->
i_op)


			goto
 out_dput;


       //对节点的一些特殊情况如链接等进行处理





		if
 (
inode->
i_op->
follow_link)
 {


			mntget(
next.mnt
)
;


          //查找得到链接的dentry





			err =
 do_follow_link(
next.dentry
,
 nd)
;


			dput(
next.dentry
)
;


			mntput(
next.mnt
)
;


			if
 (
err)


				goto
 return_err;


			err =
 -
ENOENT;


			inode =
 nd->
dentry->
d_inode;


			if
 (
!
inode)


				break
;


			err =
 -
ENOTDIR;
 

			if
 (
!
inode->
i_op)


				break
;


		}
 else
 {


			dput(
nd->
dentry)
;


　　　　　　//赋上下一个名字的参数，如：test对就的vfsmount和dentry结构


			nd->
mnt =
 next.mnt
;
 //赋上下一个名字的对应的文件系统挂接点结构


			nd->
dentry =
 next.dentry
;


		}


		err =
 -
ENOTDIR;
 

		if
 (
!
inode->
i_op->
lookup)


			break
;


      //没到最后一个名字时，接着查找一个名字，如：test2





		continue
;
　

 

// 到这里循环查找每层目录名已完成，剩下处理最后一层目录名 


 

last_with_slashes:
 //如果文件路径名以“/”结尾


		lookup_flags |=
 LOOKUP_FOLLOW |
 LOOKUP_DIRECTORY;


 

last_component:
 //对路径名的最后一项做处理，与前面搜索方法一样


		nd->
flags &=
 ~LOOKUP_CONTINUE;


		if
 (
lookup_flags &
 LOOKUP_PARENT)


			goto
 lookup_parent;


      //处理“.”和“..”的情况





		if
 (
this.name
[
0
]
 ==
 '.'
)
 switch
 (
this.len
)
 {


			default
:


				break
;


			case
 2
:
	

				if
 (
this.name
[
1
]
 !=
 '.'
)


					break
;


				follow_dotdot(
&
nd->
mnt,
 &
nd->
dentry)
;


				inode =
 nd->
dentry->
d_inode;


				/* fallthrough */


			case
 1
:


				goto
 return_reval;


		}


		if
 (
nd->
dentry->
d_op &&
 nd->
dentry->
d_op->
d_hash)
 {


			err =
 nd->
dentry->
d_op->
d_hash(
nd->
dentry,
 &
this)
;


			if
 (
err <
 0
)


				break
;


		}


		err =
 do_lookup(
nd,
 &
this,
 &
next)
;


		if
 (
err)


			break
;


		follow_mount(
&
next.mnt
,
 &
next.dentry
)
;


		inode =
 next.dentry
->
d_inode;


		if
 (
(
lookup_flags &
 LOOKUP_FOLLOW)
//处理符号链接的情况


		    &&
 inode &&
 inode->
i_op &&
 inode->
i_op->
follow_link)
 {


			mntget(
next.mnt
)
;


			err =
 do_follow_link(
next.dentry
,
 nd)
;


			dput(
next.dentry
)
;


			mntput(
next.mnt
)
;


			if
 (
err)


				goto
 return_err;


			inode =
 nd->
dentry->
d_inode;


		}
 else
 {


			dput(
nd->
dentry)
;


			nd->
mnt =
 next.mnt
;


			nd->
dentry =
 next.dentry
;


		}


		err =
 -
ENOENT;


		if
 (
!
inode)


			break
;


		if
 (
lookup_flags &
 LOOKUP_DIRECTORY)
 {


			err =
 -
ENOTDIR;
 

			if
 (
!
inode->
i_op ||
 !
inode->
i_op->
lookup)


				break
;


		}


		goto
 return_base;


lookup_parent:


 

		nd->
last =
 this;


		nd->
last_type =
 LAST_NORM;


		if
 (
this.name
[
0
]
 !=
 '.'
)


			goto
 return_base;


		if
 (
this.len
 ==
 1
)


			nd->
last_type =
 LAST_DOT;


		else
 if
 (
this.len
 ==
 2
 &&
 this.name
[
1
]
 ==
 '.'
)


			nd->
last_type =
 LAST_DOTDOT;


		else


			goto
 return_base;


return_reval:


       对dentry进行保护性检查及处理




		/*

		 * We bypassed the ordinary revalidation routines.

		 * We may need to check the cached dentry for staleness.

		 */


		if
 (
nd->
dentry &&
 nd->
dentry->
d_sb &&


		    (
nd->
dentry->
d_sb->
s_type->
fs_flags &
 FS_REVAL_DOT)
)
 {


			err =
 -
ESTALE;


			/* Note: we do not d_invalidate() */


			if
 (
!
nd->
dentry->
d_op->
d_revalidate(
nd->
dentry,
 nd)
)


				break
;


		}


return_base:


		return
 0
;


out_dput:


		dput(
next.dentry
)
;


		break
;


	}


	path_release(
nd)
;


return_err:


	return
 err;


}

函数open_namei先利用函数cached_lookup()调用函数d_lookup(parent, name)在cache中搜索dentry，如果没有搜索到，就接着利用函数real_lookup()进行搜索。real_lookup()会先再次调用函数d_lookup(parent, name)在cache中搜索dentry，这样做是为了处理在等待目录信号量（semaphore）期间该dentry已被别的进程在cache中创建的情况；如果仍没有搜索到，再到具体文件系统中搜索dentry，从dentry结构中可得到inode。下面来分析函数d_lookup()（在fs/dcache.c中）：函数d_lookup的功能是在父目录parent的孩子中搜索名为name的dentry结构，如果查找到相匹配的dentry结构，就返回该dentry。调用者使用完dentry后，必须用dput释放它。函数d_lookup分析如下：

/*
 * d_lookup - search the dentry cache for a child of @parent named @name.
 *
 * @parent: directory dentry whose children are searched
 * @name:   name (with length and hash) to look for
 *
 * Scans the hash chain selected by d_hash(parent, name->hash) while
 * holding dcache_lock.  On a match the dentry is pinned with
 * __dget_locked(), marked DCACHE_REFERENCED and returned; otherwise
 * NULL is returned.
 */
struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
	const unsigned int want_len = name->len;
	const unsigned int want_hash = name->hash;
	const unsigned char *want_str = name->name;
	struct list_head *bucket = d_hash(parent, want_hash);
	struct list_head *pos;
	struct dentry *found = NULL;

	spin_lock(&dcache_lock);
	pos = bucket->next;

	/* Visit every entry of the chain until we are back at its head. */
	while (pos != bucket) {
		/* Recover the dentry that embeds this d_hash list node. */
		struct dentry *cand = list_entry(pos, struct dentry, d_hash);

		pos = pos->next;

		if (cand->d_name.hash != want_hash || cand->d_parent != parent)
			continue;

		if (parent->d_op && parent->d_op->d_compare) {
			/* The parent's own comparison returns non-zero on mismatch. */
			if (parent->d_op->d_compare(parent, &cand->d_name, name))
				continue;
		} else if (cand->d_name.len != want_len ||
			   memcmp(cand->d_name.name, want_str, want_len)) {
			continue;
		}

		__dget_locked(cand);
		cand->d_vfs_flags |= DCACHE_REFERENCED;
		found = cand;
		break;
	}

	spin_unlock(&dcache_lock);
	return found;
}

函数real_lookup是对实际文件系统进行查找，它实际上是调用具体文件系统节点的查找函数去执行查找的。这个函数分析如下（在fs/namei.c中）：

/*
 * real_lookup - look a name up through the directory inode's own
 * lookup method.
 *
 * @parent: dentry of the directory to search
 * @name:   name to look for
 * @flags:  not used in the visible part of this listing
 *
 * Returns the dentry found or newly set up, or an ERR_PTR() value.
 * The part that handles a cache hit is elided in this listing.
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
{
	struct dentry * result;
	struct inode *dir = parent->d_inode;

	down(&dir->i_sem);
	/*
	 * Search the cache once more: the dentry for this name may have
	 * been created while we were waiting for the semaphore.
	 */
	result = d_lookup(parent, name);
	if (!result) {
		struct dentry * dentry = d_alloc(parent, name);	/* allocate a new dentry */
		result = ERR_PTR(-ENOMEM);
		if (dentry) {
			lock_kernel();
			result = dir->i_op->lookup(dir, dentry);
			unlock_kernel();
			if (result)
				dput(dentry);	/* lookup returned something else: drop our dentry */
			else
				result = dentry;
		}
		up(&dir->i_sem);
		return result;
	}

	/* Revalidate the dentry, i.e. refresh the cached dentry. */
	……

	return result;
}

这个函数调用到具体文件系统的inode_operations结构中的查找函数。现以ext2文件系统为例，结构成员lookup所指的具体函数为ext2_lookup（在fs/ext2/namei.c中），将在下一章具体分析这个函数。ext2目录节点的inode_operations结构如下：

/*
 * Inode operations used for ext2 directories.  Each member points to the
 * ext2 implementation of the corresponding directory operation.
 */
struct inode_operations ext2_dir_inode_operations = {
	.create		= ext2_create,
	.lookup		= ext2_lookup,
	.link		= ext2_link,
	.unlink		= ext2_unlink,
	.symlink	= ext2_symlink,
	.mkdir		= ext2_mkdir,
	.rmdir		= ext2_rmdir,
	.mknod		= ext2_mknod,
	.rename		= ext2_rename
};

read系统调用

在fs/read_write.c中有sys_read系统调用函数的实现，它是通过调用具体文件系统file结构中的读操作函数来实现的。而大部分文件系统最终调用fs/buffer.c中的通用读函数block_read_full_page来实现读操作，写操作函数的情况也类似。

sys_read系统调用函数具体分析如下：

/*
 * sys_read - entry point of the read() system call (fs/read_write.c).
 *
 * @fd:    file descriptor to read from
 * @buf:   destination buffer, passed on to the file's read method
 * @count: number of bytes requested
 *
 * Returns whatever the file's read method returns, or -EBADF when the
 * descriptor is invalid or not opened for reading, the non-zero result
 * of locks_verify_area(), or -EINVAL when the file has no read method.
 */
asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count)
{
	ssize_t (*do_read)(struct file *, char *, size_t, loff_t *);
	struct file *filp;
	ssize_t nread;

	/* Translate the descriptor into its file structure. */
	filp = fget(fd);
	if (!filp)
		return -EBADF;

	nread = -EBADF;
	if (filp->f_mode & FMODE_READ) {
		/*
		 * Check the count bytes starting at f_pos with
		 * FLOCK_VERIFY_READ before reading.
		 * NOTE(review): presumably a file-lock check -- confirm.
		 */
		nread = locks_verify_area(FLOCK_VERIFY_READ,
					  filp->f_dentry->d_inode,
					  filp, filp->f_pos, count);
		if (nread == 0) {
			do_read = filp->f_op ? filp->f_op->read : NULL;
			nread = -EINVAL;
			/* Hand over to the filesystem's own read implementation. */
			if (do_read != NULL)
				nread = do_read(filp, buf, count, &filp->f_pos);
		}
	}

	/* Report the access event for a read that returned data. */
	if (nread > 0)
		dnotify_parent(filp->f_dentry, DN_ACCESS);

	fput(filp);
	return nread;
}

block_read_full_page函数是块设备的通用read page函数，但块设备必须有正常的get_block函数。大多数文件系统调用这个函数进行读操作。block_read_full_page函数先给一页创建空buffer，算出块号，锁住buffer，给buffer设置异步状态，异步从设备读一页后，再给buffer开锁，设置buffer为更新状态。 block_read_full_page函数在fs/buffer.c中，分析如下：

/*
 * block_read_full_page - generic "read one page" helper (fs/buffer.c).
 *
 * @page:      locked page to fill
 * @get_block: filesystem callback invoked as
 *             get_block(inode, iblock, bh, 0) for unmapped buffers
 *
 * Works in three stages: collect the buffers of the page that are not
 * up to date, lock them and mark them for async I/O, then submit the
 * reads.  Always returns 0 in the visible part of this listing.
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	unsigned long iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize, blocks;
	int nr, i;

	if (!PageLocked(page))
		PAGE_BUG(page);
	blocksize = 1 << inode->i_blkbits;
	if (!page->buffers)
		create_empty_buffers(page, inode->i_dev, blocksize);

	/* Head of the page's buffer list. */
	head = page->buffers;

	/* Blocks per page = page size / block size. */
	blocks = PAGE_CACHE_SIZE >> inode->i_blkbits;
	/* First block of this page = page index * page size / block size. */
	iblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	/* Blocks in the file = (file size + block size - 1) / block size. */
	lblock = (inode->i_size+blocksize-1) >> inode->i_blkbits;
	bh = head;
	nr = 0;
	i = 0;

	/* Stage one: collect every buffer that still has to be read. */
	do {
		/* Already up to date: nothing to read for this buffer. */
		if (buffer_uptodate(bh))
			continue;

		/* The buffer is not mapped yet. */
		if (!buffer_mapped(bh)) {
			/* The block lies inside the file. */
			if (iblock < lblock) {
				/*
				 * Ask the filesystem about block iblock; a
				 * non-zero result skips this buffer.
				 */
				if (get_block(inode, iblock, bh, 0))
					continue;
			}

			/* Article note: if the buffer could not be mapped, the page is cleared. */
			/* …… (code elided in the original listing) */

			/*
			 * get_block() might have updated the buffer
			 * synchronously; if it is up to date now, move on.
			 */
			if (buffer_uptodate(bh))
				continue;
		}

		arr[nr] = bh;
		nr++;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (!nr) {
		/* All buffers are up to date, so mark the page up to date too. */
		SetPageUptodate(page);
		UnlockPage(page);
		return 0;
	}

	/* Stage two: lock all collected buffers. */
	for (i = 0; i < nr; i++) {
		struct buffer_head * bh = arr[i];

		lock_buffer(bh);
		/* Mark the buffer for async I/O (article: BH_Async in b_state). */
		set_buffer_async_io(bh);
	}

	/* Stage three: start the read I/O. */
	for (i = 0; i < nr; i++) {
		struct buffer_head * bh = arr[i];

		/* Already up to date: just finish the async state. */
		if (buffer_uptodate(bh))
			end_buffer_io_async(bh, 1);
		else
			submit_bh(READ, bh);	/* submit the read request */
	}

	return 0;
}