open系统调用的源码

最新推荐文章于 2024-07-23 00:52:11 发布

HULIHONG

最新推荐文章于 2024-07-23 00:52:11 发布

阅读量1.5k

点赞数

分类专栏： linux内核学习文章标签： struct file 任务文档 linux descriptor

linux内核学习专栏收录该内容

34 篇文章 0 订阅

订阅专栏

用户界面
我们使用系统调用open来打开一个文件，例如:

fd = open( "/home/mine/data/myfile",O_RDWR|O_CREAT);

下面来看看Linux是如何完成的，首先是系统调用的代码:

sys_open的源程序



/*
 * sys_open - kernel side of the open() system call.
 *
 * @filename: path string supplied by the caller
 * @flags:    open flags (O_LARGEFILE is forced on when longs are not 32 bit)
 * @mode:     creation mode, handed to filp_open() unchanged
 *
 * Returns the new descriptor (>= 0) or a negative error code taken from
 * getname(), get_unused_fd() or filp_open().
 *
 * The numbered markers (1)-(3) are referenced by the text that follows.
 */
asmlinkage long sys_open(const char * filename, int flags, int mode)
{
	struct file *filp;
	char *name;
	int fd;

#if BITS_PER_LONG != 32
	flags |= O_LARGEFILE;
#endif

	name = getname(filename);
	fd = PTR_ERR(name);
	if (IS_ERR(name))
		return fd;	/* nothing acquired yet, so nothing to release */

	fd = get_unused_fd();	/* <--- (1) reserve an integer descriptor */
	if (fd >= 0) {
		filp = filp_open(name, flags, mode);	/* <--- (2) find/create the file, build a struct file */
		if (IS_ERR(filp)) {
			/* give the reserved descriptor back and report the error */
			put_unused_fd(fd);
			fd = PTR_ERR(filp);
		} else {
			fd_install(fd, filp);	/* <--- (3) bind the descriptor to the struct file */
		}
	}

	putname(name);
	return fd;
}

这里面完成的几个工作

1.注意到返回一个整数fd,所以在(1)的位置获得一个整数fd。
2.根据路径/home/mine/data/myfile找到(或创建)文件，并且创建一个结构file(见(2))。
3.将整数fd和file结构指针f联系起来(见(3))。

几个问题
1.在《外设中的目录项(以EXT2为例)和内存中的"目录项"的比较
》一文中可见，一个文件是和inode,进而和dentry联系，那么为什么要创建一个结构file?

原因是，我们可以有多个对同一个文件操作的"会话",例如一个文件描述符fd1用于读，另一个fd2用于写。fd1的位置是文件开头，fd2的位置是文件结尾等等。因此，file的目的是记录一次对文件操作的会话。

2.为什么不直接返回结构file指针,而要用一个描述符整数fd?

原因是返回的信息是给用户程序使用的，不可能返回内核中的结构指针。所以采用间接的方法，返回一个整数，这个整数对应一个file的指针。

下面讨论的专题

1.Linux对文件描述符，结构file的有关实现。

2.如何从路径找到(或创建)文件。

我们的

<a href="http://www2.linuxforum.net/ker_plan/index/main.htm">内核文档与源码学习</a>

文章选项：

lucian_yao

(addict)
01-04-30 14:35

文件描述符与file结构

[re: lucian_yao ]

一个任务(task)的描述结构task_struct中的字段files指向数据结构 files_struct,这个结构用于管理该任务的文件。

在files_struct中，

fd_array 就是联系描述符fd和文件结构file指针的数组，事实上，下标为fd的元素中保存了指向file结构的指针。
open_fds 是一个标记打开文件的位图。例如，如果fd=7已被使用，那么open_fds的bit 7就为1。这个结构用于快速查找未使用的描述符。
close_on_exec 是一个标记位图。某个bit为1表明在这个任务执行系统调用execve的时候，这个bit对应的文件将被关闭，这个描述符将空出来。

fd_array中包含的指向结构file的指针，如前所述，每个file描述和一个文件的会话，因此，它(通过dentry)指向文件结构inode,另外，包含了会话信息。
如图，两个file结构指向同一个dentry,从而指向同一个inode,但是各自保留了私有的会话信息，如偏移量f_pos等。

我们的
内核文档与源码学习

文章选项：

lucian_yao

(addict)
01-05-08 11:02

files_struct这三个数组的初始化、再分配

[re: lucian_yao ]

files_struct这三个数组的初始化、再分配

初始化
files_struct内置了3个初始的数组:



--------------------------------



/*
 * Per-task open-file table (excerpt: members omitted by the article are
 * shown as "... ...").  It holds three "current" pointers plus three
 * embedded initial arrays that those pointers can be aimed at.
 */
struct files_struct {



        ... ...



	struct file ** fd;	/* current fd array */



	fd_set *close_on_exec;	/* current close-on-exec bitmap */



	fd_set *open_fds;	/* current open-descriptor bitmap */



	fd_set close_on_exec_init;/* <--- here: embedded initial close-on-exec bitmap */



	fd_set open_fds_init;/* <--- here: embedded initial open-descriptor bitmap */



	struct file * fd_array[NR_OPEN_DEFAULT];/* <--- here: embedded initial descriptor array */



};



---------------------------------

其中，fd_set为:



---------------------



/* fd_set is simply another name for __kernel_fd_set. */
/* NOTE(review): the two typedefs presumably live in different headers; in a
 * single translation unit the struct below would have to come first. */
typedef __kernel_fd_set		fd_set;







/* A descriptor bitmap stored as an array of __FDSET_LONGS unsigned longs. */
typedef struct {



	unsigned long fds_bits [__FDSET_LONGS];



} __kernel_fd_set;



---------------------

fd_set位图共1024位(__FD_SETSIZE)，即1024/8=128个字节长。
因此，当执行fork，在建立新的任务的文件结构时(copy_files),执行下面的语句:



-----------------------------------



/*
 * Excerpt of copy_files() ("... ..." marks code omitted by the article):
 * shows the new files_struct being allocated and its three pointers being
 * aimed at the arrays embedded in that same structure.
 */
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)



{	... ...



	newf = kmem_cache_alloc(files_cachep,

SLAB_KERNEL);/* <--- allocate the files_struct; the embedded arrays come with it */



	... ...



	



	newf->close_on_exec = &newf->close_on_exec_init;/* <--- point at the embedded array */



	newf->open_fds	    = &newf->open_fds_init;



	newf->fd	    = &newf->fd_array[0];







}



------------------------------------

也就自然地分配了上面3个数组,然后将files_struct的各个指针指向内部的这3个数组。

再分配

但是，上面初始分配的数组不一定够用，Linux采用的策略是，重新分配这3个数组(从而可以保证它们都是连续的)，然后将老的数组中的内容拷贝到新的数组中，再释放老的数组。
类似realloc的方法。
下面举expand_fdset的例子(描述符数组的再分配是由expand_fd_array完成的):



----------------------------------------------------------



/*
 * Expand the fdset in the files_struct.  Called with the files spinlock
 * held for write.
 *
 * Grows both bitmaps (open_fds and close_on_exec) so that they can hold
 * descriptor number nr.  The lock is dropped while the new bitmaps are
 * allocated and while the old ones are freed, and is held again on return.
 *
 * Returns 0 on success, -EMFILE if the NR_OPEN ceiling would be exceeded,
 * -ENOMEM if an allocation fails.  Also returns 0 (after freeing the
 * unused new bitmaps) when the set turned out to be large enough already
 * once the lock was retaken.
 */



int expand_fdset(struct files_struct *files, int

nr)/* <--- nr: descriptor number the set must be able to hold */



{



	fd_set *new_openset = 0, *new_execset = 0;



	int error, nfds = 0;







	error = -EMFILE;



	if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)  /* <--- enforce the upper bound on the set size */



		goto out;







	nfds = files->max_fdset;



	write_unlock(&files->file_lock);







	/* Expand to the max in easy steps */



	do

{/* <--- pick the new size, which need not equal nr: below PAGE_SIZE * 8 bits it jumps to PAGE_SIZE * 8, otherwise it doubles, capped at NR_OPEN */





		if (nfds < (PAGE_SIZE * 8))



			nfds = PAGE_SIZE * 8;



		else {



			nfds = nfds * 2;



			if (nfds > NR_OPEN)



				nfds = NR_OPEN;



		}



	} while (nfds <= nr);







	error = -ENOMEM;



	new_openset =

alloc_fdset(nfds);/* <--- (this line and the next) allocate the two new bitmaps */



	new_execset = alloc_fdset(nfds);



	write_lock(&files->file_lock);



	if (!new_openset || !new_execset)



		goto out;







	error = 0;



	



	/* Copy the existing tables and install the new pointers */



	if (nfds > files->max_fdset) {



		int i = files->max_fdset / (sizeof(unsigned long) * 8);	/* number of longs in the old set */



		int count = (nfds - files->max_fdset) / 8;	/* bytes added beyond the old set */



		



		/* 
		 * Don't copy the entire array if the current fdset is
		 * not yet initialised.  
		 */



		if (i) {



			memcpy (new_openset, files->open_fds,

files->max_fdset/8);/* <--- copy the old bitmap into the new one, then zero the extra part */



			memcpy (new_execset, files->close_on_exec, files->max_fdset/8);



			memset (&new_openset->fds_bits[ i ], 0, count);



			memset (&new_execset->fds_bits[ i ], 0, count);



		}



		



		nfds = xchg(&files->max_fdset,

nfds);/* <--- swap in the new size and pointers; the locals now hold the OLD values */



		new_openset = xchg(&files->open_fds, new_openset);



		new_execset = xchg(&files->close_on_exec, new_execset);



		write_unlock(&files->file_lock);



		free_fdset (new_openset, nfds);/* <--- release the old bitmaps */        



		free_fdset (new_execset, nfds);



		write_lock(&files->file_lock);



		return 0;



	} 



	/* Somebody expanded the array while we slept ... */







out:
	/* Shared exit: free whatever was allocated but not installed, with the lock dropped. */



	write_unlock(&files->file_lock);



	if (new_openset)



		free_fdset(new_openset, nfds);



	if (new_execset)



		free_fdset(new_execset, nfds);



	write_lock(&files->file_lock);



	return error;



}



--------------------------------------------------------------

注意，释放老的数组时，如果是原来嵌在files_struct中的，则不能释放：



------------------------------------------



/*
 * free_fdset - release a descriptor bitmap sized for @num descriptors.
 *
 * @array: bitmap to release; a NULL pointer is reported and ignored
 * @num:   number of descriptors (bits) the bitmap was sized for
 *
 * A bitmap of at most __FD_SETSIZE bits is treated as the one embedded in
 * files_struct and is left alone.  Larger bitmaps are released with
 * kfree() when they fit in a page and with vfree() otherwise.
 * NOTE(review): the kfree/vfree split presumably mirrors alloc_fdset(),
 * which is not shown here - confirm.
 */
void free_fdset(fd_set *array, int num)
{
	int size = num / 8;	/* bitmap size in bytes: one bit per descriptor */

	if (!array) {
		/*
		 * Pass the function name through %s: __FUNCTION__ is not a
		 * string literal on current compilers, and the message now
		 * ends in a real newline.
		 */
		printk (KERN_ERR "%s: array = 0 (num = %d)\n", __FUNCTION__, num);
		return;
	}

	if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */
		return;
	else if (size <= PAGE_SIZE)
		kfree(array);
	else
		vfree(array);
}



---------------------------------------

我们的

<a href="http://www2.linuxforum.net/ker_plan/index/main.htm">内核文档与源码学习</a>

文章选项：

lucian_yao

(addict)
01-05-08 11:04

如何获得空闲的fd

[re: lucian_yao ]

现在回到sys_open中，看看是如何获得一个未使用的文件描述符的。

下面的函数获得一个未使用的文件描述符。

--------------------------------------------------------------
/*
 * Find an empty file descriptor entry, and mark it busy.
 *
 * Returns the reserved descriptor (>= 0), -EMFILE when the task's
 * RLIMIT_NOFILE soft limit is reached, or the error reported by
 * expand_fdset() / expand_fd_array() when a table cannot be grown.
 */
int get_unused_fd(void)
{
	struct files_struct * files = current->files;
	int fd, error;

	error = -EMFILE;
	write_lock(&files->file_lock);

repeat:
	/* Search the open-descriptor bitmap, starting at next_fd, for a clear bit. */
	fd = find_next_zero_bit(files->open_fds,
				files->max_fdset,
				files->next_fd);

	/*
	 * N.B. For clone tasks sharing a files structure, this test
	 * will limit the total number of files that can be opened.
	 */
	if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)	/* soft limit hit: fail with -EMFILE */
		goto out;

	/* Do we need to expand the fdset array? */
	if (fd >= files->max_fdset) {
		/* The current bitmaps are full: grow them, then search again. */
		error = expand_fdset(files, fd);
		if (!error) {
			error = -EMFILE;
			goto repeat;
		}
		goto out;
	}

	/*
	 * Check whether we need to expand the fd array.
	 */
	if (fd >= files->max_fds) {
		/* Likewise, grow the descriptor array if needed and search again. */
		error = expand_fd_array(files, fd);
		if (!error) {
			error = -EMFILE;
			goto repeat;
		}
		goto out;
	}

	FD_SET(fd, files->open_fds);		/* mark the descriptor as in use */
	FD_CLR(fd, files->close_on_exec);	/* close-on-exec starts cleared; fcntl is needed to set it */
	files->next_fd = fd + 1;		/* the next search starts at the following bit */
#if 1
	/* Sanity check */
	if (files->fd[fd] != NULL) {
		printk("get_unused_fd: slot %d not NULL!\n", fd);
		files->fd[fd] = NULL;
	}
#endif
	error = fd;

out:
	write_unlock(&files->file_lock);
	return error;
}
--------------------------------------------------------------------------------

在sys_open中，当获得空的描述符fd以及一个file结构指针f后，调用

fd_install(fd, f)

将描述符数组下标为fd的元素置为指针f:



---------------------------------------------------



/*
 * fd_install - store @file in the current task's descriptor array at
 * index @fd.  Runs under the files write lock and calls BUG() if the
 * slot is already occupied.
 */
static inline void fd_install(unsigned int fd, struct file * file)
{
	struct files_struct *table = current->files;
	struct file **slot;

	write_lock(&table->file_lock);

	/* Look the slot up only once the lock is held. */
	slot = &table->fd[fd];
	if (*slot)
		BUG();
	*slot = file;	/* <--- set the pointer */

	write_unlock(&table->file_lock);
}



----------------------------------------------------

这个线索下面的帖子是讨论fork和execve的，关于sys_open的内容在系统调用open下。

我们的

<a href="http://www2.linuxforum.net/ker_plan/index/main.htm">内核文档与源码学习</a>

文章选项：

lucian_yao

(addict)
01-05-08 11:10

文件指针的复制

[re: lucian_yao ]

文件指针的复制

在fork(以及clone)中，有件非常重要的工作，就是复制文件指针，从而使得父子任务可以对相同的文件操作。

fork与clone是不同的。如图:

在clone中，复制了父任务的task_struct,这样父子指向同样的files_struct，以后无论父子谁打开/关闭文件，对另一方来说，也相当于有同样的操作。也许就是所谓在同一环境中吧。

而在fork中，又复制了files_struct,并且复制了files_struct指向的fd_array,这样两个fd_array相同的描述符都指向同样的文件。
与clone不同的是，在fork结束时，父子初始状态是一致的，但是以后父子可以有独立的打开/关闭文件的状态(以及close_on_exec状态)，进而可以更换某个文件描述符对应文件指针使得同样的描述符对应不同的file结构。

我们的
内核文档与源码学习

文章选项：

lucian_yao

(addict)
01-05-08 11:11

"文件指针的复制"的源码

[re: lucian_yao ]

"文件指针的复制"的源码

-------------------------------------------------------------------------
/*
 * copy_files - give the new task tsk its open-file table.
 *
 * With CLONE_FILES set, the current task's files_struct is shared: only its
 * reference count is raised.  Otherwise a new files_struct is allocated,
 * sized to fit the parent's tables, and the parent's bitmaps and file
 * pointers are copied into it (taking a reference on each file).
 *
 * Returns 0 on success, -ENOMEM if the files_struct cannot be allocated,
 * or the error from expand_fdset() / expand_fd_array().
 */
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
struct files_struct *oldf, *newf;
struct file **old_fds, **new_fds;
int open_files, nfds, size, i, error = 0;

/*
* A background process may not have any files ...
*/
oldf = current->files;
if (!oldf)
goto out;

if (clone_flags & CLONE_FILES) {/* <--- clone with shared files: no arrays need copying */
atomic_inc(&oldf->count);
goto out;
}

tsk->files = NULL;
error = -ENOMEM;
newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);/* <--- allocate a new files_struct */
if (!newf)
goto out;

atomic_set(&newf->count, 1);

newf->file_lock = RW_LOCK_UNLOCKED;
newf->next_fd = 0;
newf->max_fds = NR_OPEN_DEFAULT;
newf->max_fdset = __FD_SETSIZE;
newf->close_on_exec = &newf->close_on_exec_init;/* <--- start with the embedded arrays; they may still be expanded below if the parent's are larger */
newf->open_fds = &newf->open_fds_init;
newf->fd = &newf->fd_array[0];

/* We don't yet have the oldf readlock, but even if the old
fdset gets grown now, we'll only copy up to "size" fds */
size = oldf->max_fdset;
if (size > __FD_SETSIZE) {/* <--- the parent's bitmaps were expanded, so the embedded ones are too small for the child as well */
newf->max_fdset = 0;
write_lock(&newf->file_lock);
error = expand_fdset(newf, size-1);
write_unlock(&newf->file_lock);
if (error)
goto out_release;
}
read_lock(&oldf->file_lock);

open_files = count_open_files(oldf, size);

/*
* Check whether we need to allocate a larger fd array.
* Note: we're not a clone task, so the open count won't
* change.
*/
nfds = NR_OPEN_DEFAULT;
if (open_files > nfds) {/* <--- likewise: expand the child's descriptor array if the embedded one is too small */
read_unlock(&oldf->file_lock);
newf->max_fds = 0;
write_lock(&newf->file_lock);
error = expand_fd_array(newf, open_files-1);/* <--- note: sized from open_files, so the child's array may be smaller than the parent's */
write_unlock(&newf->file_lock);
if (error)
goto out_release;
nfds = newf->max_fds;
read_lock(&oldf->file_lock);
}

old_fds = oldf->fd;
new_fds = newf->fd;

memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);/* <--- copy the bitmaps */
memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);

for (i = open_files; i != 0; i--) {/* <--- copy the descriptor array (only the first open_files entries) */
struct file *f = *old_fds++;
if (f)/* <--- take a reference on each file that is present */
get_file(f);
*new_fds++ = f;
}
read_unlock(&oldf->file_lock);

/* compute the remainder to be cleared */
size = (newf->max_fds - open_files) * sizeof(struct file *);

/* This is long word aligned thus could use a optimized version */
memset(new_fds, 0, size);

/* Zero the part of both bitmaps that lies beyond the copied region. */
if (newf->max_fdset > open_files) {
int left = (newf->max_fdset-open_files)/8;
int start = open_files / (8 * sizeof(unsigned long));

memset(&newf->open_fds->fds_bits[start], 0, left);
memset(&newf->close_on_exec->fds_bits[start], 0, left);
}

tsk->files = newf;
error = 0;
out:
return error;

/* Failure after newf was allocated: free its bitmaps and the structure itself. */
out_release:
free_fdset (newf->close_on_exec, newf->max_fdset);
free_fdset (newf->open_fds, newf->max_fdset);
kmem_cache_free(files_cachep, newf);
goto out;
}

我们的
内核文档与源码学习

文章选项：

lucian_yao

(addict)
01-05-08 11:14

close_on_exec的使用

[re: lucian_yao ]

看看close_on_exec标志是如何使execve关闭对应的文件的

do_execve 中调用 search_binary_handler,后者有下面的语句:



-------------------------------------



int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;



... ...



retval = fn(bprm, regs);



-------------------------------------

而fmt->load_binary在下面定义:



------------------------------------



/*
 * Descriptor for one executable format: a link to the next descriptor, the
 * owning module, and the function pointers used to handle that format.
 */
struct linux_binfmt {



	struct linux_binfmt * next;



	struct module *module;



	int (*load_binary)(struct linux_binprm *, struct  pt_regs *

regs);/* <--- load_binary is declared here */



	int (*load_shlib)(struct file *);



	int (*core_dump)(long signr, struct pt_regs * regs, struct file * file);



	unsigned long min_coredump;	/* minimal dump size */



};







static struct linux_binfmt elf_format = {



	NULL, THIS_MODULE, /*defined here!*/load_elf_binary, load_elf_library, elf_core_dump,

ELF_EXEC_PAGESIZE



};



------------------------------------

load_elf_binary 调用 flush_old_exec，后者调用 flush_old_files(current->files);
flush_old_files做的事情是:

把那些对应标志为1的文件关闭。



------------------------------------------------------



/*
 * flush_old_files - close every descriptor whose close-on-exec bit is set.
 *
 * Walks the close_on_exec bitmap one word (__NFDBITS bits) at a time.  Each
 * non-zero word is cleared while the lock is held, then the lock is dropped
 * and sys_close() is called for every bit that was set in it.
 * NOTE(review): the lock is presumably dropped because sys_close() takes it
 * itself - confirm.
 */
static inline void flush_old_files(struct files_struct * files)



{



	long j = -1;	/* index of the bitmap word being examined */







	write_lock(&files->file_lock);



	for (;;) {



		unsigned long set, i;	/* set: copy of the current word; i: descriptor number of its lowest bit */







		j++;



		i = j * __NFDBITS;



		/* Stop once the word starts beyond either table. */
		if (i >= files->max_fds || i >= files->max_fdset)



			break;



		set = files->close_on_exec->fds_bits[j];



		if (!set)



			continue;



		files->close_on_exec->fds_bits[j] = 0;



		write_unlock(&files->file_lock);



		/* Shift through the saved word; i tracks the descriptor for the low bit. */
		for ( ; set ; i++,set >>= 1) {



			if (set & 1) {



/*here!*/				sys_close(i);



			}



		}



		write_lock(&files->file_lock);







	}



	write_unlock(&files->file_lock);



}



--------------------------------------------------------

我们的

<a href="http://www2.linuxforum.net/ker_plan/index/main.htm">内核文档与源码学习</a>

文章选项：

lucian_yao

(addict)
01-05-08 11:21

小结

[re: lucian_yao ]

在sys_open中，我们要完成几件事:
1.找到一个未使用的fd
2.根据open中提供的路径字符串找到(创建)对应的dentry(从而找到inode)
3.创建file结构，使得它指向这个dentry
4.在描述符数组中fd的位置设为指向file结构的指针

其中，1、4前面已经详细研究。2、3是在sys_open调用的filp_open中实现的。其中，

根据路径找dentry是由open_namei完成的
创建file结构是由dentry_open完成的

下面将分别详细研究open_namei，也就是"路径解析",以及创建file结构

我们的
内核文档与源码学习