1. 函数原型
#include <unistd.h>
int dup(int oldfd);
int dup2(int oldfd, int newfd);
功能:
复制文件描述符,使多个文件描述符指向同一个文件。
返回值:
成功:dup函数返回当前进程可用的最小文件描述符。
dup2函数返回newfd,即新描述符的值恰好等于newfd(“第一个不小于给定值的可用描述符”是fcntl的F_DUPFD的行为,而不是dup2的行为)。另外有两种特殊情况:
①、如果newfd已经打开,则先将其关闭,再复制文件描述符。
②、如果newfd等于oldfd,则dup2返回newfd, 而不关闭它。
失败:均返回-1,并设置errno。
注意:通过dup和dup2创建的文件描述符都有自己的一套文件描述符标志,并不继承原文件描述符的这类属性,比如close-on-exec(新描述符的该标志总是被清除)。而non-blocking等文件状态标志保存在共享的文件表项中,由新旧描述符共用。
2. fcntl 函数
复制一个描述符的另一种方法是使用fcntl 函数。
dup(fd);
等同于:
fcntl(fd, F_DUPFD, 0);
dup2(fd_old, fd_new);
等同于:
close(fd_new);
fcntl(fd_old, F_DUPFD, fd_new);
dup2 也并不完全等同于close 加fcntl,它们之间的区别是:
- dup2是一个原子操作,而close 及fcntl则包括两个函数调用,而有可能在close 和fcntl之间插入执行信号捕获函数,它可能修改文件描述符。
- dup2 和fcntl 有某些不同的errno
3. 文件描述符内核实现
在Linux中每一个进程的数据是存储在一个task_struct结构(定义在sched.h中)中的。
/*
 * Per-task descriptor (one per process/thread).
 *
 * For the dup()/dup2() walkthrough in this document the only member that
 * matters is `files`: fget() and dupfd() both reach the open-file table
 * through current->files.
 */
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
struct thread_info *thread_info;
atomic_t usage;
unsigned long flags; /* per process flags, defined below */
unsigned long ptrace;
int lock_depth; /* Lock depth */
int prio, static_prio;
struct list_head run_list;
prio_array_t *array;
unsigned long sleep_avg;
unsigned long long timestamp, last_ran;
int activated;
unsigned long policy;
cpumask_t cpus_allowed;
unsigned int time_slice, first_time_slice;
#ifdef CONFIG_SCHEDSTATS
struct sched_info sched_info;
#endif
struct list_head tasks;
/*
* ptrace_list/ptrace_children forms the list of my children
* that were stolen by a ptracer.
*/
struct list_head ptrace_children;
struct list_head ptrace_list;
struct mm_struct *mm, *active_mm;
/* task state */
struct linux_binfmt *binfmt;
long exit_state;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned long personality;
unsigned did_exec:1;
pid_t pid;
pid_t tgid;
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
* p->parent->pid)
*/
struct task_struct *real_parent; /* real parent process (when being debugged) */
struct task_struct *parent; /* parent process */
/*
* children/sibling forms the list of my children plus the
* tasks I'm ptracing.
*/
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader; /* threadgroup leader */
/* PID/PID hash table linkage. */
struct pid pids[PIDTYPE_MAX];
struct completion *vfork_done; /* for vfork() */
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
unsigned long rt_priority;
unsigned long it_real_value, it_real_incr;
cputime_t it_virt_value, it_virt_incr;
cputime_t it_prof_value, it_prof_incr;
struct timer_list real_timer;
cputime_t utime, stime;
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time;
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
/* process credentials */
uid_t uid,euid,suid,fsuid;
gid_t gid,egid,sgid,fsgid;
struct group_info *group_info;
kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
unsigned keep_capabilities:1;
struct user_struct *user;
#ifdef CONFIG_KEYS
struct key *session_keyring; /* keyring inherited over fork */
struct key *process_keyring; /* keyring private to this process (CLONE_THREAD) */
struct key *thread_keyring; /* keyring private to this thread */
#endif
int oomkilladj; /* OOM kill score adjustment (bit shift). */
char comm[TASK_COMM_LEN];
/* file system info */
int link_count, total_link_count;
/* ipc stuff */
struct sysv_sem sysvsem;
/* CPU-specific state of this task */
struct thread_struct thread;
/* filesystem information */
struct fs_struct *fs;
/*
 * open file information: the open-file table (struct files_struct) that
 * fget() and dupfd() access as current->files
 */
struct files_struct *files;
/* namespace */
struct namespace *namespace;
/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;
sigset_t blocked, real_blocked;
struct sigpending pending;
unsigned long sas_ss_sp;
size_t sas_ss_size;
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;
void *security;
struct audit_context *audit_context;
/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
spinlock_t alloc_lock;
/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
spinlock_t proc_lock;
/* context-switch lock */
spinlock_t switch_lock;
/* journalling filesystem info */
void *journal_info;
/* VM state */
struct reclaim_state *reclaim_state;
struct dentry *proc_dentry;
struct backing_dev_info *backing_dev_info;
struct io_context *io_context;
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
/*
* current io wait handle: wait queue entry to use for io waits
* If this thread is processing aio, this points at the waitqueue
* inside the currently handled kiocb. It may be NULL (i.e. default
* to a stack based synchronous wait) if its doing sync IO.
*/
wait_queue_t *io_wait;
/* i/o counters(bytes read/written, #syscalls */
u64 rchar, wchar, syscr, syscw;
#if defined(CONFIG_BSD_PROCESS_ACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
clock_t acct_stimexpd; /* clock_t-converted stime since last update */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *mempolicy;
short il_next;
#endif
};
该结构中有一个用于保存打开文件信息的成员:files,该成员类型是:struct files_struct*(定义在file.h)。
/*
 * Open file table structure.
 *
 * Reached from a task through task_struct.files; the descriptor helpers
 * in this document (fcheck_files, dupfd, locate_fd) all operate on it.
 */
struct files_struct {
	atomic_t	count;
	/* Protects all the below members. Nests inside tsk->alloc_lock */
	spinlock_t	file_lock;
	/* fcheck_files() only indexes fd[] for descriptors below this bound */
	int		max_fds;
	/* bound passed to the open_fds bitmap search in locate_fd() */
	int		max_fdset;
	/* locate_fd() never starts its search below this descriptor */
	int		next_fd;
	/* current fd array, indexed by descriptor number */
	struct file	**fd;
	/* per-descriptor close-on-exec bits; dupfd() clears the new fd's bit */
	fd_set		*close_on_exec;
	/* per-descriptor in-use bits; dupfd() sets the new fd's bit */
	fd_set		*open_fds;
	/* NOTE(review): the three members below look like the initial embedded
	 * storage for close_on_exec / open_fds / fd — confirm against the kernel
	 * source. */
	fd_set		close_on_exec_init;
	fd_set		open_fds_init;
	struct file	*fd_array[NR_OPEN_DEFAULT];
};
可以看到该结构中保存了所有与进程打开文件相关的信息,其中fd_array是一个struct file*类型的数组(struct file定义在fs.h)。
/*
 * One open file. files_struct.fd[] holds pointers to these, so several
 * descriptors can refer to the same struct file.
 */
struct file {
	struct list_head	f_list;
	struct dentry		*f_dentry;
	struct vfsmount		*f_vfsmnt;
	struct file_operations	*f_op;
	/* reference count; get_file() increments it atomically */
	atomic_t		f_count;
	unsigned int		f_flags;
	mode_t			f_mode;
	int			f_error;
	loff_t			f_pos;
	struct fown_struct	f_owner;
	unsigned int		f_uid;
	unsigned int		f_gid;
	struct file_ra_state	f_ra;
	size_t			f_maxcount;
	unsigned long		f_version;
	void			*f_security;
	/* needed for tty driver, and maybe others */
	void			*private_data;
#ifdef CONFIG_EPOLL
	/* Used by fs/eventpoll.c to link all the hooks to this file */
	struct list_head	f_ep_links;
	spinlock_t		f_ep_lock;
#endif /* CONFIG_EPOLL */
	struct address_space	*f_mapping;
};
struct file就是保存了每个打开文件信息的数据结构。
用图表示如下:
- task_struct 中有一个用于保存打开文件信息的成员:files,files的结构为struct files_struct,这就是上图中最左侧;
- files_struct 中有两个成员:struct file ** fd 和struct file * fd_array[NR_OPEN_DEFAULT];
- file 中存放的就是上图中间的部分,即文件表项,里面存放文件的状态标志、当前文件偏移量和v 节点指针;
- v节点指针指向v 节点表;
4. dup 内核实现
/* Thin wrapper: forwards the descriptor to sys_dup() and returns its result. */
static inline long dup(int fd)
{
	long new_fd = sys_dup(fd);

	return new_fd;
}
这里看到dup调用了函数sys_dup。
/*
 * sys_dup - duplicate descriptor @fildes onto a free descriptor.
 *
 * Looks up the struct file behind @fildes with fget() and passes it to
 * dupfd() with a start of 0. Returns dupfd()'s result, or -EBADF when
 * fget() finds no file for @fildes.
 */
asmlinkage long sys_dup(unsigned int fildes)
{
	struct file *filp = fget(fildes);

	if (!filp)
		return -EBADF;

	return dupfd(filp, 0);
}
在sys_dup函数中,关键的就是两步,fget获取指定文件描述符的struct file指针,然后调用dupfd,至于dupfd的具体实现,我们接着往下走
/*
 * fget - look up the struct file for descriptor @fd in the current task.
 *
 * The lookup and the reference-count increment both happen while
 * files->file_lock is held. Returns the file with one extra reference
 * taken, or NULL if fcheck_files() finds nothing for @fd.
 */
struct file fastcall *fget(unsigned int fd)
{
	struct files_struct *table = current->files;
	struct file *filp;

	spin_lock(&table->file_lock);
	filp = fcheck_files(table, fd);
	if (filp != NULL)
		get_file(filp);
	spin_unlock(&table->file_lock);

	return filp;
}
可以看到fget函数的实现就是首先获取一个files_struct指针,我们知道files_struct保存了所有打开文件信息(其中current是当前进程的struct task_struct指针),然后加锁,调用fcheck_files,获取file指针,如果file不为空,则调用get_file,下面我们看下这两个函数的实现。
/*
 * fcheck_files - bounds-checked lookup of @fd in @files.
 *
 * Returns files->fd[fd] when @fd is below files->max_fds, NULL otherwise.
 * Takes no lock and no reference itself.
 */
static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
{
	if (fd >= files->max_fds)
		return NULL;

	return files->fd[fd];
}
/* Take one more reference on an open file by atomically incrementing its f_count. */
#define get_file(filp)	atomic_inc(&(filp)->f_count)
现在已经可以知道,fcheck_files函数的具体步骤是首先判断给定的文件描述符fd是否小于最大文件描述符max_fds,如果小于,则返回fd数组中对应该fd下标的指针。
get_file的作用是原子的增加f_count,也就是该文件的引用计数(在close的时候会减这个值)。
现在再回到sys_dup中,看一下dupfd的实现。
/*
 * dupfd - bind @file to a free descriptor numbered @start or higher.
 *
 * Under files->file_lock, asks locate_fd() for a descriptor. On success
 * the descriptor's open_fds bit is set and its close_on_exec bit is
 * cleared before the lock is dropped, and then fd_install() is called
 * for it. On failure the lock is dropped and fput() is called on @file.
 *
 * Returns the new descriptor, or the negative error from locate_fd().
 */
static int dupfd(struct file *file, unsigned int start)
{
	struct files_struct *table = current->files;
	int newfd;

	spin_lock(&table->file_lock);
	newfd = locate_fd(table, file, start);
	if (newfd < 0) {
		/* No descriptor available: unlock, then call fput() on the file. */
		spin_unlock(&table->file_lock);
		fput(file);
		return newfd;
	}

	FD_SET(newfd, table->open_fds);
	FD_CLR(newfd, table->close_on_exec);
	spin_unlock(&table->file_lock);
	fd_install(newfd, file);

	return newfd;
}
该函数的具体步骤如下:
1、通过current->files获取struct files_struct指针。
2、加锁,完成后会解锁。
3、调用locate_fd函数获取一个fd,具体获取规则下面再看。
4、如果获取到的fd>=0,则调用FD_SET、FD_CLR、解锁、fd_install。关键在于fd_install;否则调用解锁、fput。
下面再看一下locate_fd、fd_install、fput的实现。
/*
 * locate_fd finds a free file descriptor in the open_fds fdset,
 * expanding the fd arrays if necessary. Must be called with the
 * file_lock held for write.
 *
 * @files:      open-file table to search
 * @file:       not used by this function
 * @orig_start: lowest descriptor number the caller will accept
 *
 * Returns the located descriptor (>= 0) on success. On failure returns
 * -EINVAL if orig_start is at or above the task's RLIMIT_NOFILE rlim_cur,
 * -EMFILE if the first free descriptor found is at or above that limit,
 * or the negative value returned by expand_files().
 *
 * The descriptor is only located here; its open_fds bit is not set by
 * this function (dupfd() does that after a successful return).
 */
static int locate_fd(struct files_struct *files,
		     struct file *file, unsigned int orig_start)
{
	unsigned int newfd;
	unsigned int start;
	int error;

	error = -EINVAL;
	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

repeat:
	/*
	 * Someone might have closed fd's in the range
	 * orig_start..files->next_fd
	 */
	start = orig_start;
	if (start < files->next_fd)
		start = files->next_fd;

	newfd = start;
	if (start < files->max_fdset) {
		newfd = find_next_zero_bit(files->open_fds->fds_bits,
					   files->max_fdset, start);
	}

	error = -EMFILE;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

	error = expand_files(files, newfd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fs array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	/*
	 * The search began at the cached next_fd hint (start was raised to
	 * next_fd above, so "<=" here means "=="): advance the hint past the
	 * descriptor just found.
	 */
	if (start <= files->next_fd)
		files->next_fd = newfd + 1;

	error = newfd;

out:
	return error;
}
根据该函数的注释即可知道它的作用就是:找到一个没有被使用的文件描述符,从start开始(这里就是dup和dup2的区别所在)