linux 文件IO操作之dup 和dup2函数

1. 函数原型

 #include <unistd.h>
 
 int dup(int oldfd);
 int dup2(int oldfd, int newfd);

功能:

复制文件描述符,使多个文件描述符指向同一个文件。

返回值:

 成功:dup函数返回当前进程中可用的最小文件描述符。
    dup2函数返回newfd本身(注意:"返回第一个不小于给定值的描述符"是fcntl的F_DUPFD的语义,而不是dup2的)。dup2分为两种情况:
    ①、如果newfd已经打开,则先将其关闭,再复制文件描述符。
    ②、如果newfd等于oldfd(且oldfd是有效的描述符),则dup2直接返回newfd, 而不关闭它。

 失败:均返回-1,并设置errno。

注意:通过dup和dup2创建的文件描述符都有自己的一套文件描述符标志,并不继承原文件描述符的描述符标志,比如close-on-exec(新描述符上的该标志总是被清除)。而文件状态标志(比如non-blocking,即O_NONBLOCK)和当前文件偏移量保存在共享的文件表项中,因此新旧描述符是共享这些属性的。

 

2. fcntl 函数

复制一个描述符的另一种方法是使用fcntl 函数。

dup(fd);

等同于:

fcntl(fd, F_DUPFD, 0);

而:

dup2(fd_old, fd_new);

等同于:

close(fd_new);
fcntl(fd_old, F_DUPFD, fd_new);

dup2 也并不完全等同于close 加fcntl,它们之间的区别是:

  • dup2是一个原子操作,而close 加fcntl是两次独立的函数调用:在close 和fcntl之间有可能插入执行信号捕获函数,而它可能修改文件描述符,使fcntl得到的并不是预期的fd_new。
  • dup2 和fcntl 有某些不同的errno

 

3. 文件描述符内核实现

在Linux中每一个进程的数据是存储在一个task_struct结构(定义在sched.h中)中的。

/*
 * struct task_struct - the per-process descriptor discussed in this article.
 *
 * The member that matters for dup()/dup2() is `files`: fget() and dupfd()
 * below both reach the open-file table of the running process through
 * current->files, where `current` is a pointer to this structure.
 * The remaining members are reproduced for context only.
 */
struct task_struct {
    volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
    struct thread_info *thread_info;
    atomic_t usage;
    unsigned long flags;    /* per process flags, defined below */
    unsigned long ptrace;

    int lock_depth;        /* Lock depth */

    int prio, static_prio;
    struct list_head run_list;
    prio_array_t *array;

    unsigned long sleep_avg;
    unsigned long long timestamp, last_ran;
    int activated;

    unsigned long policy;
    cpumask_t cpus_allowed;
    unsigned int time_slice, first_time_slice;

#ifdef CONFIG_SCHEDSTATS
    struct sched_info sched_info;
#endif

    struct list_head tasks;
    /*
     * ptrace_list/ptrace_children forms the list of my children
     * that were stolen by a ptracer.
     */
    struct list_head ptrace_children;
    struct list_head ptrace_list;

    struct mm_struct *mm, *active_mm;

/* task state */
    struct linux_binfmt *binfmt;
    long exit_state;
    int exit_code, exit_signal;
    int pdeath_signal;  /*  The signal sent when the parent dies  */
    /* ??? */
    unsigned long personality;
    unsigned did_exec:1;
    pid_t pid;
    pid_t tgid;
    /*
     * pointers to (original) parent process, youngest child, younger sibling,
     * older sibling, respectively.  (p->father can be replaced with
     * p->parent->pid)
     */
    struct task_struct *real_parent; /* real parent process (when being debugged) */
    struct task_struct *parent;    /* parent process */
    /*
     * children/sibling forms the list of my children plus the
     * tasks I'm ptracing.
     */
    struct list_head children;    /* list of my children */
    struct list_head sibling;    /* linkage in my parent's children list */
    struct task_struct *group_leader;    /* threadgroup leader */

    /* PID/PID hash table linkage. */
    struct pid pids[PIDTYPE_MAX];

    struct completion *vfork_done;        /* for vfork() */
    int __user *set_child_tid;        /* CLONE_CHILD_SETTID */
    int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */

    unsigned long rt_priority;
    unsigned long it_real_value, it_real_incr;
    cputime_t it_virt_value, it_virt_incr;
    cputime_t it_prof_value, it_prof_incr;
    struct timer_list real_timer;
    cputime_t utime, stime;
    unsigned long nvcsw, nivcsw; /* context switch counts */
    struct timespec start_time;
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    unsigned long min_flt, maj_flt;
/* process credentials */
    uid_t uid,euid,suid,fsuid;
    gid_t gid,egid,sgid,fsgid;
    struct group_info *group_info;
    kernel_cap_t   cap_effective, cap_inheritable, cap_permitted;
    unsigned keep_capabilities:1;
    struct user_struct *user;
#ifdef CONFIG_KEYS
    struct key *session_keyring;    /* keyring inherited over fork */
    struct key *process_keyring;    /* keyring private to this process (CLONE_THREAD) */
    struct key *thread_keyring;    /* keyring private to this thread */
#endif
    int oomkilladj; /* OOM kill score adjustment (bit shift). */
    char comm[TASK_COMM_LEN];
/* file system info */
    int link_count, total_link_count;
/* ipc stuff */
    struct sysv_sem sysvsem;
/* CPU-specific state of this task */
    struct thread_struct thread;
/* filesystem information */
    struct fs_struct *fs;
/* open file information: the table that fget() and dupfd() access as current->files */
    struct files_struct *files;
/* namespace */
    struct namespace *namespace;
/* signal handlers */
    struct signal_struct *signal;
    struct sighand_struct *sighand;

    sigset_t blocked, real_blocked;
    struct sigpending pending;

    unsigned long sas_ss_sp;
    size_t sas_ss_size;
    int (*notifier)(void *priv);
    void *notifier_data;
    sigset_t *notifier_mask;

    void *security;
    struct audit_context *audit_context;

/* Thread group tracking */
       u32 parent_exec_id;
       u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
    spinlock_t alloc_lock;
/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
    spinlock_t proc_lock;
/* context-switch lock */
    spinlock_t switch_lock;

/* journalling filesystem info */
    void *journal_info;

/* VM state */
    struct reclaim_state *reclaim_state;

    struct dentry *proc_dentry;
    struct backing_dev_info *backing_dev_info;

    struct io_context *io_context;

    unsigned long ptrace_message;
    siginfo_t *last_siginfo; /* For ptrace use.  */
/*
 * current io wait handle: wait queue entry to use for io waits
 * If this thread is processing aio, this points at the waitqueue
 * inside the currently handled kiocb. It may be NULL (i.e. default
 * to a stack based synchronous wait) if its doing sync IO.
 */
    wait_queue_t *io_wait;
/* i/o counters(bytes read/written, #syscalls */
    u64 rchar, wchar, syscr, syscw;
#if defined(CONFIG_BSD_PROCESS_ACCT)
    u64 acct_rss_mem1;    /* accumulated rss usage */
    u64 acct_vm_mem1;    /* accumulated virtual memory usage */
    clock_t acct_stimexpd;    /* clock_t-converted stime since last update */
#endif
#ifdef CONFIG_NUMA
      struct mempolicy *mempolicy;
    short il_next;
#endif
};

该结构中有一个用于保存打开文件信息的成员:files,该成员类型是:struct files_struct*(定义在file.h)。

/*
 * Open file table structure
 *
 * One of these hangs off each task_struct (task->files).  The descriptor
 * number used by user space is an index into fd[]; the open_fds and
 * close_on_exec bitmaps carry per-descriptor state.
 */
struct files_struct {
        atomic_t count;           /* presumably a reference count on this table -- TODO confirm */
        spinlock_t file_lock;     /* Protects all the below members.  Nests inside tsk->alloc_lock */
        int max_fds;              /* fcheck_files() only indexes fd[] when the descriptor is < max_fds */
        int max_fdset;            /* bit-count limit locate_fd() passes when scanning open_fds */
        int next_fd;              /* locate_fd() never starts its free-descriptor search below this */
        struct file ** fd;      /* current fd array */
        fd_set *close_on_exec;    /* per-descriptor close-on-exec bits; dupfd() clears the new fd's bit */
        fd_set *open_fds;         /* per-descriptor "in use" bits; dupfd() sets the new fd's bit */
        fd_set close_on_exec_init; /* NOTE(review): presumably initial storage for close_on_exec -- confirm */
        fd_set open_fds_init;      /* NOTE(review): presumably initial storage for open_fds -- confirm */
        struct file * fd_array[NR_OPEN_DEFAULT]; /* NOTE(review): presumably initial storage for fd -- confirm */
};

可以看到该结构中保存了所有与进程打开文件相关的信息,其中fd_array是一个struct file*类型的数组(struct file定义在fs.h中)。

/*
 * struct file - one open-file entry.
 *
 * Each slot of files_struct.fd[] points at one of these.  Several
 * descriptors may point at the same struct file; f_count tracks how many
 * references exist (see get_file()).
 */
struct file {
    struct list_head    f_list;
    struct dentry        *f_dentry;
    struct vfsmount         *f_vfsmnt;
    struct file_operations    *f_op;
    atomic_t        f_count;    /* reference count; get_file() increments it atomically */
    unsigned int         f_flags;    /* NOTE(review): presumably the file status flags -- confirm */
    mode_t            f_mode;
    int            f_error;
    loff_t            f_pos;    /* NOTE(review): presumably the current file offset -- confirm */
    struct fown_struct    f_owner;
    unsigned int        f_uid, f_gid;
    struct file_ra_state    f_ra;

    size_t            f_maxcount;
    unsigned long        f_version;
    void            *f_security;

    /* needed for tty driver, and maybe others */
    void            *private_data;

#ifdef CONFIG_EPOLL
    /* Used by fs/eventpoll.c to link all the hooks to this file */
    struct list_head    f_ep_links;
    spinlock_t        f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
    struct address_space    *f_mapping;
};

struct file就是保存了每个打开文件信息的数据结构。

 

用图表示如下:

  • task_struct 中有一个用于保存打开文件信息的成员:files,files的结构为struct files_struct,这就是上图中最左侧;
  • files_struct 中有两个成员:struct file ** fd 和struct file * fd_array[NR_OPEN_DEFAULT];
  • file 中存放的就是上图中中间的部分,即文件表项,里面存放文件的状态标志、当前文件偏移量和v 节点指针;
  • v节点指针指向v 节点表;

 

4. dup 内核实现

 /* Thin wrapper: hands the descriptor to sys_dup() and passes its result back. */
 static inline long
 dup (int fd)
 {
     long result = sys_dup(fd);

     return result;
 }

这里看到dup调用了函数sys_dup。

 /*
  * sys_dup - duplicate the descriptor `fildes`.
  *
  * Looks up the struct file behind `fildes` with fget() (which takes a
  * reference) and asks dupfd() for a new descriptor, searching from 0.
  * Returns dupfd()'s result, or -EBADF when fget() finds no open file.
  */
 asmlinkage long sys_dup(unsigned int fildes)
 {
     struct file *filp = fget(fildes);

     if (!filp)
         return -EBADF;

     return dupfd(filp, 0);
 }

在sys_dup函数中,关键的就是两步,fget获取指定文件描述符的struct file指针,然后调用dupfd,至于dupfd的具体实现,我们接着往下走

 /*
  * fget - look up descriptor `fd` in the current process and take a
  * reference on the file it names.
  *
  * The lookup and the reference bump both happen under files->file_lock.
  * Returns the struct file pointer, or NULL if fcheck_files() finds
  * nothing for `fd`.
  */
 struct file fastcall *fget(unsigned int fd)
 {
     struct files_struct *table = current->files;
     struct file *filp;

     spin_lock(&table->file_lock);
     filp = fcheck_files(table, fd);
     if (filp != NULL)
         get_file(filp);
     spin_unlock(&table->file_lock);

     return filp;
 }

可以看到fget函数的实现就是首先获取一个files_struct指针,我们知道files_struct保存了所有打开文件信息(其中current是当前进程的struct task_struct指针),然后加锁,调用fcheck_files,获取file指针,如果file不为空,则调用get_file,下面我们看下这两个函数的实现。

 /*
  * fcheck_files - bounds-checked read of files->fd[fd].
  *
  * Returns NULL when `fd` is not below files->max_fds; otherwise returns
  * whatever pointer is stored in that slot (which may itself be NULL).
  */
 static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
 {
     if (fd >= files->max_fds)
         return NULL;

     return files->fd[fd];
 }
 
 /* Take one more reference on an open file: atomically increments x->f_count. */
 #define get_file(x)    atomic_inc(&(x)->f_count)

现在已经可以知道,fcheck_files函数的具体步骤是首先判断给定的文件描述符fd是否小于最大文件描述符max_fds,如果小于,则返回fd数组中对应该fd下标的指针。

get_file的作用是原子的增加f_count,也就是该文件的引用计数(在close的时候会减这个值)。

现在再回到sys_dup中,看一下dupfd的实现。

 /*
  * dupfd - bind `file` to a free descriptor at or above `start`.
  *
  * With files->file_lock held, locate_fd() picks the descriptor.  On
  * success the descriptor is marked open, its close-on-exec bit is
  * cleared, the lock is dropped and fd_install() publishes the file.
  * On failure the lock is dropped and fput() is called on `file`.
  *
  * Returns the new descriptor, or the negative value from locate_fd().
  */
 static int dupfd(struct file *file, unsigned int start)
 {
     struct files_struct *table = current->files;
     int newfd;

     spin_lock(&table->file_lock);
     newfd = locate_fd(table, file, start);
     if (newfd < 0) {
         spin_unlock(&table->file_lock);
         fput(file);
         return newfd;
     }

     FD_SET(newfd, table->open_fds);
     FD_CLR(newfd, table->close_on_exec);
     spin_unlock(&table->file_lock);
     fd_install(newfd, file);

     return newfd;
 }

该函数的具体步骤如下:

1、通过current->files获取struct files_struct指针。

2、加锁,完成后会解锁。

3、调用locate_fd函数获取一个fd,具体获取规则下面再看。

4、如果获取到的fd>=0,则调用FD_SET、FD_CLR、解锁、fd_install。关键在于fd_install;否则调用解锁、fput。

下面再看一下locate_fd、fd_install、fput的实现。

 /*
  * locate_fd finds a free file descriptor in the open_fds fdset,
  * expanding the fd arrays if necessary.  Must be called with the
  * file_lock held for write.
  *
  * @files:      open-file table to search
  * @file:       file being duplicated (not used by the search itself)
  * @orig_start: lowest descriptor number the caller will accept
  *
  * Returns the chosen descriptor (>= 0) on success, otherwise a negative
  * value:
  *   -EINVAL  orig_start is at or above the RLIMIT_NOFILE soft limit
  *   -EMFILE  the candidate descriptor is at or above that limit
  *   < 0      whatever negative value expand_files() returned
  */
 
 static int locate_fd(struct files_struct *files,
                 struct file *file, unsigned int orig_start)
 {
     unsigned int newfd;
     unsigned int start;
     int error;
 
     error = -EINVAL;
     if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
         goto out;
 
 repeat:
     /*
      * Someone might have closed fd's in the range
      * orig_start..files->next_fd
      */
     start = orig_start;
     if (start < files->next_fd)
         start = files->next_fd;
 
     /* Scan the bitmap only within its current size; beyond it, `start` itself is the candidate. */
     newfd = start;
     if (start < files->max_fdset) {
         newfd = find_next_zero_bit(files->open_fds->fds_bits,
             files->max_fdset, start);
     }
 
     error = -EMFILE;
     if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
         goto out;
 
     /* Negative: failure.  Non-zero positive: retry (see below).  Zero: proceed. */
     error = expand_files(files, newfd);
     if (error < 0)
         goto out;
 
     /*
      * If we needed to expand the fs array we
      * might have blocked - try again.
      */
     if (error)
         goto repeat;
 
     /* Advance the search hint past the descriptor being handed out. */
     if (start <= files->next_fd)
         files->next_fd = newfd + 1;
 
     error = newfd;
 
 out:
     return error;
 }

根据该函数的注释即可知道它的作用就是:从start开始,找到一个没有被使用的文件描述符(dup传入的start固定为0,而fcntl的F_DUPFD传入的是调用者指定的起始值,这里就是两者的区别所在)

 

 

  • 2
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

私房菜

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值