进程管理(一)进程的诞生

一:进程的诞生

进程:一段可执行代码,还包括打开文件,挂起信号量,内存管理,处理器的状态,一个或者多个执行线程和数据段。(一般也称任务)表示:struct task_struct表示。

进程拥有独立的资源空间,线程共享资源空间。

1:init进程(linux启动的第一个进程)称为0号进程或者idle进程

start_kernel->rest_init->cpu_startup_entry->cpu_idle_loop

对核心结构体init_task进程的task_struct结构体通过宏INIT_TASK来赋值。

#define INIT_TASK(tsk)    \
{                                    \
    .state        = 0,                        \
    .stack        = &init_thread_info,                \-------#define init_thread_info (init_thread_union.thread_info)
    .usage        = ATOMIC_INIT(2),                \
    .flags        = PF_KTHREAD,                    \----------表明是一个内核线程
    .prio        = MAX_PRIO-20,                    \----------MAX_PRIO为140,此处prio为120,对应的nice值为0.关于prio和nice参考:prio和nice之间的关系。
    .static_prio    = MAX_PRIO-20,                    \
    .normal_prio    = MAX_PRIO-20,                    \
    .policy        = SCHED_NORMAL,                    \-------调度策略是SCHED_NORMAL。
    .cpus_allowed    = CPU_MASK_ALL,                    \
    .nr_cpus_allowed= NR_CPUS,                    \
    .mm        = NULL,                        \
    .active_mm    = &init_mm,                    \------------idle进程的内存管理结构数据
    .restart_block = {                        \
        .fn = do_no_restart_syscall,                \
    },                                \
    .se        = {                        \
        .group_node     = LIST_HEAD_INIT(tsk.se.group_node),    \
    },                                \
    .rt        = {                        \
        .run_list    = LIST_HEAD_INIT(tsk.rt.run_list),    \
        .time_slice    = RR_TIMESLICE,                \
    },                                \
    .tasks        = LIST_HEAD_INIT(tsk.tasks),            \
    INIT_PUSHABLE_TASKS(tsk)                    \
    INIT_CGROUP_SCHED(tsk)                        \
    .ptraced    = LIST_HEAD_INIT(tsk.ptraced),            \
    .ptrace_entry    = LIST_HEAD_INIT(tsk.ptrace_entry),        \
    .real_parent    = &tsk,                        \
    .parent        = &tsk,                        \
    .children    = LIST_HEAD_INIT(tsk.children),            \
    .sibling    = LIST_HEAD_INIT(tsk.sibling),            \
    .group_leader    = &tsk,                        \
    RCU_POINTER_INITIALIZER(real_cred, &init_cred),            \
    RCU_POINTER_INITIALIZER(cred, &init_cred),            \
    .comm        = INIT_TASK_COMM,                \
    .thread        = INIT_THREAD,                    \
    .fs        = &init_fs,                    \
    .files        = &init_files,                    \
    .signal        = &init_signals,                \
    .sighand    = &init_sighand,                \
    .nsproxy    = &init_nsproxy,                \
    .pending    = {                        \
        .list = LIST_HEAD_INIT(tsk.pending.list),        \
        .signal = {{0}}},                    \
    .blocked    = {{0}},                    \
    .alloc_lock    = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),        \
    .journal_info    = NULL,                        \
    .cpu_timers    = INIT_CPU_TIMERS(tsk.cpu_timers),        \
    .pi_lock    = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),    \
    .timer_slack_ns = 50000, /* 50 usec default slack */        \
    .pids = {                            \
        [PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),        \
        [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),        \
        [PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),        \
    },                                \
    .thread_group    = LIST_HEAD_INIT(tsk.thread_group),        \
    .thread_node    = LIST_HEAD_INIT(init_signals.thread_head),    \
    INIT_IDS                            \
    INIT_PERF_EVENTS(tsk)                        \
    INIT_TRACE_IRQFLAGS                        \
    INIT_LOCKDEP                            \
    INIT_FTRACE_GRAPH                        \
    INIT_TRACE_RECURSION                        \
    INIT_TASK_RCU_PREEMPT(tsk)                    \
    INIT_TASK_RCU_TASKS(tsk)                    \
    INIT_CPUSET_SEQ(tsk)                        \
    INIT_RT_MUTEXES(tsk)                        \
    INIT_PREV_CPUTIME(tsk)                        \
    INIT_VTIME(tsk)                            \
    INIT_NUMA_BALANCING(tsk)                    \
    INIT_KASAN(tsk)                            \
}
init_thread_info被__init_task_data修饰,所以它会被固定在.data..init_task段中。

1.2:fork()进程

do_fork():也包括kernel_thread。

do_fork有5个参数:clone_flags:创建进程的标志位集合,stack_start:用户态栈的起始地址,stack_size:用户态栈的大小,parent_tidptr和child_tidptr:指向用户空间地址的两个指针,分别指向父子进程PID。

#define CSIGNAL        0x000000ff    /* signal mask to be sent at exit */
#define CLONE_VM    0x00000100    /* set if VM shared between processes */-------------------------父子进程运行在同一个虚拟空间
#define CLONE_FS    0x00000200    /* set if fs info shared between processes */--------------------父子进程共享文件系统信息
#define CLONE_FILES    0x00000400    /* set if open files shared between processes */--------------父子进程共享文件描述符表
#define CLONE_SIGHAND    0x00000800    /* set if signal handlers and blocked signals shared */-----父子进程共享信号处理函数表
#define CLONE_PTRACE    0x00002000    /* set if we want to let tracing continue on the child too */---------父进程被跟踪ptrace,子进程也会被跟踪。
#define CLONE_VFORK    0x00004000    /* set if the parent wants the child to wake it up on mm_release */----在创建子进程时启动完成机制completion,wait_for_completion()会使父进程进入睡眠等待,知道子进程调用execve()或exit()释放虚拟内存资源。
#define CLONE_PARENT    0x00008000    /* set if we want to have the same parent as the cloner */------------新创建的进程是兄弟关系,而不是父子关系。
#define CLONE_THREAD    0x00010000    /* Same thread group? */
#define CLONE_NEWNS    0x00020000    /* New mount namespace group */------------父子进程不共享mount namespace
#define CLONE_SYSVSEM    0x00040000    /* share system V SEM_UNDO semantics */--
#define CLONE_SETTLS    0x00080000    /* create a new TLS for the child */
#define CLONE_PARENT_SETTID    0x00100000    /* set the TID in the parent */
#define CLONE_CHILD_CLEARTID    0x00200000    /* clear the TID in the child */
#define CLONE_DETACHED        0x00400000    /* Unused, ignored */
#define CLONE_UNTRACED        0x00800000    /* set if the tracing process can't force CLONE_PTRACE on this clone */
#define CLONE_CHILD_SETTID    0x01000000    /* set the TID in the child */
/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
   and is now available for re-use. */
#define CLONE_NEWUTS        0x04000000    /* New utsname namespace */
#define CLONE_NEWIPC        0x08000000    /* New ipc namespace */
#define CLONE_NEWUSER        0x10000000    /* New user namespace */----------子进程要创建新的User Namespace。
#define CLONE_NEWPID        0x20000000    /* New pid namespace */------------创建一个新的PID namespace。
#define CLONE_NEWNET        0x40000000    /* New network namespace */
#define CLONE_IO        0x80000000    /* Clone io context */

do_fork主要调用copy_process进行处理,然后唤醒创建的进程

long do_fork(unsigned long clone_flags,
          unsigned long stack_start,
          unsigned long stack_size,
          int __user *parent_tidptr,
          int __user *child_tidptr)
{
    struct task_struct *p;
    int trace = 0;
    long nr;
    if (!(clone_flags & CLONE_UNTRACED)) {
        if (clone_flags & CLONE_VFORK)
            trace = PTRACE_EVENT_VFORK;
        else if ((clone_flags & CSIGNAL) != SIGCHLD)
            trace = PTRACE_EVENT_CLONE;
        else
            trace = PTRACE_EVENT_FORK;

        if (likely(!ptrace_event_enabled(current, trace)))
            trace = 0;
    }

    p = copy_process(clone_flags, stack_start, stack_size,
             child_tidptr, NULL, trace);//调用该函数成功创建一个进程
    if (!IS_ERR(p)) {
        struct completion vfork;
        struct pid *pid;

        trace_sched_process_fork(current, p);

        pid = get_task_pid(p, PIDTYPE_PID);
        nr = pid_vnr(pid);

        if (clone_flags & CLONE_PARENT_SETTID)
            put_user(nr, parent_tidptr);

        if (clone_flags & CLONE_VFORK) {------------------对于CLONE_VFORK标志位,初始化vfork完成量
            p->vfork_done = &vfork;
            init_completion(&vfork);
            get_task_struct(p);
        }

        wake_up_new_task(p);------------------------------唤醒新创建的进程p,也即把进程加入调度器里接受调度执行。

        /* forking complete and child started to run, tell ptracer */
        if (unlikely(trace))
            ptrace_event_pid(trace, pid);

        if (clone_flags & CLONE_VFORK) {
            if (!wait_for_vfork_done(p, &vfork))---------等待子进程释放p->vfork_done完成量
                ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
        }

        put_pid(pid);
    } else {
        nr = PTR_ERR(p);
    }
    return nr;
}
static struct task_struct *copy_process(unsigned long clone_flags,
                    unsigned long stack_start,
                    unsigned long stack_size,
                    int __user *child_tidptr,
                    struct pid *pid,
                    int trace)
{
    int retval;
    struct task_struct *p;

    if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
        return ERR_PTR(-EINVAL);

    if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))---------------CLONE_FS(父子进程共享文件系统)和CLONE_NEWNS/CLONE_NEWUSER(父子进程不共享mount/user namespace)冲突,
        return ERR_PTR(-EINVAL);

    /*
     * Thread groups must share signals as well, and detached threads
     * can only be started up within the thread group.
     */
    if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))--------------------线程组共享信号处理函数
        return ERR_PTR(-EINVAL);

    /*
     * Shared signal handlers imply shared VM. By way of the above,
     * thread groups also imply shared VM. Blocking this case allows
     * for various simplifications in other code.
     */
    if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))----------------------共享信号处理函数需要共享内存空间
        return ERR_PTR(-EINVAL);

    /*
     * Siblings of global init remain as zombies on exit since they are
     * not reaped by their parent (swapper). To solve this and to avoid
     * multi-rooted process trees, prevent global and container-inits
     * from creating siblings.
     */
    if ((clone_flags & CLONE_PARENT) &&
                current->signal->flags & SIGNAL_UNKILLABLE)-----------------------------init是所有用户空间进程父进程,如果和init兄弟关系,那么进程将无法被回收,从而变成僵尸进程。
        return ERR_PTR(-EINVAL);

    /*
     * If the new process will be in a different pid or user namespace
     * do not allow it to share a thread group or signal handlers or
     * parent with the forking task.
     */
    if (clone_flags & CLONE_SIGHAND) {---------------------------------------------------新的pid或user命名空间和共享信号处理以及线程组冲突,因为他们在namespace中访问隔离。
        if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
            (task_active_pid_ns(current) !=
                current->nsproxy->pid_ns_for_children))
            return ERR_PTR(-EINVAL);
    }

    retval = security_task_create(clone_flags);
    if (retval)
        goto fork_out;

    retval = -ENOMEM;
    p = dup_task_struct(current);-------------------------------------------------------分配一个task_struct实例,将当前进程current作为母板。
    if (!p)
        goto fork_out;

    ftrace_graph_init_task(p);

    rt_mutex_init_task(p);

#ifdef CONFIG_PROVE_LOCKING
    DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
    DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
    retval = -EAGAIN;
    if (atomic_read(&p->real_cred->user->processes) >=
            task_rlimit(p, RLIMIT_NPROC)) {
        if (p->real_cred->user != INIT_USER &&
            !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
            goto bad_fork_free;
    }
    current->flags &= ~PF_NPROC_EXCEEDED;

    retval = copy_creds(p, clone_flags);
    if (retval < 0)
        goto bad_fork_free;

    /*
     * If multiple threads are within copy_process(), then this check
     * triggers too late. This doesn't hurt, the check is only there
     * to stop root fork bombs.
     */
    retval = -EAGAIN;
    if (nr_threads >= max_threads)----------------------------------------------max_threads是系统允许最多线程个数,nr_threads是系统当前进程个数。
        goto bad_fork_cleanup_count;

    if (!try_module_get(task_thread_info(p)->exec_domain->module))
        goto bad_fork_cleanup_count;

    delayacct_tsk_init(p);    /* Must remain after dup_task_struct() */
    p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);---------------------------------告诉系统不使用超级用户权限,并且不是workqueue内核线程。
    p->flags |= PF_FORKNOEXEC;--------------------------------------------------执行fork但不立即执行
    INIT_LIST_HEAD(&p->children);-----------------------------------------------新进程的子进程链表
    INIT_LIST_HEAD(&p->sibling);------------------------------------------------新进程的兄弟进程链表
    rcu_copy_process(p); //对task_struct结构成员的初始化
    p->vfork_done = NULL;
    spin_lock_init(&p->alloc_lock);

    init_sigpending(&p->pending);

    p->utime = p->stime = p->gtime = 0;
    p->utimescaled = p->stimescaled = 0;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
    p->prev_cputime.utime = p->prev_cputime.stime = 0;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
    seqlock_init(&p->vtime_seqlock);
    p->vtime_snap = 0;
    p->vtime_snap_whence = VTIME_SLEEPING;
#endif

#if defined(SPLIT_RSS_COUNTING)
    memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif

    p->default_timer_slack_ns = current->timer_slack_ns;

    task_io_accounting_init(&p->ioac);
    acct_clear_integrals(p);

    posix_cpu_timers_init(p);
//初始化新进程的task_struct结构体。
    p->start_time = ktime_get_ns();
    p->real_start_time = ktime_get_boot_ns();
    p->io_context = NULL;
    p->audit_context = NULL;
    if (clone_flags & CLONE_THREAD)
        threadgroup_change_begin(current);
    cgroup_fork(p);
#ifdef CONFIG_NUMA
    p->mempolicy = mpol_dup(p->mempolicy);
    if (IS_ERR(p->mempolicy)) {
        retval = PTR_ERR(p->mempolicy);
        p->mempolicy = NULL;
        goto bad_fork_cleanup_threadgroup_lock;
    }
#endif...
#ifdef CONFIG_BCACHE
    p->sequential_io    = 0;
    p->sequential_io_avg    = 0;
#endif

    /* Perform scheduler related setup. Assign this task to a CPU. */
//接下来做内存空间,文件系统,信号系统,io系统等核心文件的复制操作。
    retval = sched_fork(clone_flags, p);-----------------------------------------初始化进程调度相关数据结构,将进程指定到某一CPU上。
    if (retval)
        goto bad_fork_cleanup_policy;

    retval = perf_event_init_task(p);                                                                                         
    if (retval)
        goto bad_fork_cleanup_policy;
    retval = audit_alloc(p);
    if (retval)
        goto bad_fork_cleanup_perf;
    /* copy all the process information */
    shm_init_task(p);
    retval = copy_semundo(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_audit;
    retval = copy_files(clone_flags, p);-----------------------------------------复制父进程打开的文件信息
    if (retval)
        goto bad_fork_cleanup_semundo;
    retval = copy_fs(clone_flags, p);--------------------------------------------复制父进程fs_struct信息
    if (retval)
        goto bad_fork_cleanup_files;
    retval = copy_sighand(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_fs;
    retval = copy_signal(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_sighand;
    retval = copy_mm(clone_flags, p);--------------------------------------------复制父进程的内存管理相关信息
    if (retval)
        goto bad_fork_cleanup_signal;
    retval = copy_namespaces(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_mm;
    retval = copy_io(clone_flags, p);--------------------------------------------复制父进程的io_context上下文信息
    if (retval)
        goto bad_fork_cleanup_namespaces;
    retval = copy_thread(clone_flags, stack_start, stack_size, p);
    if (retval)
        goto bad_fork_cleanup_io;

    if (pid != &init_struct_pid) { //init_struct_pid是init_task进程默认的配置,新进程需要重新分配pid数据结构
        retval = -ENOMEM;
        pid = alloc_pid(p->nsproxy->pid_ns_for_children);
        if (!pid)
            goto bad_fork_cleanup_io;
    }

    p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
    /*
     * Clear TID on mm_release()?
     */
    p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
    p->plug = NULL;
#endif
#ifdef CONFIG_FUTEX
    p->robust_list = NULL;
#ifdef CONFIG_COMPAT
    p->compat_robust_list = NULL;
#endif
    INIT_LIST_HEAD(&p->pi_state_list);
    p->pi_state_cache = NULL;
#endif
    /*
     * sigaltstack should be cleared when sharing the same VM
     */
    if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
        p->sas_ss_sp = p->sas_ss_size = 0;

    /*
     * Syscall tracing and stepping should be turned off in the
     * child regardless of CLONE_PTRACE.
     */
    user_disable_single_step(p);
    clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
    clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
    clear_all_latency_tracing(p);

    /* ok, now we should be set up.. */
    p->pid = pid_nr(pid);-------------------------------------------------------获取新进程的pid
    if (clone_flags & CLONE_THREAD) {
        p->exit_signal = -1;
        p->group_leader = current->group_leader;
        p->tgid = current->tgid;
    } else {
        if (clone_flags & CLONE_PARENT)
            p->exit_signal = current->group_leader->exit_signal;
        else
            p->exit_signal = (clone_flags & CSIGNAL);
        p->group_leader = p;
        p->tgid = p->pid;
    }

    p->nr_dirtied = 0;
    p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
    p->dirty_paused_when = 0;

    p->pdeath_signal = 0;
    INIT_LIST_HEAD(&p->thread_group);
    p->task_works = NULL;

    /*
     * Make it visible to the rest of the system, but dont wake it up yet.
     * Need tasklist lock for parent etc handling!
     */
    write_lock_irq(&tasklist_lock);

    /* CLONE_PARENT re-uses the old parent */
    if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
        p->real_parent = current->real_parent;
        p->parent_exec_id = current->parent_exec_id;
    } else {
        p->real_parent = current;
        p->parent_exec_id = current->self_exec_id;
    }

    spin_lock(&current->sighand->siglock);

    /*
     * Copy seccomp details explicitly here, in case they were changed
     * before holding sighand lock.
     */
    copy_seccomp(p);

    recalc_sigpending();
    if (signal_pending(current)) {
        spin_unlock(&current->sighand->siglock);
        write_unlock_irq(&tasklist_lock);
        retval = -ERESTARTNOINTR;
        goto bad_fork_free_pid;
    }

    if (likely(p->pid)) {
        ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);

        init_task_pid(p, PIDTYPE_PID, pid);
        if (thread_group_leader(p)) {
            init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
            init_task_pid(p, PIDTYPE_SID, task_session(current));

            if (is_child_reaper(pid)) {
                ns_of_pid(pid)->child_reaper = p;
                p->signal->flags |= SIGNAL_UNKILLABLE;
            }

            p->signal->leader_pid = pid;
            p->signal->tty = tty_kref_get(current->signal->tty);
            list_add_tail(&p->sibling, &p->real_parent->children);
            list_add_tail_rcu(&p->tasks, &init_task.tasks);
            attach_pid(p, PIDTYPE_PGID);
            attach_pid(p, PIDTYPE_SID);
            __this_cpu_inc(process_counts);
        } else {
            current->signal->nr_threads++;
            atomic_inc(&current->signal->live);
            atomic_inc(&current->signal->sigcnt);
            list_add_tail_rcu(&p->thread_group,
                      &p->group_leader->thread_group);
            list_add_tail_rcu(&p->thread_node,
                      &p->signal->thread_head);
        }
        attach_pid(p, PIDTYPE_PID);
        nr_threads++;---------------------------------------------------------当前进程计数递增
    }

    total_forks++;
    spin_unlock(&current->sighand->siglock);
    syscall_tracepoint_update(p);
    write_unlock_irq(&tasklist_lock);

    proc_fork_connector(p);
    cgroup_post_fork(p);
    if (clone_flags & CLONE_THREAD)
        threadgroup_change_end(current);
    perf_event_fork(p);

    trace_task_newtask(p, clone_flags);
    uprobe_copy_process(p, clone_flags);

    return p;----------------------------------------------------------------成功返回新进程的task_struct。
...return ERR_PTR(retval);---------------------------------------------------各种错误处理
}

接下来分析一些进程的参数的复制

 sched_fork的主要任务交给__sched_fork(),然后根据优先级选择调度sched_class类,并执行其task_fork。

最后设置新进程运行的CPU,如果不是当前CPU则需要迁移过来。

int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
    unsigned long flags;
    int cpu = get_cpu();-------------------------------------------------------首先关闭内核抢占,然后获取当前CPU id。

    __sched_fork(clone_flags, p);----------------------------------------------填充sched_entity数据结构,初始化调度相关设置。每个进程和线程都是一个调度实体
    /*
     * We mark the process as running here. This guarantees that
     * nobody will actually run it, and a signal or other external
     * event cannot wake it up and insert it on the runqueue either.
     */
//进程的实际运行状态,进程还没运行,还没有加入到就绪队列(runqueue),外部事件不能唤醒。
    p->state = TASK_RUNNING;---------------------------------------------------设置为运行状态,虽然还没有实际运行。

    /*
     * Make sure we do not leak PI boosting priority to the child.
     */
    p->prio = current->normal_prio;--------------------------------------------继承父进程normal_prio作为子进程prio

    /*
     * Revert to default priority/policy on fork if requested.
     */
    if (unlikely(p->sched_reset_on_fork)) {
        if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
            p->policy = SCHED_NORMAL;
            p->static_prio = NICE_TO_PRIO(0);
            p->rt_priority = 0;
        } else if (PRIO_TO_NICE(p->static_prio) < 0)
            p->static_prio = NICE_TO_PRIO(0);

        p->prio = p->normal_prio = __normal_prio(p);
        set_load_weight(p);

        /*
         * We don't need the reset flag anymore after the fork. It has
         * fulfilled its duty:
         */
        p->sched_reset_on_fork = 0;
    }

    if (dl_prio(p->prio)) {---------------------------------------------------SCHED_DEADLINE优先级应该是负值,即小于0。
        put_cpu();
        return -EAGAIN;
    } else if (rt_prio(p->prio)) {--------------------------------------------SCHED_RT优先级为0-99
        p->sched_class = &rt_sched_class;
    } else {------------------------------------------------------------------SCHED_FAIR优先级为100-139
        p->sched_class = &fair_sched_class;
    }

    if (p->sched_class->task_fork)
        p->sched_class->task_fork(p);

    /*
     * The child is not yet in the pid-hash so no cgroup attach races,
     * and the cgroup is pinned to this child due to cgroup_fork()
     * is ran before sched_fork().
     *
     * Silence PROVE_RCU.
     */
    raw_spin_lock_irqsave(&p->pi_lock, flags);
//得到cpuid,将当前cpu设置到新进程thread_info中。
    set_task_cpu(p, cpu);------------------------------------------------------重要一点就是检查p->stack->cpu是不是当期CPU,如果不是则需要进行迁移。迁移函数使用之前确定的sched_class->migrate_task_rq。
    raw_spin_unlock_irqrestore(&p->pi_lock, flags);

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    if (likely(sched_info_on()))
        memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
#if defined(CONFIG_SMP)
    p->on_cpu = 0;
#endif
    init_task_preempt_count(p);
#ifdef CONFIG_SMP
    plist_node_init(&p->pushable_tasks, MAX_PRIO);
    RB_CLEAR_NODE(&p->pushable_dl_tasks);
#endif

    put_cpu();-----------------------------------------------------------------再次允许内核抢占。
    return 0;
}

copy_mm首先设置MM相关参数,然后使用dup_mm来分配mm_struct数据结构,并从父进程复制到新进程mm_struct。

最后将创建的mm_struct复制给task_struct->mm。

static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
{
    struct mm_struct *mm, *oldmm;
    int retval;

    tsk->min_flt = tsk->maj_flt = 0;
    tsk->nvcsw = tsk->nivcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
    tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
#endif

    tsk->mm = NULL;
    tsk->active_mm = NULL;

    /*
     * Are we cloning a kernel thread?
     *
     * We need to steal a active VM for that..
     */
    oldmm = current->mm;
    if (!oldmm)-----------------------------------------------如果current->mm为NULL,表示是内核线程。
        return 0;

    /* initialize the new vmacache entries */
    vmacache_flush(tsk);

    if (clone_flags & CLONE_VM) {----------------------------CLONE_VM表示父子进程共享内存空间,依次没必要新建内存空间,直接使用oldmm。
        atomic_inc(&oldmm->mm_users);
        mm = oldmm;
        goto good_mm;
    }

    retval = -ENOMEM;
    mm = dup_mm(tsk);---------------------------------------为子进程单独创建一个新的内存空间mm_struct。
    if (!mm)
        goto fail_nomem;

good_mm:
    tsk->mm = mm;-------------------------------------------对新进程内存空间进行赋值。
    tsk->active_mm = mm;
    return 0;

fail_nomem:
    return retval;
}
static struct mm_struct *dup_mm(struct task_struct *tsk)
{
    struct mm_struct *mm, *oldmm = current->mm;
    int err;

    mm = allocate_mm();-----------------------------------分配一个mm_struct数据结构
    if (!mm)
        goto fail_nomem;

    memcpy(mm, oldmm, sizeof(*mm));-----------------------将父进程mm_struct复制到新进程mm_struct。

    if (!mm_init(mm, tsk))--------------------------------主要对子进程的mm_struct成员进行初始化,虽然从父进程复制了相关数据,但是对于子进程需要重新进行初始化。
        goto fail_nomem;

    dup_mm_exe_file(oldmm, mm);

    err = dup_mmap(mm, oldmm);----------------------------将父进程种所有VMA对应的pte页表项内容都复制到子进程对应的PTE页表项中。
    if (err)
        goto free_pt;

    mm->hiwater_rss = get_mm_rss(mm);
    mm->hiwater_vm = mm->total_vm;

    if (mm->binfmt && !try_module_get(mm->binfmt->module))
        goto free_pt;

    return mm;
...
}

 对ARM体系结构,Linux内核栈顶存放着ARM通用寄存器struct pt_regs。

关于pt_regs在内核栈的位置,可以看出首先通过task_stack_page(p)站到内核栈起始地址,即底部。

然后加上地址THREAD_START_SP,即THREAD_SIZE两个页面8KB减去8字节空洞。

所以childregs指向的位置是栈顶部。

copy_thread首先获取栈顶pt_regs位置,然后填充thread_info->cpu_context进程上下文。

copy_thread(unsigned long clone_flags, unsigned long stack_start,
        unsigned long stk_sz, struct task_struct *p)
{
    struct thread_info *thread = task_thread_info(p);--------------------------获取当前进程的thread_info。
    struct pt_regs *childregs = task_pt_regs(p);-------------------------------获取当前进程的pt_regs

    memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));----------cpu_context中保存了进程上下文相关的通用寄存器。

    if (likely(!(p->flags & PF_KTHREAD))) {------------------------------------内核线程处理
        *childregs = *current_pt_regs();
        childregs->ARM_r0 = 0;
        if (stack_start)
            childregs->ARM_sp = stack_start;
    } else {-------------------------------------------------------------------普通线程处理,r4等于stk_sz,r5指向start_start。
        memset(childregs, 0, sizeof(struct pt_regs));
        thread->cpu_context.r4 = stk_sz;
        thread->cpu_context.r5 = stack_start;
        childregs->ARM_cpsr = SVC_MODE;
    }
    thread->cpu_context.pc = (unsigned long)ret_from_fork;---------------------cpu_context中pc指向ret_from_fork
    thread->cpu_context.sp = (unsigned long)childregs;-------------------------cpu_context中sp指向新进程的内核栈

    clear_ptrace_hw_breakpoint(p);

    if (clone_flags & CLONE_SETTLS)
        thread->tp_value[0] = childregs->ARM_r3;
    thread->tp_value[1] = get_tpuser();

    thread_notify(THREAD_NOTIFY_COPY, thread);

    return 0;
}

opy_thread首先获取栈顶pt_regs位置,然后填充thread_info->cpu_context进程上下文。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值