进程退出可能在任意时刻发生,而内核模块有时候会引用 task_struct *task。如果是在 system call 同步调用的上下文中,则可以放心使用;而在系统调用上下文之外异步使用 task 时就要注意安全,否则内核模块理论上有小概率会引起 panic,原因就是恰好 task 对应的进程退出,而 task 已被释放了。
先说结论:
1. 要异步安全地使用task,可行的路子是通过pid->task查找,然后get_task_struct(),用完put_task_struct()。
2. 内核在设计上使用了rcu+引用计数的方案来实现异步安全地使用task。
分析如下:
要点:
1. pid通过rcu引用对应的task,如果进程退出则pid通过rcu得到NULL或者有效的新task。
2. 如果获取到的task是NULL,说明进程已经退出,各回各家,收工。
3. 如果拿到了task,则在rcu读临界区的保护下调用get_task_struct()增加引用计数;系统保证这样get到的task不会被释放。
4. 进程在退出时会先detach_pid(),然后通过call_rcu()延迟释放task,这样rcu读者就是安全的。
5. 最后一个put_task_struct()会真正释放task。
exit.c
具体相关代码:
/*
* This function expects the tasklist_lock write-locked.
*/
static void __exit_signal(struct task_struct *tsk)
{
struct signal_struct *sig = tsk->signal;
bool group_dead = thread_group_leader(tsk);
struct sighand_struct *sighand;
struct tty_struct *uninitialized_var(tty);
u64 utime, stime;
sighand = rcu_dereference_check(tsk->sighand,
lockdep_tasklist_lock_is_held());
spin_lock(&sighand->siglock);
#ifdef CONFIG_POSIX_TIMERS
posix_cpu_timers_exit(tsk);
if (group_dead) {
posix_cpu_timers_exit_group(tsk);
} else {
/*
* This can only happen if the caller is
de_thread().
* FIXME: this is the temporary hack, we
should teach
* posix-cpu-timers to handle this case
correctly.
*/
if (unlikely(has_group_leader_pid(tsk)))
posix_cpu_timers_exit_group(tsk);
}
#endif
if (group_dead) {
tty = sig->tty;
sig->tty = NULL;
} else {
/*
* If there is any task waiting for the
group exit
* then notify it:
*/
if (sig->notify_count > 0 && !--sig-
>notify_count)
wake_up_process(sig-
>group_exit_task);
if (tsk == sig->curr_target)
sig->curr_target = next_thread
(tsk);
}
add_device_randomness((const void*) &tsk-
>se.sum_exec_runtime,
sizeof(unsigned long long));
/*
* Accumulate here the counters for all threads as
they die. We could
* skip the group leader because it is the last
user of signal_struct,
* but we want to avoid the race with
thread_group_cputime() which can
* see the empty ->thread_head list.
*/
task_cputime(tsk, &utime, &stime);
write_seqlock(&sig->stats_lock);
sig->utime += utime;
sig->stime += stime;
sig->gtime += task_gtime(tsk);
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
sig->nivcsw += tsk->nivcsw;
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
task_io_accounting_add(&sig->ioac, &tsk->ioac);
sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig->nr_threads--;
__unhash_process(tsk, group_dead);
write_sequnlock(&sig->stats_lock);
/*
* Do this under ->siglock, we can race with
another thread
* doing sigqueue_free() if we have
SIGQUEUE_PREALLOC signals.
*/
flush_sigqueue(&tsk->pending);
tsk->sighand = NULL;
spin_unlock(&sighand->siglock);
__cleanup_sighand(sighand);
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
if (group_dead) {
flush_sigqueue(&sig->shared_pending);
tty_kref_put(tty);
}
}
/*
 * Unlink @task from the hash of the given pid type.  Passing NULL as
 * the replacement pid to __change_pid() drops the link without
 * attaching a new one; afterwards, RCU pid lookups (pid_task()) of
 * that type no longer return @task.
 */
void detach_pid(struct task_struct *task, enum pid_type type)
{
	__change_pid(task, type, NULL);
}
/*
 * Remove an exiting task from the global bookkeeping structures: the
 * pid hashes (detach_pid()) and, via RCU-safe list removal, the task
 * and thread lists.  Called from __exit_signal() with tasklist_lock
 * write-locked.  Once this runs, lockless pid->task lookups can no
 * longer find @p, which is what makes the later call_rcu() release of
 * the task_struct safe for RCU readers.
 */
static void __unhash_process(struct task_struct *p, bool group_dead)
{
	nr_threads--;
	detach_pid(p, PIDTYPE_PID);
	if (group_dead) {
		/* Group-wide pid types are carried by the leader only. */
		detach_pid(p, PIDTYPE_TGID);
		detach_pid(p, PIDTYPE_PGID);
		detach_pid(p, PIDTYPE_SID);
		list_del_rcu(&p->tasks);
		list_del_init(&p->sibling);
		__this_cpu_dec(process_counts);
	}
	list_del_rcu(&p->thread_group);
	list_del_rcu(&p->thread_node);
}
/*
 * Final release of a dead task: drop its credentials' process count,
 * flush its /proc entries, detach it from the pid hashes and thread
 * lists (__exit_signal() -> __unhash_process()) under tasklist_lock,
 * and finally hand the task_struct to RCU.  Because the final put is
 * deferred via call_rcu(), any reader that found the task through an
 * RCU pid lookup before detach_pid() ran can still safely call
 * get_task_struct() on it.
 */
void release_task(struct task_struct * p)
{
	struct task_struct *leader;
	int zap_leader;
repeat:
	/* don't need to get the RCU readlock here - the process is dead and
	 * can't be modifying its own credentials. But shut RCU-lockdep up */
	rcu_read_lock();
	atomic_dec(&__task_cred(p)->user->processes);
	rcu_read_unlock();

	proc_flush_task(p);

	write_lock_irq(&tasklist_lock);
	ptrace_release_task(p);
	__exit_signal(p);

	/*
	 * If we are the last non-leader member of the thread
	 * group, and the leader is zombie, then notify the
	 * group leader's parent process. (if it wants notification.)
	 */
	zap_leader = 0;
	leader = p->group_leader;
	if (leader != p && thread_group_empty(leader) &&
	    leader->exit_state == EXIT_ZOMBIE) {
		/*
		 * If we were the last child thread and the leader has
		 * exited already, and the leader's parent ignores SIGCHLD,
		 * then we are the one who should release the leader.
		 */
		zap_leader = do_notify_parent(leader, leader->exit_signal);
		if (zap_leader)
			leader->exit_state = EXIT_DEAD;
	}

	write_unlock_irq(&tasklist_lock);
	release_thread(p);
	/* Defer the final put until all pre-existing RCU readers are done. */
	call_rcu(&p->rcu, delayed_put_task_struct);

	/* If we must also reap the zombie leader, loop once more on it. */
	p = leader;
	if (unlikely(zap_leader))
		goto repeat;
}
/*
 * Drop the cached /proc/<pid> dentries of @task in every pid
 * namespace level it is visible in, so stale proc entries do not
 * outlive the exiting task.
 */
void proc_flush_task(struct task_struct *task)
{
	struct pid *pid = task_pid(task);
	struct pid *tgid = task_tgid(task);
	int level;

	for (level = 0; level <= pid->level; level++) {
		struct upid *upid = &pid->numbers[level];

		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
				    tgid->numbers[level].nr);
	}
}
/*
 * RCU callback queued by release_task().  Runs only after a grace
 * period has elapsed, so no RCU reader can still be looking at the
 * task when the reference taken at fork time is dropped here.
 */
static void delayed_put_task_struct(struct rcu_head *rhp)
{
	struct task_struct *task;

	task = container_of(rhp, struct task_struct, rcu);
	perf_event_delayed_put(task);
	trace_sched_process_free(task);
	put_task_struct(task);
}
/*
 * Drop one reference on @t; the task_struct is torn down and freed
 * (__put_task_struct()) when the last reference goes away.
 */
static inline void put_task_struct(struct task_struct *t)
{
	int was_last = atomic_dec_and_test(&t->usage);

	if (was_last)
		__put_task_struct(t);
}
/*
 * Called when the last ->usage reference is dropped: tear down the
 * remaining per-task state and free the task_struct.  The WARN_ONs
 * assert the task has fully exited, holds no references, and is not
 * trying to free itself.
 */
void __put_task_struct(struct task_struct *tsk)
{
	WARN_ON(!tsk->exit_state);
	WARN_ON(atomic_read(&tsk->usage));
	WARN_ON(tsk == current);

	security_task_free(tsk);
	exit_creds(tsk);
	delayacct_tsk_free(tsk);
	put_signal_struct(tsk->signal);

	/* The profiling handoff hook may take ownership of the struct. */
	if (!profile_handoff_task(tsk))
		free_task(tsk);
}
/*
 * Release the kernel stack / thread_info and finally the task_struct
 * itself.  Last step of task teardown, reached from
 * __put_task_struct() once the refcount hits zero.
 */
void free_task(struct task_struct *tsk)
{
	account_kernel_stack(tsk->stack, -1);
	arch_release_thread_info(tsk->stack);
	free_thread_info(tsk->stack);
	rt_mutex_debug_task_free(tsk);
	ftrace_graph_exit_task(tsk);
	put_seccomp_filter(tsk);
	arch_release_task_struct(tsk);
	free_task_struct(tsk);
}
/* Return the task_struct memory to its dedicated slab cache. */
static inline void free_task_struct(struct task_struct *tsk)
{
	kmem_cache_free(task_struct_cachep, tsk);
}
EXPORT_SYMBOL(free_task);
/*
 * Resolve /proc/<pid>/exe: map the proc inode to its task (taking a
 * temporary task reference), then to the mm, then to the exe file,
 * dropping each intermediate reference as soon as it is no longer
 * needed.  On success *exe_path holds a path reference owned by the
 * caller; returns -ENOENT if the task, mm, or exe file is gone.
 */
static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
{
	struct task_struct *task;
	struct mm_struct *mm;
	struct file *exe_file;

	task = get_proc_task(dentry->d_inode);
	if (!task)
		return -ENOENT;

	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		return -ENOENT;

	exe_file = get_mm_exe_file(mm);
	mmput(mm);
	if (!exe_file)
		return -ENOENT;

	*exe_path = exe_file->f_path;
	path_get(&exe_file->f_path);
	fput(exe_file);
	return 0;
}
/*
 * General functions
 */

/* Map a VFS inode back to its containing proc_inode. */
static inline struct proc_inode *PROC_I(const struct inode *inode)
{
	return container_of(inode, struct proc_inode, vfs_inode);
}
/* The struct pid a /proc inode refers to (a pid, not a task pointer). */
static inline struct pid *proc_pid(struct inode *inode)
{
	return PROC_I(inode)->pid;
}
/*
 * Resolve a /proc inode to its task the async-safe way: go
 * pid -> task under RCU and take a reference (see get_pid_task()).
 * May return NULL if the process has already exited; on success the
 * caller owns a reference and must put_task_struct() it.
 */
static inline struct task_struct *get_proc_task(struct inode *inode)
{
	return get_pid_task(proc_pid(inode), PIDTYPE_PID);
}
/*
 * pid -> task with a reference: look the task up under
 * rcu_read_lock() and, while still inside the read-side critical
 * section, bump ->usage with get_task_struct() so the task cannot be
 * freed once we leave the critical section.  Returns NULL if the pid
 * no longer has a task of @type attached (the process exited and
 * detach_pid() already ran).  The caller must drop the reference with
 * put_task_struct().
 */
struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
{
	struct task_struct *result;
	rcu_read_lock();
	result = pid_task(pid, type);
	if (result)
		get_task_struct(result);
	rcu_read_unlock();
	return result;
}
/*
 * Lockless pid -> task lookup.  Reads the first entry of the pid's
 * task hlist with an RCU dereference, so it must be called under
 * rcu_read_lock() (or with tasklist_lock held -- the lockdep check
 * accepts either).  Returns NULL when @pid is NULL or no task of
 * @type is attached.  The returned pointer is only stable for the
 * duration of the RCU read-side critical section unless the caller
 * takes a reference (see get_pid_task()).
 */
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
	struct task_struct *result = NULL;
	if (pid) {
		struct hlist_node *first;
		first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
					      lockdep_tasklist_lock_is_held());
		if (first)
			result = hlist_entry(first, struct task_struct,
					     pids[(type)].node);
	}
	return result;
}
/* Take a reference on @tsk by bumping ->usage; pair with put_task_struct(). */
#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
/*
 * Grab a reference to @task's mm.  Returns NULL for kernel threads
 * (PF_KTHREAD) or if the task has no mm; otherwise the caller owns an
 * mm_users reference and must release it with mmput().  task_lock()
 * keeps ->mm stable while we take the reference.
 */
struct mm_struct *get_task_mm(struct task_struct *task)
{
	struct mm_struct *mm = NULL;

	task_lock(task);
	if (task->mm && !(task->flags & PF_KTHREAD)) {
		mm = task->mm;
		atomic_inc(&mm->mm_users);
	}
	task_unlock(task);

	return mm;
}
EXPORT_SYMBOL_GPL(get_task_mm);
/*
 * Return a referenced pointer to @mm's executable file, or NULL if it
 * has none.  The caller must fput() the result.
 */
struct file *get_mm_exe_file(struct mm_struct *mm)
{
	struct file *file;

	/* mmap_sem guards ->exe_file against concurrent removal. */
	down_read(&mm->mmap_sem);
	file = mm->exe_file;
	if (file)
		get_file(file);
	up_read(&mm->mmap_sem);

	return file;
}