linux内核进程管理

最新推荐文章于 2023-03-17 10:43:17 发布

李彦龙

最新推荐文章于 2023-03-17 10:43:17 发布

阅读量486

点赞数

分类专栏：英本教育文章标签： linux内核进程调度

本文链接：https://blog.csdn.net/u013388374/article/details/41654387

版权

英本教育专栏收录该内容

1 篇文章 0 订阅

订阅专栏

——————/7
[ ]内核态进程，否则用户态进程
PCB进程控制块
task_struct is allocated via slab allocator to provide object reuse and cache coloring
root@lyl:~# cat /proc/slabinfo
root@lyl:~# cat /proc/sys/kernel/pid_max
32768

struct task_struct {
	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
	pid_t pid;
	pid_t tgid;
	int prio, static_prio, normal_prio;
	struct mm_struct *mm, *active_mm;  //进程所占内存
	struct files_struct *files; 	/* open file information */
	……

——/
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
#define __TASK_STOPPED 4
#define __TASK_TRACED 8
——/
set_task_state(tsk, state_value)
task->state=state;

——————/8
用户线程pthread_create 内核线程kthread_create / kthread_run

Process Resource Limits
root@lyl:~# ulimit -c -n -s
core file size (blocks, -c) 0 //系统崩溃时自动产生
open files (-n) 1024
stack size (kbytes, -s) 8192
root@lyl:~# ulimit –a
root@lyl:~# cat /proc/self/limits
——/
获取/设置资源限制（getrlimit / setrlimit）

#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/wait.h>
#include <errno.h>

#define DEATH(mess) { perror(mess); exit(errno); }

void do_limit (int limit, const char *limit_string, struct rlimit *rlim)
{
    if (getrlimit (limit, rlim))
        fprintf (stderr, "Failed in getrlimit\n");
    printf ("%15s=%2d: cur=%20lu,     max=%20lu\n", limit_string,
            limit, rlim->rlim_cur, rlim->rlim_max);
}

void print_limits (void)
{
    struct rlimit rlim;
    do_limit (RLIMIT_CPU, "RLIMIT_CPU", &rlim);
    do_limit (RLIMIT_FSIZE, "RLMIT_FSIZE", &rlim);
    do_limit (RLIMIT_DATA, "RLMIT_DATA", &rlim);
    do_limit (RLIMIT_STACK, "RLIMIT_STACK", &rlim);
    do_limit (RLIMIT_CORE, "RLIMIT_CORE", &rlim);
    do_limit (RLIMIT_RSS, "RLIMIT_RSS", &rlim);
    do_limit (RLIMIT_NPROC, "RLIMIT_NPROC", &rlim);
    do_limit (RLIMIT_NOFILE, "RLIMIT_NOFILE", &rlim);
    do_limit (RLIMIT_MEMLOCK, "RLIMIT_MEMLOCK", &rlim);
    do_limit (RLIMIT_AS, "RLIMIT_AS", &rlim);
    do_limit (RLIMIT_LOCKS, "RLIMIT_LOCKS", &rlim);
}

int main (int argc, char *argv[])
{
    struct rlimit rlim;
    pid_t pid = 0;
    int status = 0, nchildren = 3, i;

    printf ("Printing out all limits for pid=%d:\n", getpid ());
    print_limits ();

    /* change and printout the limit for core file size */

    printf ("\nBefore Modification, this is RLIMIT_CORE:\n");
    do_limit (RLIMIT_CORE, "RLIMIT_CORE", &rlim);
    rlim.rlim_cur = 8 * 1024 * 1024;
    printf ("I forked off a child with pid = %d\n", (int)pid);

    setrlimit (RLIMIT_CORE, &rlim);
    printf ("\nAfter  Modification, this is RLIMIT_CORE:\n");
    do_limit (RLIMIT_CORE, "RLIMIT_CORE", &rlim);

    /* fork off the nchildren */
    fflush (stdout);
    for (i = 0; i < nchildren; i++) 
	{
        pid = fork ();
        if (pid < 0)
            DEATH ("Failed in fork");
        if (pid == 0) 
		{         /* any child */
            printf ("\nIn child pid= %d this is RLIMIT_CORE:\n",(int)getpid ());
            do_limit (RLIMIT_CORE, "RLIMIT_CORE", &rlim);
            fflush (stdout);
            sleep (3);
            exit (0);
        }
    }

    while (pid > 0) 
	{           /* parent */
        pid = wait (&status);  //无子进程，调用就会失败，返回-1
        printf ("Parent got return on pid=%dn\n", (int)pid);
    }
    exit (0);
}

——————/9

struct pid
{
	atomic_t count;  //该结构体引用计数
	unsigned int level;
	/* lists of tasks that use this pid */
	struct hlist_head tasks[PIDTYPE_MAX];
	struct rcu_head rcu;  //同步机制
	struct upid numbers[1];
};

level是该pid在pid_namespace中处于第几层，level=0表global namespace，即最高层。
tasks[PIDTYPE_MAX]数组中每个元素都代表了不同含义。

enum pid_type
{
	PIDTYPE_PID,
	PIDTYPE_PGID,
	PIDTYPE_SID,
	PIDTYPE_MAX
};

PIDTYPE_PID表进程描述符(PID)。PIDTYPE_PGID表组进程描述符，好处是如有一信号是针对组进程描述符，该组内所有进程都可接收。PIDTYPE_SID是群组进程描述符，形成一session，为更高层次的抽象。
tasks[i]指向一哈希表，如tasks[PIDTYPE_PID]指向PID哈希表。

numbers[1]为upid结构体，numbers数组本意是想表不同pid_namespace。一PID可属不同namespace，numbers[0]表global namespace，numbers[i]表第i层namespace，i越大所在层级越低。目前该数组只有一元素，即global namespace。所以namepace概念虽引入pid，但并未真正使用，在未来的版本可能会用到。

struct upid {
	/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
	int nr;
	struct pid_namespace *ns;
	struct hlist_node pid_chain;
};

nr是pid的值，即task_struct中pid_t pid域的值；
ns指向该pid所处的namespace；
linux内核将所有进程的upid都存放于一哈希表中（pid_hash），以方便查找和统一管理。因此，pid结构体中的numbers[0]指向的upid instance存放在pid_hash中，通过pid_chain就能从pid_hash中找到该upid；

struct pid_namespace {
	struct pidmap pidmap[PIDMAP_ENTRIES];
	int last_pid;
	struct task_struct *child_reaper;
	struct kmem_cache *pid_cachep;
	unsigned int level;
	struct pid_namespace *parent;
	gid_t pid_gid;
	int hide_pid;
	int reboot;	/* group exit code if this pidns was rebooted */
	……;
};

pidmap表该pid_namespace下pid已分配情况
child_reaper指向一进程，作用是当进程结束时为其收尸，因只支持global namespace，故child_reaper指向init_task
level表该namespace处于哪一层，显然是0
parent指向该namespace的父namespace，现在一定是NULL

增加namespace概念的目的是为了虚拟化和方便管理。如在不同的namespace中可有pid相同的进程，pid_namespace的结构是层次化的，且在child namespace中的进程一定会有parent namespace的映射，如图：

此时pid_hash全局哈希表中会存放15个(9+3+3)upid的instance
介绍了pid，upid，pid_namespace的概念，再来看看其和task_struct的关系：

右下角椭圆形虚线框是全局pid_hash，所有已分配的upid都保存在该hash表中
左下角椭圆形虚线框表pid_namespace关系，当然目前只有一层

static inline struct pid *task_pid(struct task_struct *task)
{
	return task->pids[PIDTYPE_PID].pid;
}

——/
PAGE_OFFSET值0xC0000000
——/

do_fork
	copy_process
	task_pid_vnr  //确定pid
	if (clone_flags & CLONE_VFORK)
		init_completion(&vfork);  //initialize vfork completion handler and ptrace flags
	wake_up_new_task
	if (clone_flags & CLONE_VFORK)
		wait_for_completion  //completion完成量，用于并发控制

copy_process
	//check flags
	dup_task_struct(current)
		alloc_task_struct
			kmem_cache_alloc (task_struct_cachep  //slab机制
		arch_dup_task_struct  //体系结构相关
	//check resource limits
	//initialize task_struct
	sched_fork  //Perform scheduler related setup.Assign this task to a CPU.
	
	copy_files
	copy_fs
	copy_signal
	copy_mm
	copy_namespaces
	copy_thread
	copy_io

——/
/* create a slab on which task_structs can be allocated */
task_struct_cachep =kmem_cache_create ("task_struct", sizeof(struct task_struct),…
#cat /proc/slabinfo
task_struct 317 321 ……
——/

struct thread_info  //线程描述结构体，核心仍为task_struct
	struct task_struct	*task;

——————/10

copy_mm
	struct mm_struct * mm
	mm=dup_mm
		struct mm_struct *oldmm = current->mm;
		mm = allocate_mm();
			kmem_cache_alloc(mm_cachep
				mm_cachep = kmem_cache_create("mm_struct"
		memcpy(mm, oldmm, sizeof(*mm));
	tsk->mm = mm

——/

execve
	sys_execve
		do_execve
			/*This structure is used to hold the arguments when loading binaries.*/
			struct linux_binprm *bprm;
			struct file *file;
			file = open_exec(filename);  //打开二进制文件
				do_filp_open
					path_lookup_open
			bprm->file = file;
			bprm->filename = filename;
			prepare_binprm(bprm);
				struct inode * inode = ……
					unsigned long		i_ino;  //保存inode号
				权限检查
			search_binary_handler (bprm,regs);  
			//找到能处理该二进制文件的handler并调用
				struct linux_binfmt *fmt;
				int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
				fn(bprm, regs);  //真正意义执行

——/
进程退出exit与_exit，内核均do_exit

do_exit
	exit_signals
	exit_mm
	exit_sem
	exit_files
	exit_fs
		struct fs_struct *fs = tsk->fs;
		free_fs_struct(fs);
			kmem_cache_free(fs_cachep, fs);
	exit_thread
	preempt_disable
	tsk->state = TASK_DEAD;
	schedule

——————/11

int main(void)
{
    printf("hello world");
    //exit(0);  //flush
    _exit(0);  //not flush
}

——/

#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <sys/types.h>
#include <sys/wait.h>

int param = 0;

int slav (void *data)
{
    int j;
    printf ("starting off slave thread, pid=%d\n", getpid ());
    for (j = 0; j < 10; j++) 
	{
        param = j + 1000;
        sleep (1);
        printf ("\nslave thread running: j=%d, param=%d secs\n", j, param);
    }
    printf ("\nSlave thread saying goodbye\n");
    return 0;
}

int main ()
{
    int j, tid, pagesize, stacksize;
    void *stack;
    printf ("starting off master thread, pid=%d\n", getpid ());
    pagesize = getpagesize ();
    stacksize = 4 * pagesize;

    /* could probably just use malloc(), but this is safer */
    /* stack = (char *)memalign (pagesize, stacksize); */
    posix_memalign (&stack, pagesize, stacksize);
    printf ("Setting off a clone thread with stacksize = %d....", stacksize);
    tid = clone (slav, (char *)stack + stacksize - 1, CLONE_VM | SIGCHLD, 0);
    printf (" with tid=%d\n", tid);
    if (tid < 0)
        exit (1);

    /* could do a  wait (&status) here if required */
    for (j = 0; j < 6; j++) 
	{
        param = j;
        sleep (1);
        printf ("\nmaster thread running: j=%d, param=%d secs\n", j, param);
    }
    printf ("master killitself\n");
    free (stack);
    exit (0);
}

因父子进程共享内存
父进程退出，子进程并未结束，子进程被init进程领养
clone (slav, (char *)stack + stacksize - 1, SIGCHLD, 0); //此时不再共享内存
——/
/* Scheduling policies */
#define SCHED_NORMAL 0
#define SCHED_FIFO 1 //实时
#define SCHED_RR 2 //实时
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5

#yum install crash debuginfo
crash>runq //displays the tasks on the runqueue of each cpu

——/
#fuser /dev/crash //列举哪些进程在使用该设备文件

——————/12
CFS完全公平调度算法

——————/13

schedule
	preempt_disable  //避免其它进程抢占本进程
	cpu = smp_processor_id();  //获取当前cpuId
	rq = cpu_rq(cpu);  //获取该cpu上运行队列
	prev = rq->curr;  //将当前进程作为上一进程
	next = pick_next_task(rq);  /* Pick up the highest-prio task */
		struct task_struct * pfair_sched_class.pick_next_task(rq);
			pick_next_task_fair
				struct cfs_rq *cfs_rq = &rq->cfs;
				struct sched_entity *se = pick_next_entity(cfs_rq);
					__pick_next_entity(cfs_rq);
						rb_entry(left, struct sched_entity, run_node);
	sched_info_switch(prev, next);
	rq->nr_switches++;
	rq->curr = next;
	context_switch(rq, prev, next);  //上下文切换
	preempt_enable_no_resched

——————/14
nice (niceval)
——/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <errno.h>

int main ()
{
    pid_t mypid;
    int old_prio, new_prio, i, rc;
    mypid = getpid ();

    printf ("\nExamining priorities forPID = %d \n", mypid);
    printf ("%10s%10s%10s\n", "Previous", "Requested", "Assigned");

    for (i = -20; i < 20; i += 2) 
	{
        old_prio = getpriority (PRIO_PROCESS, (int)mypid);
        rc = setpriority (PRIO_PROCESS, (int)mypid, i);
        if (rc)
            fprintf (stderr, "setpriority() failed ");
        errno = 0;
        new_prio = getpriority (PRIO_PROCESS, (int)mypid);
        printf ("%10d%10d%10d\n", old_prio, i, new_prio);
    }
    exit (0);
}

——————/15
内核中调度类：CFS、RT

task_struct
	struct sched_class *sched_class;  //调度类
		pick_next_task
		void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);  //入队
		void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);  //出队
		move_one_task 
		void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);

——/
调度相关系统调用：

#include <sched.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>

#define DEATH(mess) { perror(mess); exit(errno); }

void printpolicy (int policy)
{
    if (policy == SCHED_OTHER)
        printf ("policy = SCHED_OTHER = %d\n", policy);
    if (policy == SCHED_FIFO)
        printf ("policy = SCHED_FIFO = %d\n", policy);
    if (policy == SCHED_RR)
        printf ("policy = SCHED_RR = %d\n", policy);
}

int main (int argc, char **argv)
{
    int policy;
    struct sched_param p;

    /* obtain current scheduling policy for this process */
    policy = sched_getscheduler (0);  //0获取当前进程
    printpolicy (policy);

    /* reset scheduling policy */
    printf ("\nTrying sched_setscheduler...\n");
    policy = SCHED_FIFO;
    printpolicy (policy);
    p.sched_priority = 50;
    if (sched_setscheduler (0, policy, &p))
        DEATH ("sched_setscheduler:");
    printf ("p.sched_priority = %d\n", p.sched_priority);
    exit (0);
}

多核与多处理器概念异
SMP（Symmetric Multi-Processing）：对称多处理器
负载均衡：将任务平分到各CPU（很热门的技术）

load_balance
	move_tasks
		class->load_balance(......

——————/16

struct rq
	struct task_struct *migration_thread;

——/

// callback that gets triggered when a CPU is added
migration_call
	struct task_struct *p kthread_create(migration_thread, hcpu, "migration/%d",cpu);
	kthread_bind(p, cpu);  //线程与CPU绑定
		migration_thread
			BUG_ON(rq->migration_thread != current);
			__migrate_task (struct task_struct *p, int src_cpu, int dest_cpu)
				rq_src = cpu_rq(src_cpu);
				rq_dest = cpu_rq(dest_cpu);
				deactivate_task (struct task_struct *p, struct rq *rq)
					dec_nr_running(p, rq);
					dequeue_task(p, p->array);
				set_task_cpu(p, dest_cpu);
				__activate_task(p, rq_dest);

——————/17

李彦龙

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
linux内核进程管理

——————/7[ ]内核态进程，否则用户态进程PCB进程控制块task_struct is allocated via slab allocator to provide object reuse and cache coloringroot@lyl:~# cat /proc/slabinforoot@lyl:~# cat /proc/sys/kernel/pid_max
复制链接

扫一扫