An In-Depth Look at the Differences Between Processes and Threads

This article examines in detail how processes and threads differ in memory overhead: threads share the process's address space (the mm_struct), and in fork and pthread_create the clone_flags decide which resources get copied. Threads are cheap largely because they share the process's VM, open files, filesystem information, and signal handlers; inside the Linux kernel, processes and threads alike are scheduled as the same kind of task.

Interviewers love to ask about the difference between a process and a thread. A quick Baidu search turns up the usual answers:

[screenshot of typical answers omitted]

Overall they are fairly abstract, and two points in particular are rarely explained in depth:
1. What does "sharing the address space" mean? Which address space is being referred to?
2. Why is a process said to be heavyweight and a thread lightweight?

Shared address space: which address space, exactly?

In the kernel, every process and every thread is described by a task_struct.
This structure holds the resources the task uses for memory management and scheduling.

struct task_struct {
	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
	void *stack;
	atomic_t usage;
	unsigned int flags;	/* per process flags, defined below */
	unsigned int ptrace;
	struct mm_struct *mm, *active_mm;
	/* ... remaining fields omitted ... */
};

The memory resources themselves are maintained through the struct mm_struct *mm, *active_mm fields. This structure records the memory layout of the running program: code, data, heap, stack, and so on.

struct mm_struct {
	struct vm_area_struct *mmap;		/* list of VMAs */
	struct rb_root mm_rb;
	u32 vmacache_seqnum;                   /* per-thread vmacache */
#ifdef CONFIG_MMU
	unsigned long (*get_unmapped_area) (struct file *filp,
				unsigned long addr, unsigned long len,
				unsigned long pgoff, unsigned long flags);
#endif
	unsigned long mmap_base;		/* base of mmap area */
	unsigned long mmap_legacy_base;         /* base of mmap area in bottom-up allocations */
	unsigned long task_size;		/* size of task vm space */
	unsigned long highest_vm_end;		/* highest vma end address */
	pgd_t * pgd;
	atomic_t mm_users;			/* How many users with user space? */
	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
	atomic_long_t nr_ptes;			/* PTE page table pages */
#if CONFIG_PGTABLE_LEVELS > 2
	atomic_long_t nr_pmds;			/* PMD page table pages */
#endif
	int map_count;				/* number of VMAs */

	spinlock_t page_table_lock;		/* Protects page tables and some counters */
	struct rw_semaphore mmap_sem;

	struct list_head mmlist;		/* List of maybe swapped mm's.	These are globally strung
						 * together off init_mm.mmlist, and are protected
						 * by mmlist_lock
						 */


	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
	unsigned long hiwater_vm;	/* High-water virtual memory usage */

	unsigned long total_vm;		/* Total pages mapped */
	unsigned long locked_vm;	/* Pages that have PG_mlocked set */
	unsigned long pinned_vm;	/* Refcount permanently increased */
	unsigned long shared_vm;	/* Shared pages (files) */
	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
	unsigned long def_flags;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long start_brk, brk, start_stack;
	unsigned long arg_start, arg_end, env_start, env_end;

	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

	/*
	 * Special counters, in some configurations protected by the
	 * page_table_lock, in other configurations by being atomic.
	 */
	struct mm_rss_stat rss_stat;

	struct linux_binfmt *binfmt;

	cpumask_var_t cpu_vm_mask_var;

	/* Architecture-specific MM context */
	mm_context_t context;

	unsigned long flags; /* Must use atomic bitops to access the bits */

	struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_AIO
	spinlock_t			ioctx_lock;
	struct kioctx_table __rcu	*ioctx_table;
#endif
#ifdef CONFIG_MEMCG
	/*
	 * "owner" points to a task that is regarded as the canonical
	 * user/owner of this mm. All of the following must be true in
	 * order for it to be changed:
	 *
	 * current == mm->owner
	 * current->mm != mm
	 * new_owner->mm == mm
	 * new_owner->alloc_lock is held
	 */
	struct task_struct __rcu *owner;
#endif

	/* store ref to file /proc/<pid>/exe symlink points to */
	struct file __rcu *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
	struct mmu_notifier_mm *mmu_notifier_mm;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
	struct cpumask cpumask_allocation;
#endif
#ifdef CONFIG_NUMA_BALANCING
	/*
	 * numa_next_scan is the next time that the PTEs will be marked
	 * pte_numa. NUMA hinting faults will gather statistics and migrate
	 * pages to new nodes if necessary.
	 */
	unsigned long numa_next_scan;

	/* Restart point for scanning and setting pte_numa */
	unsigned long numa_scan_offset;

	/* numa_scan_seq prevents two threads setting pte_numa */
	int numa_scan_seq;
#endif
#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
	/*
	 * An operation with batched TLB flushing is going on. Anything that
	 * can move process memory needs to flush the TLB when moving a
	 * PROT_NONE or PROT_NUMA mapped page.
	 */
	bool tlb_flush_pending;
#endif
	struct uprobes_state uprobes_state;
#ifdef CONFIG_X86_INTEL_MPX
	/* address of the bounds directory */
	void __user *bd_addr;
#endif
};
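Before drawing a picture, it helps to see this from user space: the VMAs that mm_struct manages (the mmap list and mm_rb tree above) are exactly what /proc/<pid>/maps exposes. A minimal sketch, assuming a Linux system with procfs mounted (the file name show_vmas.c is made up for illustration):

/* show_vmas.c - print this process's own memory mappings; each line of
 * /proc/self/maps corresponds to one vm_area_struct managed by mm_struct.
 * Build: gcc show_vmas.c -o show_vmas
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/self/maps", "r");
	if (!f) {
		perror("fopen");
		return 1;
	}

	char line[512];
	while (fgets(line, sizeof line, f))
		fputs(line, stdout);	/* code, data, heap, stack, libraries, mmap regions */

	fclose(f);
	return 0;
}

The [heap] and [stack] lines in the output correspond to the start_brk/brk and start_stack fields in the structure above.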

Putting this together, we get the following picture:

[figure omitted]
Each process maintains its own mm_struct, while every thread inside the process shares that single mm_struct, which gives us the next picture (a small user-space sketch follows it):

[figure omitted]
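A minimal user-space sketch of the difference (the file and variable names here are made up for illustration): a write made by a pthread is visible to main because both run on the same mm_struct, while a write made in a forked child is not, because fork gave the child its own copy-on-write copy.

/* threads_vs_fork.c - a pthread shares the caller's address space; a forked
 * child gets a copy-on-write copy of it.
 * Build: gcc threads_vs_fork.c -pthread -o threads_vs_fork
 */
#include <pthread.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

static int counter = 0;			/* lives in the data segment tracked by mm_struct */

static void *thread_fn(void *arg)
{
	counter = 100;			/* same address space: visible to main() */
	return NULL;
}

int main(void)
{
	pthread_t tid;
	pthread_create(&tid, NULL, thread_fn, NULL);
	pthread_join(tid, NULL);
	printf("after thread: counter = %d\n", counter);	/* 100 */

	pid_t pid = fork();
	if (pid == 0) {			/* child has its own mm_struct (copy-on-write) */
		counter = 999;
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("after fork:   counter = %d\n", counter);	/* still 100 */
	return 0;
}

Expected output is "after thread: counter = 100" followed by "after fork: counter = 100": the child's write never reaches the parent's pages.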
Besides mm_struct, is any other data shared? Of course. Let's trace the code paths of fork and pthread_create: both end up calling the kernel's _do_fork, and the key function there is copy_process.

long do_fork(unsigned long clone_flags,
        unsigned long stack_start,
        unsigned long stack_size,
        int __user *parent_tidptr,
        int __user *child_tidptr)
{
  return _do_fork(clone_flags, stack_start, stack_size,
      parent_tidptr, child_tidptr, 0);
}

long _do_fork(unsigned long clone_flags,
        unsigned long stack_start,
        unsigned long stack_size,
        int __user *parent_tidptr,
        int __user *child_tidptr,
        unsigned long tls)
{
	// .... 省略
	p = copy_process(clone_flags, stack_start, stack_size,
       child_tidptr, NULL, trace, tls);
	// .... 省略
}

Let's look at what copy_process does. The kernel's own comment is already explicit: it creates a new process as a copy of the old one, copying the registers and the appropriate parts of the process environment according to the clone flags.
/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 */
static struct task_struct *copy_process(unsigned long clone_flags,
					unsigned long stack_start,
					unsigned long stack_size,
					int __user *child_tidptr,
					struct pid *pid,
					int trace,
					unsigned long tls)
{
	/* ... */

	/* copy all the process information */
	shm_init_task(p);
	retval = copy_semundo(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_audit;
	retval = copy_files(clone_flags, p);	/* open file descriptor table */
	if (retval)
		goto bad_fork_cleanup_semundo;
	retval = copy_fs(clone_flags, p);	/* filesystem info: root, cwd */
	if (retval)
		goto bad_fork_cleanup_files;
	retval = copy_sighand(clone_flags, p);	/* signal handlers */
	if (retval)
		goto bad_fork_cleanup_fs;
	retval = copy_signal(clone_flags, p);	/* signal state */
	if (retval)
		goto bad_fork_cleanup_sighand;
	retval = copy_mm(clone_flags, p);	/* virtual memory (mm_struct) */
	if (retval)
		goto bad_fork_cleanup_signal;
	/* ... */
}
So which flags does thread creation actually pass in? Tracing pthread_create down to clone shows it sets, among others,

CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND

so the thread shares the address space, open files, filesystem information and signal handlers with its process. The sketch below mimics this with a direct clone(2) call.
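As a rough illustration (not the exact glibc implementation, which also passes CLONE_THREAD, CLONE_SYSVSEM, CLONE_SETTLS and friends; the file name clone_flags_demo.c is made up), here is a sketch that creates a child with exactly those four sharing flags:

/* clone_flags_demo.c - create a thread-like child by calling clone(2) with
 * the four sharing flags discussed above.
 * Build: gcc clone_flags_demo.c -o clone_flags_demo
 */
#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

static int shared = 0;

static int child_fn(void *arg)
{
	shared = 42;			/* CLONE_VM: the write lands in the caller's address space */
	return 0;
}

int main(void)
{
	const size_t stack_size = 1024 * 1024;
	char *stack = malloc(stack_size);
	if (!stack)
		return 1;

	/* share VM, filesystem info, open files and signal handlers;
	 * SIGCHLD lets the parent wait for the child normally */
	int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD;
	pid_t pid = clone(child_fn, stack + stack_size, flags, NULL);
	if (pid < 0) {
		perror("clone");
		return 1;
	}

	waitpid(pid, NULL, 0);
	printf("shared = %d\n", shared);	/* prints 42: the address space really is shared */
	free(stack);
	return 0;
}

Because CLONE_VM is set, the child's write to shared is visible to the parent, exactly as it would be for a pthread.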
To summarize:

[figure omitted]

Why is a process expensive and a thread cheap?

1. A thread shares most of its process's memory, so the memory cost of creating and running one is much lower.
2. The Linux kernel has essentially no separate notion of a thread: a Linux thread is simply a task that shares certain resources with other tasks. Whether user space calls it a process or a thread, the kernel schedules them uniformly, keeping a runqueue of runnable tasks and picking one to run when conditions allow.

So when we say "processes are expensive, threads are cheap", we are mainly talking about memory overhead: every fork has to allocate new page tables and set up its own copies of the VM, file, fs and signal state, which costs CPU instructions as well as memory. A rough timing sketch follows.
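The sketch below (file name spawn_cost.c is made up; absolute numbers depend on the machine and kernel) times N fork()+wait cycles against N pthread_create()+join cycles to make the gap visible:

/* spawn_cost.c - a rough, unscientific comparison of process creation cost
 * versus thread creation cost.
 * Build: gcc spawn_cost.c -pthread -O2 -o spawn_cost
 */
#include <pthread.h>
#include <stdio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#define N 1000

static void *noop(void *arg)
{
	return NULL;
}

static double elapsed_ms(struct timespec a, struct timespec b)
{
	return (b.tv_sec - a.tv_sec) * 1e3 + (b.tv_nsec - a.tv_nsec) / 1e6;
}

int main(void)
{
	struct timespec t0, t1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (int i = 0; i < N; i++) {
		pid_t pid = fork();	/* new task_struct plus its own mm, files, fs, signals */
		if (pid == 0)
			_exit(0);
		waitpid(pid, NULL, 0);
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);
	printf("fork:           %.2f ms for %d children\n", elapsed_ms(t0, t1), N);

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (int i = 0; i < N; i++) {
		pthread_t tid;		/* new task_struct only; mm, files, fs, signals are shared */
		pthread_create(&tid, NULL, noop, NULL);
		pthread_join(tid, NULL);
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);
	printf("pthread_create: %.2f ms for %d threads\n", elapsed_ms(t0, t1), N);
	return 0;
}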
