
1. 概述


  • vma;
  • malloc;
  • mmap;

在进程地址空间中,我们常见的代码段、数据段、bss段等,实际上都是一段地址空间区域。Linux将地址空间中的这类区域称为Virtual Memory Area(简称VMA),并使用struct vm_area_struct来描述。


2. 数据结构

主要涉及两个结构体:struct mm_struct 和 struct vm_area_struct

  • struct mm_struct
struct mm_struct {
	struct vm_area_struct *mmap;		/* list of VMAs */                              //指向VMA对象的链表头
	struct rb_root mm_rb;                                                                     //指向VMA对象的红黑树的根
	u64 vmacache_seqnum;                   /* per-thread vmacache */
	unsigned long (*get_unmapped_area) (struct file *filp,
				unsigned long addr, unsigned long len,
				unsigned long pgoff, unsigned long flags);              // 在进程地址空间中搜索有效线性地址区间的方法
	unsigned long mmap_base;		/* base of mmap area */
	unsigned long mmap_legacy_base;         /* base of mmap area in bottom-up allocations */
	/* Base adresses for compatible mmap() */
	unsigned long mmap_compat_base;
	unsigned long mmap_compat_legacy_base;
	unsigned long task_size;		/* size of task vm space */
	unsigned long highest_vm_end;		/* highest vma end address */
	pgd_t * pgd;        //指向页全局目录

	 * @mm_users: The number of users including userspace.
	 * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops
	 * to 0 (i.e. when the task exits and there are no other temporary
	 * reference holders), we also release a reference on @mm_count
	 * (which may then free the &struct mm_struct if @mm_count also
	 * drops to 0).
	atomic_t mm_users;      //使用计数器

	 * @mm_count: The number of references to &struct mm_struct
	 * (@mm_users count as 1).
	 * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
	 * &struct mm_struct is freed.
	atomic_t mm_count;      //使用计数器

	atomic_long_t nr_ptes;			/* PTE page table pages */      //进程页表数
	atomic_long_t nr_pmds;			/* PMD page table pages */
	int map_count;				/* number of VMAs */        //VMA的个数

	spinlock_t page_table_lock;		/* Protects page tables and some counters */
	struct rw_semaphore mmap_sem;

	struct list_head mmlist;		/* List of maybe swapped mm's.	These are globally strung
						 * together off init_mm.mmlist, and are protected
						 * by mmlist_lock

	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
	unsigned long hiwater_vm;	/* High-water virtual memory usage */

	unsigned long total_vm;		/* Total pages mapped */    //进程地址空间的页数
	unsigned long locked_vm;	/* Pages that have PG_mlocked set */    //锁住的页数,不能换出
	unsigned long pinned_vm;	/* Refcount permanently increased */
	unsigned long data_vm;		/* VM_WRITE & ~VM_SHARED & ~VM_STACK */     //数据段内存的页数
	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE & ~VM_STACK */         //可执行内存映射的页数
	unsigned long stack_vm;		/* VM_STACK */                                              //用户态堆栈的页数
	unsigned long def_flags;
	unsigned long start_code, end_code, start_data, end_data;       //代码段,数据段等的地址
	unsigned long start_brk, brk, start_stack;      //堆栈段的地址,start_stack表示用户态堆栈的起始地址,brk为堆的当前最后地址
	unsigned long arg_start, arg_end, env_start, env_end;  //命令行参数的地址,环境变量的地址

	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

	 * Special counters, in some configurations protected by the
	 * page_table_lock, in other configurations by being atomic.
	struct mm_rss_stat rss_stat;

	struct linux_binfmt *binfmt;

	cpumask_var_t cpu_vm_mask_var;

	/* Architecture-specific MM context */
	mm_context_t context;

	unsigned long flags; /* Must use atomic bitops to access the bits */

	struct core_state *core_state; /* coredumping support */
	atomic_t membarrier_state;
	spinlock_t			ioctx_lock;
	struct kioctx_table __rcu	*ioctx_table;
	 * "owner" points to a task that is regarded as the canonical
	 * user/owner of this mm. All of the following must be true in
	 * order for it to be changed:
	 * current == mm->owner
	 * current->mm != mm
	 * new_owner->mm == mm
	 * new_owner->alloc_lock is held
	struct task_struct __rcu *owner;
	struct user_namespace *user_ns;

	/* store ref to file /proc/<pid>/exe symlink points to */
	struct file __rcu *exe_file;
	struct mmu_notifier_mm *mmu_notifier_mm;
	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
	struct cpumask cpumask_allocation;
	 * numa_next_scan is the next time that the PTEs will be marked
	 * pte_numa. NUMA hinting faults will gather statistics and migrate
	 * pages to new nodes if necessary.
	unsigned long numa_next_scan;

	/* Restart point for scanning and setting pte_numa */
	unsigned long numa_scan_offset;

	/* numa_scan_seq prevents two threads setting pte_numa */
	int numa_scan_seq;
	 * An operation with batched TLB flushing is going on. Anything that
	 * can move process memory needs to flush the TLB when moving a
	 * PROT_NONE or PROT_NUMA mapped page.
	atomic_t tlb_flush_pending;
	/* See flush_tlb_batched_pending() */
	bool tlb_flush_batched;
	struct uprobes_state uprobes_state;
	atomic_long_t hugetlb_usage;
	struct work_struct async_put_work;

	/* HMM needs to track a few things per mm */
	struct hmm *hmm;
} __randomize_layout;
  • struct vm_area_struct
    用于描述进程地址空间中的一段虚拟区域,每一个VMA都对应一个struct vm_area_struct
/*
 * This struct describes a virtual memory area. There is one of these
 * per VM-area/task.  A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
	/* The first cache line has the info for VMA tree walking. */

	unsigned long vm_start;		/* Our start address within vm_mm. */       //起始地址
	unsigned long vm_end;		/* The first byte after our end address
					   within vm_mm. */         //结束地址,区间中不包含结束地址

	/* linked list of VM areas per task, sorted by address */       //按起始地址排序的链表
	struct vm_area_struct *vm_next, *vm_prev;

	struct rb_node vm_rb;       //红黑树节点

	 * Largest free memory gap in bytes to the left of this VMA.
	 * Either between this VMA and vma->vm_prev, or between one of the
	 * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
	 * get_unmapped_area find a free area of the right size.
	unsigned long rb_subtree_gap;

	/* Second cache line starts here. */

	struct mm_struct *vm_mm;	/* The address space we belong to. */
	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
	unsigned long vm_flags;		/* Flags, see mm.h. */

	 * For areas with an address space and backing store,
	 * linkage into the address_space->i_mmap interval tree.
	struct {
		struct rb_node rb;
		unsigned long rb_subtree_last;
	} shared;

	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
	 * list, after a COW of one of the file pages.	A MAP_SHARED vma
	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
	 * or brk vma (with NULL file) can only be in an anon_vma list.
	struct list_head anon_vma_chain; /* Serialized by mmap_sem &
					  * page_table_lock */
	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */

	/* Function pointers to deal with this struct. */
	const struct vm_operations_struct *vm_ops;

	/* Information about our backing store: */
	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
					   units */
	struct file * vm_file;		/* File we map to (can be NULL). */     //指向文件的一个打开实例
	void * vm_private_data;		/* was vm_pte (shared mem) */

	atomic_long_t swap_readahead_info;
#ifndef CONFIG_MMU
	struct vm_region *vm_region;	/* NOMMU mapping region */
	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;





/*
 * VMA lookup
 */

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);

/*
 * Similar to find_vma(), but additionally hands back, through pprev,
 * the VMA that precedes the result in the VMA list.
 */
extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
					     struct vm_area_struct **pprev);

/* Look up a VMA that intersects the range start_addr ~ end_addr. */
static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm,
							    unsigned long start_addr,
							    unsigned long end_addr);

/*
 * VMA insertion
 */

/* Insert the VMA into both the red-black tree and the linked list. */
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);

/*
 * VMA merging
 */

/* Merge the described range with neighbouring VMAs. */
extern struct vm_area_struct *vma_merge(struct mm_struct *,
	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
	struct mempolicy *, struct vm_userfaultfd_ctx);

/*
 * VMA splitting
 */

/* Split the VMA into two VMAs, using addr as the boundary. */
extern int split_vma(struct mm_struct *, struct vm_area_struct *,
	unsigned long addr, int new_below);


3. malloc






整个过程看起来就比较清晰和简单了:每个进程都用struct mm_struct来描述自身的进程地址空间,这些空间由若干vma区域组成,并通过一棵红黑树和一个链表来管理。因此针对malloc的处理,会动态地调整brk的位置,具体的范围则由struct vm_area_struct结构中的vm_start ~ vm_end来指定。在实际过程中,会根据请求分配的区域是否与现有vma重叠来分别处理:或者调整现有的vma,或者重新申请一个vma来描述这段区域,并最终插入到红黑树和链表中。


4. mmap


  • 文件映射: 将文件区域映射到进程空间,文件存放在存储设备上;
  • 匿名映射:没有文件对应的区域映射,内容存放在物理内存上;


  • 私有映射:将数据源拷贝副本,不影响其他进程;
  • 共享映射:共享的进程都能看到;


  1. 私有匿名映射: 通常分配大块内存时使用,堆,栈,bss段等;
  2. 共享匿名映射:常用于父子进程间通信,在内存文件系统中创建/dev/zero设备;
  3. 私有文件映射:常用的比如动态库加载,代码段,数据段等;
  4. 共享文件映射:常用于进程间通信,文件读写等;


/*
 * Page protection flags (PROT_*).
 */

/* page can not be accessed */
#define PROT_NONE	0x0
/* page can be read */
#define PROT_READ	0x1
/* page can be written */
#define PROT_WRITE	0x2
/* page can be executed */
#define PROT_EXEC	0x4
/* page may be used for atomic ops */
#define PROT_SEM	0x8
/* mprotect flag: extend change to start of growsdown vma */
#define PROT_GROWSDOWN	0x01000000
/* mprotect flag: extend change to end of growsup vma */
#define PROT_GROWSUP	0x02000000

/*
 * Mapping type and placement flags (MAP_*).
 */

/* Share changes */
#define MAP_SHARED	0x01
/* Changes are private */
#define MAP_PRIVATE	0x02
/* Mask for type of mapping */
#define MAP_TYPE	0x0f
/* Interpret addr exactly */
#define MAP_FIXED	0x10
/* don't use a file */
#define MAP_ANONYMOUS	0x20

/*
 * Additional mapping flags (MAP_*).
 */

/* stack-like segment */
#define MAP_GROWSDOWN	0x0100
/* ETXTBSY */
#define MAP_DENYWRITE	0x0800
/* mark it as an executable */
#define MAP_EXECUTABLE	0x1000
/* pages are locked */
#define MAP_LOCKED	0x2000
/* don't check for reservations */
#define MAP_NORESERVE	0x4000
/* populate (prefault) pagetables */
#define MAP_POPULATE	0x8000
/* do not block on IO */
#define MAP_NONBLOCK	0x10000
/* give out an address that is best suited for process/thread stacks */
#define MAP_STACK	0x20000
/* create a huge page mapping */
#define MAP_HUGETLB	0x40000


  • 0
  • 4
    觉得还不错? 一键收藏
  • 0
目录 第一章 Linux底层分段分页机制 5 1.1 基于x86的Linux分段机制 5 1.2 基于x86的Linux分页机制 7 1.2.1 页全局目录和页表 8 1.2.2 线性地址到物理地址 10 1.2.3 线性地址字段处理 13 1.2.4 页表处理 15 1.3 扩展分页与联想存储器 20 1.4 Linux内存布局 21 1.5 内核空间和用户空间 23 1.5.1 初始化临时内核页表 24 1.5.2 永久内核页表的初始化 32 1.5.3 第一次进入用户空间 41 1.5.4 内核映射机制实例 44 1.6 固定映射的线性地址 48 1.7 高端内存内核映射 50 1.8.1 永久内存映射 50 1.8.2 临时内核映射 55 第二章 内核级内存管理系统 58 2.1 Linux页面管理 58 2.1.1 NUMA架构 61 2.1.2 内存管理区 62 2.2 伙伴系统算法 65 2.2.1 数据结构 66 2.2.2 块分配 67 2.2.3 块释放 69 2.3 Linux页面级内存管理 72 2.3.1 分配一组页面 73 2.3.2 释放一组页面 80 2.4 每CPU页面高速缓存 81 2.4.1 数据结构 81 2.4.2 通过每CPU 页高速缓存分配页面 82 2.4.3 释放页面到每CPU 页面高速缓存 83 2.5 slab分配器 85 2.5.1 数据结构 86 2.5.2 分配/释放slab页面 92 2.5.3 增加slab数据结构 93 2.5.4 高速缓存内存布局 94 2.5.5 slab着色 95 2.5.6 分配slab对象 96 2.5.7 释放Slab对象 100 2.5.8 通用对象 102 2.5.9 内存池 103 2.6 非连续内存区 104 2.6.1 高端内存区回顾 105 2.6.2 非连续内存区的描述符 106 2.6.3 分配非连续内存区 109 2.6.4 释放非连续内存区 113 第三章 进程的地址空间 117 3.1 用户态内存分配 117 3.1.1 mm_struct数据结构 118 3.1.2 内核线程的内存描述符 122 3.2 线性区的数据结构 123 3.2.1 线性区数据结构 123 3.2.2 红-黑树算法 126 3.2.3 线性区访问权限 128 3.3 线性区的底层处理 130 3.3.1 查找给定地址的最邻近区 131 3.3.2 查找一个与给定的地址区间相重叠的线性区 135 3.3.3 查找一个空闲的地址区间 135 3.3.4 向内存描述符链表插入一个线性区 137 3.4 分配线性地址区间 141 3.5 释放线性地址区间 151 3.5.1 do_munmap()函数 151 3.5.2 split_vma()函数 153 3.5.3 unmap_region()函数 155 3.6 创建和删除进程的地址空间 156 3.6.1 创建进程的地址空间 156 3.6.2 删除进程的地址空间 175 3.6.3 内核线程1号的地址空间 176 3.7 堆的管理 178 第四章 磁盘文件内存映射 182 4.1 内存映射的数据结构 182 4.2 内存映射的创建 184 4.3 内存映射的请求调页 194 4.4 刷新内存映射的脏页 203 4.5 非线性内存映射 210 第五章 页面的回收 215 5.1 页框回收概念 215 5.1.1 选择目标页 216 5.1.2 PFRA设计 217 5.2 反向映射技术 218 5.2.1 匿名页的反向映射 220 5.2.2 优先搜索树 226 5.2.3 映射页的反向映射 231 5.3 PFRA实现 235 5.3.1 最近最少使用(LRU)链表 236 5.3.2 内存紧缺回收 242 5.3.3 回收磁盘高速缓存的页 267 5.3.4 周期回收 273 5.3.5 内存不足删除程序 283 第六章 交换机制 289 6.1 交换区数据结构 289 6.1.1 创建交换区 290 6.1.2 交换区描述符 291 6.1.3 换出页标识符 293 6.2 激活和禁用交换区 295 6.2.1 sys_swapon()系统调用 296 6.2.2 sys_swapoff()系统调用 304 6.2.3 try_to_unuse()函数 308 6.3 分配和释放页槽 313 6.3.1 scan_swap_map()函数 313 6.3.2 get_swap_page()函数 316 6.3.3 swap_free()函数 318 6.4 页面的换入换出 320 6.4.1 交换高速缓存 320 6.4.2 换出页 323 6.4.3 换入页 329 第七章 
缺页异常处理程序 335 7.1 总体流程 335 7.2 vma以外的错误地址 341 7.3 vma内的错误地址 346 7.3.1 handle_mm_fault()函数 348 7.3.2 请求调页 352 7.3.3 写时复制 358 7.4 处理非连续内存区访问 364


  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助




当前余额3.43前往充值 >
领取后你会自动成为博主和红包主的粉丝 规则
钱包余额 0


