User Memory Space: VMA
The data structure the kernel uses to manage user-space memory is struct vm_area_struct, VMA for short.
1. VMA
A user process owns its user-space addresses and can request memory through functions such as malloc and mmap. These interfaces are implemented on top of the process linear-area descriptor struct vm_area_struct, which is the kernel's data structure for managing a process's address space, abbreviated VMA.
Each process's memory descriptor mm_struct has its own set of VMAs: all of a process's VMAs are chained on the mm->mmap linked list and are also recorded in the mm->mm_rb red-black tree, which makes operations such as VMA lookup and merging efficient.
struct mm_struct {
	struct vm_area_struct *mmap;	/* list of VMAs */
	struct rb_root mm_rb;
	int map_count;			/* number of VMAs */
	...
};
- mmap is the head of the process's VMA list: a sorted list of vm_area_struct (linked through vm_next/vm_prev) that records all of the process's VMAs, ordered by increasing vma->vm_start (a short traversal sketch follows this list).
- mm_rb is the root of the VMA red-black tree; every VMA of the process is also inserted there to speed up lookups.
- map_count records how many VMAs the process (mm) owns.
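As a hedged illustration of how the list and the counter fit together, the sketch below (the helper name dump_vmas is hypothetical, not kernel code) walks mm->mmap in order and cross-checks the number of entries against mm->map_count; the caller is assumed to hold the mmap lock for read.

#include <linux/mm.h>
#include <linux/printk.h>

/* Hypothetical helper (not kernel code): walk the sorted mm->mmap list
 * and cross-check the number of entries against mm->map_count.
 * The caller is assumed to hold the mmap lock (mmap_sem) for read.
 */
static void dump_vmas(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	int nr = 0;

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		pr_info("VMA %d: [%#lx - %#lx)\n",
			nr, vma->vm_start, vma->vm_end);
		nr++;
	}
	WARN_ON(nr != mm->map_count);
}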
The definition of the VMA data structure, vm_area_struct, is as follows.
/*
* This struct defines a memory VMM memory area. There is one of these
* per VM-area/task. A VM area is any part of the process virtual memory
* space that has a special rule for the page-fault handlers (ie a shared
* library, the executable area etc).
*/
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */
/* Start and end (virtual addresses) of the linear address range described by this VMA */
unsigned long vm_start; /* Our start address within vm_mm. */
unsigned long vm_end; /* The first byte after our end address
within vm_mm. */
/* linked list of VM areas per task, sorted by address */
/* Per-process VMA list: pointers to the next and previous VMA, sorted by virtual address */
struct vm_area_struct *vm_next, *vm_prev;
/* rb-tree node that links this VMA into the mm->mm_rb red-black tree */
struct rb_node vm_rb;
/*
* Largest free memory gap in bytes to the left of this VMA.
* Either between this VMA and vma->vm_prev, or between one of the
* VMAs below us in the VMA rbtree and its ->vm_prev. This helps
* get_unmapped_area find a free area of the right size.
*/
unsigned long rb_subtree_gap;
/* Second cache line starts here. */
/* Points to the mm_struct that this VMA belongs to */
struct mm_struct *vm_mm; /* The address space we belong to. */
/*
* Access permissions of this VMA.
* See vmf_insert_mixed_prot() for discussion.
*/
/* VMA access permissions */
pgprot_t vm_page_prot;
/* VMA flag bits */
unsigned long vm_flags; /* Flags, see mm.h. */
/*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
*/
struct {
struct rb_node rb;
unsigned long rb_subtree_last;
} shared;
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
* or brk vma (with NULL file) can only be in an anon_vma list.
*/
/* Used to manage RMAP (reverse mapping) */
struct list_head anon_vma_chain; /* Serialized by mmap_sem &
* page_table_lock */
struct anon_vma *anon_vma; /* Serialized by page_table_lock */
/* Function pointers to deal with this struct. */
/* VMA operations, e.g. open/close */
const struct vm_operations_struct *vm_ops;
/* Information about our backing store: */
/* Offset of the file mapping, in units of PAGE_SIZE; for anonymous mappings this is vm_start/PAGE_SIZE */
unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE units */
/* Points to the struct file of the mapped file */
struct file * vm_file; /* File we map to (can be NULL). */
void * vm_private_data; /* was vm_pte (shared mem) */
#ifdef CONFIG_SWAP
atomic_long_t swap_readahead_info;
#endif
#ifndef CONFIG_MMU
struct vm_region *vm_region; /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;
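The fields above are typically consumed through helpers such as find_vma(), which searches the mm->mm_rb red-black tree and returns the first VMA whose vm_end is greater than the given address. The following is a minimal, hedged sketch (the helper name describe_addr is hypothetical, not kernel code) that locates the VMA covering an address and uses vm_file to tell a file-backed mapping from an anonymous one; it assumes the caller holds the mmap lock for read.

#include <linux/mm.h>
#include <linux/printk.h>

/* Hypothetical helper (not kernel code): find the VMA covering @addr.
 * find_vma() searches mm->mm_rb and returns the first VMA whose vm_end
 * is greater than @addr, so the vm_start check below confirms that the
 * address really falls inside the VMA. Caller holds the mmap lock for read.
 */
static void describe_addr(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma = find_vma(mm, addr);

	if (!vma || addr < vma->vm_start) {
		pr_info("%#lx is not mapped\n", addr);
		return;
	}

	if (vma->vm_file)
		pr_info("%#lx: file-backed mapping, vm_pgoff=%lu\n",
			addr, vma->vm_pgoff);
	else
		pr_info("%#lx: anonymous mapping\n", addr);
}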
2. VMA Flags
/*
* vm_flags in vm_area_struct, see mm_types.h.
* When changing, update also include/trace/events/mmflags.h
*/
#define VM_NONE 0x00000000
/* Pages may be readable, writable, executable, and shared among multiple processes */
#define VM_READ 0x00000001 /* currently active flags */
#define VM_WRITE 0x00000002
#define VM_EXEC 0x00000004
#define VM_SHARED 0x00000008
/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */
#define VM_MAYWRITE 0x00000020
#define VM_MAYEXEC 0x00000040
#define VM_MAYSHARE 0x00000080
#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
#define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
#define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */
#define VM_LOCKED 0x00002000
#define VM_IO 0x00004000 /* Memory mapped I/O or similar */
/* Used by sys_madvise() */
#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
#define VM_LOCKONFAULT 0x00080000 /* Lock the pages covered when they are faulted in */
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_SYNC 0x00800000 /* Synchronous page faults */
#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
#define VM_WIPEONFORK 0x02000000 /* Wipe VMA contents in child. */
#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
#ifdef CONFIG_MEM_SOFT_DIRTY
# define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */
#else
# define VM_SOFTDIRTY 0
#endif
#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
#define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */
#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */
#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
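From user space, the effect of these flags can be observed through /proc/<pid>/maps, whose permission column mirrors VM_READ/VM_WRITE/VM_EXEC/VM_SHARED for each VMA. The small program below is an illustrative sketch (not from the original text): it creates a private anonymous mapping with PROT_READ | PROT_WRITE, which shows up as an "rw-p" VMA.

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

int main(void)
{
	/* A private anonymous mapping: the new VMA gets VM_READ and VM_WRITE
	 * but not VM_SHARED, which /proc/self/maps reports as "rw-p".
	 */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("mapped one page at %p\n", p);

	/* Dump the kernel's view of this process's VMAs. */
	system("cat /proc/self/maps");

	munmap(p, 4096);
	return 0;
}

In the maps output, the trailing "p" marks a private (copy-on-write) mapping, i.e. VM_SHARED is not set; a MAP_SHARED mapping would show "s" instead.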