内存区域由vm_area_struct结构体描述,内存区域在内核中也被称作虚拟内存区域或VMA。vm_area_struct结构体描述了指定地址空间内连续区间上的一个独立内存范围。内核将每个内存区域作为一个单独的内存对象管理,每个内存区域都有一致的属性和操作:
- 在mm.h中
- /*
- * Linux kernel virtual memory manager primitives.
- * The idea being to have a "virtual" mm in the same way
- * we have a virtual fs - giving a cleaner interface to the
- * mm details, and allowing different kinds of memory mappings
- * (from shared memory to executable loading to arbitrary
- * mmap() functions).
- */
- /*
- * This struct defines a memory VMM memory area. There is one of these
- * per VM-area/task. A VM area is any part of the process virtual memory
- * space that has a special rule for the page-fault handlers (ie a shared
- * library, the executable area etc).
- */
- struct vm_area_struct {
- struct mm_struct * vm_mm; /* The address space we belong to. */
- unsigned long vm_start; /* Our start address within vm_mm. */
- unsigned long vm_end; /* The first byte after our end address
- within vm_mm. */
- /* linked list of VM areas per task, sorted by address */
- struct vm_area_struct *vm_next;
- pgprot_t vm_page_prot; /* Access permissions of this VMA. */
- unsigned long vm_flags; /* Flags, listed below. */
- struct rb_node vm_rb;
- /*
- * For areas with an address space and backing store,
- * linkage into the address_space->i_mmap prio tree, or
- * linkage to the list of like vmas hanging off its node, or
- * linkage of vma in the address_space->i_mmap_nonlinear list.
- */
- union {
- struct {
- struct list_head list;
- void *parent; /* aligns with prio_tree_node parent */
- struct vm_area_struct *head;
- } vm_set;
- struct raw_prio_tree_node prio_tree_node;
- } shared;
- /*
- * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
- * list, after a COW of one of the file pages. A MAP_SHARED vma
- * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
- * or brk vma (with NULL file) can only be in an anon_vma list.
- */
- struct list_head anon_vma_node; /* Serialized by anon_vma->lock */
- struct anon_vma *anon_vma; /* Serialized by page_table_lock */
- /* Function pointers to deal with this struct. */
- struct vm_operations_struct * vm_ops;
- /* Information about our backing store: */
- unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
- units, *not* PAGE_CACHE_SIZE */
- struct file * vm_file; /* File we map to (can be NULL). */
- void * vm_private_data; /* was vm_pte (shared mem) */
- unsigned long vm_truncate_count;/* truncate_count or restart_addr */
- #ifndef CONFIG_MMU
- atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */
- #endif
- #ifdef CONFIG_NUMA
- struct mempolicy *vm_policy; /* NUMA policy for the VMA */
- #endif
- };
每个内存描述符都对应于进程地址空间中的惟一区间。内存区域的位置就在[vm_start,vm_end)之中。注意,在同一个地址空间内的不同内存区间不能重叠。
vm_mm指向与VMA相关的mm_struct结构体。每个VMA对其相关的mm_struct结构体来说都是惟一的,所以即使两个独立的进程将同一个文件映射到各自的地址空间,它们都分别有一个vm_area_struct结构体来标志自己的内存区域;但是如果两个进程共享一个地址空间,那么它们也同时共享其中的所有vm_area_struct结构体。
VMA标志
VMA标志是一种位标志,标志了内存区域所包含的页面的行为和信息。和物理页的访问权限不同,VMA标志反映了内核处理页面所需要遵循的行为准则,而不是硬件要求。该标志同时也包含了内存区域中页面的信息,或内存区域的整体信息:
- 在mm.h中
- /*
- * vm_flags..
- */
- #define VM_READ 0x00000001 /* currently active flags */
- #define VM_WRITE 0x00000002
- #define VM_EXEC 0x00000004
- #define VM_SHARED 0x00000008
- /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
- #define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */
- #define VM_MAYWRITE 0x00000020
- #define VM_MAYEXEC 0x00000040
- #define VM_MAYSHARE 0x00000080
- #define VM_GROWSDOWN 0x00000100 /* general info on the segment */
- #define VM_GROWSUP 0x00000200
- #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
- #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
- #define VM_EXECUTABLE 0x00001000
- #define VM_LOCKED 0x00002000
- #define VM_IO 0x00004000 /* Memory mapped I/O or similar */
- /* Used by sys_madvise() */
- #define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
- #define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
- #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
- #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
- #define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */
- #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
- #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
- #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
- #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
- #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
- #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */
- VMA操作
vm_area_struct结构体中的vm_ops域指向与指定内存区域相关的操作函数表,内核使用表中的方法操作VMA。vm_area_struct作为通用对象代表了任何类型的内存区域,而操作表描述针对特定的对象实例的特定方法。
- 在mm.h中
- /*
- * These are the virtual MM functions - opening of an area, closing and
- * unmapping it (needed to keep files on disk up-to-date etc), pointer
- * to the functions called when a no-page or a wp-page exception occurs.
- */
- struct vm_operations_struct {
- /*当指定的内存区域被加入到一个地址空间时该函数被调用*/
- void (*open)(struct vm_area_struct * area);
- /*当指定的内存区域从地址空间删除时,该函数被调用*/
- void (*close)(struct vm_area_struct * area);
- /*当要访问的页不在物理内存中时,该函数被页错误处理程序调用*/
- struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
- unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address);
- /*该函数被系统调用remap_file_pages()调用来为将要发生的缺页中断预映射一个新映射*/
- int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
- /* notification that a previously read-only page is about to become
- * writable, if an error is returned it will cause a SIGBUS */
- int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
- #ifdef CONFIG_NUMA
- int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
- struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
- unsigned long addr);
- int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from,
- const nodemask_t *to, unsigned long flags);
- #endif
- };
- 内存区域的树形结构和内存区域的链表结构
可以通过内存描述符中的mmap和mm_rb域之一访问内存区域,这两个域各自独立地指向与内存描述符相关的全体内存区域对象。其实,它们包含完全相同的vm_area_struct结构体指针,仅仅是组织方法不同。
mmap使用单独的链表连接所有的内存区域对象。每一个vm_area_struct结构体通过自身的vm_next联入链表,所有的区域按地址增长的方向排序,mmap域指向链表中第一个内存区域,链表中最后一个VMA结构体指针指向空。
mm_rb使用红黑树连接所有的内存区域对象。mm_rb指向红黑树的根节点,地址空间中的每一个vm_area_struct结构体通过自身的vm_rb连接到树中。
红黑树中的所有节点都遵从:左边节点值小于右边节点值;每个节点都被配以红色或黑色。分配规则为:红色节点的子节点为黑色,并且树中的任何一条从节点到叶子的路径必须包含同样数目的黑色节点。根节点总为黑色。红黑树的搜索、插入、删除等操作的复杂度都为O(log(n))。
链表用于需要遍历全部节点的时候,红黑树适用于在地址空间中定位特定内存区域的时候。内核为了让内存区域上的各种不同操作都能获得高性能,所以同时使用了这两种数据结构。
- 实际使用中的内存区域
使用/proc文件系统和pmap(1)工具可以查看给定进程的内存空间和其中所含的内存区域。
相关内容请参看
Linux 的 Virtual Memory Areas(VMA):基本概念介紹: http://lixiang.cn/?q=node/105
最后给出mm_struct和VMA的关系图:
a