Linux memory management: page frame management

Linux page frame management is built on the buddy system algorithm. Compared with the boot-time bootmem allocator it allocates faster, because it can quickly locate a free block of the requested size, and it effectively combats external fragmentation.
The page frame handling code lives mainly in mm/page_alloc.c.
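To make the splitting/merging idea behind the buddy system concrete, here is a minimal user-space sketch (not taken from the kernel; buddy_idx() and combined_idx() are illustrative names) of the index arithmetic it relies on: a free block of 2^order pages pairs with the block whose index differs only in bit "order", and two free buddies coalesce into one block of order+1. In the kernel the same XOR/AND arithmetic is applied in mm/page_alloc.c to a page index taken relative to a MAX_ORDER-aligned base.

/* Standalone illustration of buddy pairing; compile with any C compiler. */
#include <stdio.h>

static unsigned long buddy_idx(unsigned long idx, unsigned int order)
{
        return idx ^ (1UL << order);    /* the paired block of the same order */
}

static unsigned long combined_idx(unsigned long idx, unsigned int order)
{
        return idx & ~(1UL << order);   /* start of the merged order+1 block */
}

int main(void)
{
        unsigned long idx = 8;          /* a block of 2^2 = 4 pages at index 8 */
        unsigned int order = 2;

        printf("order-%u block at %lu: buddy at %lu\n",
               order, idx, buddy_idx(idx, order));
        printf("if both are free, they merge into an order-%u block at %lu\n",
               order + 1, combined_idx(idx, order));
        return 0;
}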

I. Main data structures
i. The page descriptor: struct page
Page frame management is the core of memory management, and the kernel must know the exact state of every page frame: whether it is free, whether it already contains code or data, whether it has been modified, and so on.
Every page frame has a corresponding page descriptor that records this information; the page descriptor is represented by struct page:

/*
 * Each physical page in the system has a struct page associated with
 * it to keep track of whatever it is we are using the page for at the
 * moment. Note that we have no way to track which tasks are using
 * a page, though if it is a pagecache page, rmap structures can tell us
 * who is mapping it.
 */
struct page {
        unsigned long flags;            /* Atomic flags, some possibly
                                         * updated asynchronously */
        atomic_t _count;                /* Usage count, see below. */
        union {
                atomic_t _mapcount;     /* Count of ptes mapped in mms,
                                         * to show when page is mapped
                                         * & limit reverse map searches.
                                         */
                struct {                /* SLUB */
                        u16 inuse;
                        u16 objects;
                };
        };
        union {
            struct {
                unsigned long private;          /* Mapping-private opaque data:
                                                 * usually used for buffer_heads
                                                 * if PagePrivate set; used for
                                                 * swp_entry_t if PageSwapCache;
                                                 * indicates order in the buddy
                                                 * system if PG_buddy is set.
                                                 */
                struct address_space *mapping;  /* If low bit clear, points to
                                                 * inode address_space, or NULL.
                                                 * If page mapped as anonymous
                                                 * memory, low bit is set, and
                                                 * it points to anon_vma object:
                                                 * see PAGE_MAPPING_ANON below.
                                                 */
            };
#if USE_SPLIT_PTLOCKS
            spinlock_t ptl;
#endif
            struct kmem_cache *slab;    /* SLUB: Pointer to slab */
            struct page *first_page;    /* Compound tail pages */
        };
        union {
                pgoff_t index;          /* Our offset within mapping. */
                void *freelist;         /* SLUB: freelist req. slab lock */
        };
        struct list_head lru;           /* Pageout list, eg. active_list
                                         * protected by zone->lru_lock !
                                         */
        /*
         * On machines where all RAM is mapped into kernel address space,
         * we can simply calculate the virtual address. On machines with
         * highmem some memory is mapped into kernel virtual memory
         * dynamically, so we need a place to store that address.
         * Note that this field could be 16 bits on x86 ... ;)
         *
         * Architectures with slow multiplication can define
         * WANT_PAGE_VIRTUAL in asm/page.h
         */
#if defined(WANT_PAGE_VIRTUAL)
        void *virtual;                  /* Kernel virtual address (NULL if
                                           not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
        unsigned long debug_flags;      /* Use atomic bitops on this */
#endif

#ifdef CONFIG_KMEMCHECK
        /*
         * kmemcheck wants to track the status of each byte in a page; this
         * is a pointer to such a status block. NULL if not tracked.
         */
        void *shadow;
#endif
};

flags: page flag bits describing the frame's current state; for example, PG_buddy means the frame belongs to the buddy system.
lru: links page descriptors together into lists; for example, the buddy system chains the first page descriptor of each free block into the per-order free lists through lru.
private: private data of the page; when the frame is handled by the buddy system it stores the order of the free block.
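As a condensed illustration of how these three fields cooperate, the sketch below marks the first page of a free 2^order block the way the buddy allocator does: the order goes into private, PG_buddy is set in flags, and lru links the block into a free list. It is modeled loosely on the static helpers in mm/page_alloc.c (set_page_order()/page_order()); mark_free_block() and free_block_order() are illustrative names, and the free_list argument stands in for zone->free_area[order].free_list.

/* Kernel-context sketch; set_page_private(), page_private(), __SetPageBuddy()
 * and PageBuddy() are the real helpers of this kernel era. */
#include <linux/mm.h>
#include <linux/list.h>
#include <linux/page-flags.h>

static inline void mark_free_block(struct page *page, unsigned int order,
                                   struct list_head *free_list)
{
        set_page_private(page, order);    /* private: remember the block's order */
        __SetPageBuddy(page);             /* flags: frame belongs to the buddy system */
        list_add(&page->lru, free_list);  /* lru: chain the block into a free list */
}

static inline unsigned long free_block_order(struct page *page)
{
        return PageBuddy(page) ? page_private(page) : 0;
}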

ii. The zone descriptor: struct zone
The hardware architecture places constraints on how page frames may be used; not every frame is suitable for every purpose (for example, ISA-bus DMA can only address the lowest 16 MB, and a 32-bit CPU cannot map all of physical memory into the kernel address space at once). Linux therefore groups page frames into zones; a short allocation sketch follows the zone list below.
On x86 the main zones are:
ZONE_DMA: below 16 MB
ZONE_NORMAL: the kernel's directly mapped region, 16 MB to 896 MB
ZONE_HIGHMEM: memory that cannot be permanently mapped, above 896 MB
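A caller steers an allocation toward one of these zones through its GFP flags. The kernel-context sketch below (not part of the original structure dump) uses the standard alloc_pages()/__free_pages(), page_address() and kmap()/kunmap() APIs; zone_alloc_sketch() is just an illustrative wrapper.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/string.h>

static void zone_alloc_sketch(void)
{
        struct page *dma_page, *norm_page, *high_page;
        void *vaddr;

        dma_page  = alloc_pages(GFP_KERNEL | GFP_DMA, 0); /* ZONE_DMA: below 16MB      */
        norm_page = alloc_pages(GFP_KERNEL, 0);           /* ZONE_NORMAL preferred      */
        high_page = alloc_pages(GFP_HIGHUSER, 0);         /* may come from ZONE_HIGHMEM */
        if (!dma_page || !norm_page || !high_page)
                goto out;

        vaddr = page_address(norm_page);  /* directly mapped, no kmap needed   */
        memset(vaddr, 0, PAGE_SIZE);

        vaddr = kmap(high_page);          /* highmem needs a temporary mapping */
        memset(vaddr, 0, PAGE_SIZE);
        kunmap(high_page);
out:
        if (dma_page)
                __free_pages(dma_page, 0);
        if (norm_page)
                __free_pages(norm_page, 0);
        if (high_page)
                __free_pages(high_page, 0);
}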
Each zone is described by a zone descriptor:

struct zone {
        /* Fields commonly accessed by the page allocator */

        /* zone watermarks, access with *_wmark_pages(zone) macros */
        unsigned long watermark[NR_WMARK];

        /*
         * When free pages are below this point, additional steps are taken
         * when reading the number of free pages to avoid per-cpu counter
         * drift allowing watermarks to be breached
         */
        unsigned long percpu_drift_mark;

        /*
         * We don't know if the memory that we're going to allocate will be freeable
         * or/and it will be released eventually, so to avoid totally wasting several
         * GB of ram we must reserve some of the lower zone memory (otherwise we risk
         * to run OOM on the lower zones despite there's tons of freeable ram
         * on the higher zones). This array is recalculated at runtime if the
         * sysctl_lowmem_reserve_ratio sysctl changes.
         */
        unsigned long           lowmem_reserve[MAX_NR_ZONES];

#ifdef CONFIG_NUMA
        int node;
        /*
         * zone reclaim becomes active if more unmapped pages exist.
         */
        unsigned long           min_unmapped_pages;
        unsigned long           min_slab_pages;
        struct per_cpu_pageset  *pageset[NR_CPUS];
#else
        struct per_cpu_pageset  pageset[NR_CPUS];
#endif
        /*
         * free areas of different sizes
         */
        spinlock_t              lock;
#ifdef CONFIG_MEMORY_HOTPLUG
        /* see spanned/present_pages for more description */
        seqlock_t               span_seqlock;
#endif
        struct free_area        free_area[MAX_ORDER];

#ifndef CONFIG_SPARSEMEM
        /*
         * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
         * In SPARSEMEM, this map is stored in struct mem_section
         */
        unsigned long           *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */


        ZONE_PADDING(_pad1_)

        /* Fields commonly accessed by the page reclaim scanner */
        spinlock_t              lru_lock;
        struct zone_lru {
                struct list_head list;
        } lru[NR_LRU_LISTS];

        struct zone_reclaim_stat reclaim_stat;

        unsigned long           pages_scanned;     /* since last reclaim */
        unsigned long           flags;             /* zone flags, see below */

        /* Zone statistics */
        atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];

        /*
         * prev_priority holds the scanning priority for this zone.  It is
         * defined as the scanning priority at which we achieved our reclaim
         * target at the previous try_to_free_pages() or balance_pgdat()
         * invokation.
         *
         * We use prev_priority as a measure of how much stress page reclaim is
         * under - it drives the swappiness decision: whether to unmap mapped
         * pages.
         *
         * Access to both this field is quite racy even on uniprocessor.  But
         * it is expected to average out OK.
         */
        int prev_priority;

        /*
         * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
         * this zone's LRU.  Maintained by the pageout code.
         */
        unsigned int inactive_ratio;


        ZONE_PADDING(_pad2_)
        /* Rarely used or read-mostly fields */

        /*
         * wait_table           -- the array holding the hash table
         * wait_table_hash_nr_entries   -- the size of the hash table array
         * wait_table_bits      -- wait_table_size == (1 << wait_table_bits)
         *
         * The purpose of all these is to keep track of the people
         * waiting for a page to become available and make them
         * runnable again when possible. The trouble is that this
         * consumes a lot of space, especially when so few things
         * wait on pages at a given time. So instead of using
         * per-page waitqueues, we use a waitqueue hash table.
         *
         * The bucket discipline is to sleep on the same queue when
         * colliding and wake all in that wait queue when removing.
         * When something wakes, it must check to be sure its page is
         * truly available, a la thundering herd. The cost of a
         * collision is great, but given the expected load of the
         * table, they should be so rare as to be outweighed by the
         * benefits from the saved space.
         *
         * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
         * primary users of these fields, and in mm/page_alloc.c
         * free_area_init_core() performs the initialization of them.
         */
        wait_queue_head_t       * wait_table;
        unsigned long           wait_table_hash_nr_entries;
        unsigned long           wait_table_bits;

        /*
         * Discontig memory support fields.
         */
        struct pglist_data      *zone_pgdat;
        /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
        unsigned long           zone_start_pfn;

        /*
         * zone_start_pfn, spanned_pages and present_pages are all
         * protected by span_seqlock.  It is a seqlock because it has
         * to be read outside of zone->lock, and it is done in the main
         * allocator path.  But, it is written quite infrequently.
         *
         * The lock is declared along with zone->lock because it is
         * frequently read in proximity to zone->lock.  It's good to
         * give them a chance of being in the same cacheline.
         */
        unsigned long           spanned_pages;  /* total size, including holes */
        unsigned long           present_pages;  /* amount of memory (excluding holes) */

        /*
         * rarely used fields:
         */
        const char              *name;
} ____cacheline_internodealigned_in_smp;
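To connect the free_area[MAX_ORDER] field above back to the buddy algorithm, here is a condensed sketch (not a verbatim kernel function) of how an order-n request consults it: scan from the requested order upward and take the first block found; the real code then splits any surplus. It is loosely modeled on __rmqueue_smallest() in mm/page_alloc.c and assumes the struct free_area layout of this kernel era (a free_list array indexed by migratetype plus an nr_free counter); the split ("expand") step and the page-order bookkeeping are omitted.

#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/list.h>

static struct page *sketch_rmqueue(struct zone *zone, unsigned int order,
                                   int migratetype)
{
        unsigned int current_order;
        struct free_area *area;
        struct page *page;

        for (current_order = order; current_order < MAX_ORDER; current_order++) {
                area = &zone->free_area[current_order];
                if (list_empty(&area->free_list[migratetype]))
                        continue;                       /* nothing of this size */

                page = list_entry(area->free_list[migratetype].next,
                                  struct page, lru);
                list_del(&page->lru);                   /* unlink the block's first page */
                area->nr_free--;
                /* a block larger than requested would be split ("expanded") here */
                return page;
        }
        return NULL;                                    /* zone has no block big enough */
}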