Linux memory management, part 1: concepts

1. Memory organization

UMA (uniform memory access): every CPU reaches all of physical memory at the same cost.
NUMA (non-uniform memory access): memory is attached to nodes; a CPU accesses its local node faster than remote ones.

Linux organizes physical memory into nodes, each associated with a CPU (or group of CPUs) and described by a pg_data_t.
Each node is further divided into zones: DMA, normal, highmem, and movable.

enum zone_type {
    ZONE_DMA,       /* low memory reachable by legacy DMA devices */
    ZONE_NORMAL,    /* regular memory, permanently mapped by the kernel */
    ZONE_HIGHMEM,   /* memory beyond the kernel's direct mapping (32-bit) */
    ZONE_MOVABLE,   /* pseudo-zone of movable pages, used to fight fragmentation */
    __MAX_NR_ZONES
};
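
An allocation picks its zone through GFP modifier flags; gfp_zone() in include/linux/gfp.h maps flags to a zone_type via a bit table. A simplified sketch of that mapping (illustration only, not the kernel's actual table-based code; gfp_zone_sketch is a name chosen here):

#include <linux/gfp.h>

/* Illustration only: the semantics of the kernel's GFP_ZONE_TABLE. */
static enum zone_type gfp_zone_sketch(gfp_t flags)
{
    if (flags & __GFP_DMA)
        return ZONE_DMA;
    if ((flags & __GFP_HIGHMEM) && (flags & __GFP_MOVABLE))
        return ZONE_MOVABLE;    /* movable highmem pages */
    if (flags & __GFP_HIGHMEM)
        return ZONE_HIGHMEM;
    return ZONE_NORMAL;         /* default; also __GFP_MOVABLE alone */
}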

At system startup, __build_all_zonelists fetches each node descriptor with pg_data_t *pgdat = NODE_DATA(nid); on a flat-memory (UMA) build this yields the global variable contig_page_data, which is then initialized.
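
For reference, on kernels built without CONFIG_NEED_MULTIPLE_NODES the macro simply resolves to that single global node (excerpt; the exact config guards vary by kernel version):

/* include/linux/mmzone.h (!CONFIG_NEED_MULTIPLE_NODES) */
extern struct pglist_data contig_page_data;
#define NODE_DATA(nid)      (&contig_page_data)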

include/linux/mmzone.h
typedef struct pglist_data {
    struct zone node_zones[MAX_NR_ZONES];          /* the zones of this node */
    struct zonelist node_zonelists[MAX_ZONELISTS]; /* fallback order for allocations */
    int nr_zones;                                  /* number of populated zones */
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
    struct page *node_mem_map;
#ifdef CONFIG_MEMCG
    struct page_cgroup *node_page_cgroup;
#endif
#endif
#ifndef CONFIG_NO_BOOTMEM
    struct bootmem_data *bdata;
#endif
    unsigned long node_start_pfn;
    unsigned long node_present_pages; /* total number of physical pages */
    unsigned long node_spanned_pages; /* total size of physical page
                         range, including holes */
    int node_id;
    nodemask_t reclaim_nodes;   /* Nodes allowed to reclaim from */
    wait_queue_head_t kswapd_wait;
    wait_queue_head_t pfmemalloc_wait;
    struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */
    int kswapd_max_order;
    enum zone_type classzone_idx;
#ifdef CONFIG_NUMA_BALANCING
    /*
     * Lock serializing the per destination node AutoNUMA memory
     * migration rate limiting data.
     */
    spinlock_t numabalancing_migrate_lock;

    /* Rate limiting time interval */
    unsigned long numabalancing_migrate_next_window;

    /* Number of pages migrated during the rate limiting time interval */
    unsigned long numabalancing_migrate_nr_pages;
#endif
} pg_data_t;
On a running UMA system, dumping contig_page_data in a debugger gives something like the following (lines truncated in the capture are marked with ...):

  contig_page_data = (
    node_zones = ((watermark = (687, 1323, ...
    node_zonelists = ((zlcache_ptr = 0x0, ...
    nr_zones = 1,
    node_mem_map = 0xC098F000,    /* points to mem_map, the array of struct page */
    node_page_cgroup = 0xC0E19000,
    bdata = 0xC07717E8,
    node_start_pfn = 524288,
    node_present_pages = 131072,
    node_spanned_pages = 131072,
    node_id = 0,
    reclaim_nodes = (bits = (0)),
    kswapd_wait = (lock = (rlock = (raw_lo...
    pfmemalloc_wait = (lock = (rlock = (ra...
    kswapd = 0xDF3E4440,
    kswapd_max_order = 0,
    classzone_idx = ZONE_NORMAL)

Assuming 4 KiB pages, node_start_pfn = 524288 (0x80000) means RAM starts at physical address 0x80000000, and node_present_pages = node_spanned_pages = 131072 means 512 MiB of RAM with no holes.
include/linux/mmzone.h
struct zone {
    unsigned long watermark[NR_WMARK];           /* WMARK_MIN/LOW/HIGH reclaim thresholds */
    unsigned long lowmem_reserve[MAX_NR_ZONES];  /* pages kept back from higher-zone fallback */
    struct free_area free_area[MAX_ORDER];       /* buddy-allocator free lists, one per order */
    struct per_cpu_pageset __percpu *pageset;    /* per-CPU page caches */

...
} ____cacheline_internodealigned_in_smp;
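
To make the node/zone relationship concrete, here is a minimal sketch (assuming a ~3.x kernel, module context; dump_node_zones is a name chosen here) that walks node 0 and prints each zone's watermarks:

#include <linux/mm.h>
#include <linux/mmzone.h>

/* Hypothetical helper for illustration: walk the zones of node 0 and
 * print the free-page watermarks the allocator checks against. */
static void dump_node_zones(void)
{
    pg_data_t *pgdat = NODE_DATA(0);
    int i;

    for (i = 0; i < pgdat->nr_zones; i++) {
        struct zone *z = &pgdat->node_zones[i];

        printk(KERN_INFO "zone %s: min=%lu low=%lu high=%lu\n",
               z->name, z->watermark[WMARK_MIN],
               z->watermark[WMARK_LOW], z->watermark[WMARK_HIGH]);
    }
}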
struct per_cpu_pageset {
    struct per_cpu_pages pcp;
#ifdef CONFIG_NUMA
    s8 expire;              /* countdown to draining pages left over from a remote node */
#endif
#ifdef CONFIG_SMP
    s8 stat_threshold;      /* per-CPU delta limit before the global vm stats are updated */
    s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];  /* per-CPU vm statistics deltas */
#endif
};
struct per_cpu_pages {
    int count;      /* number of pages in the list */
    int high;       /* high watermark, emptying needed */
    int batch;      /* chunk size for buddy add/remove */

    /* Lists of pages, one per migrate type stored on the pcp-lists */
    struct list_head lists[MIGRATE_PCPTYPES];
};
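
count, high and batch implement a simple hysteresis: a freed page is pushed onto the per-CPU list, and once count reaches high, batch pages are returned to the buddy system in one bulk operation. A hedged sketch of the free path (the real code is free_hot_cold_page() in mm/page_alloc.c; free_to_pcp_sketch is a name chosen here):

/* Sketch of the pcp free path, mirroring free_hot_cold_page();
 * free_pcppages_bulk() is the (static) kernel routine that hands
 * pages back to the buddy free lists. */
static void free_to_pcp_sketch(struct zone *zone, struct page *page,
                               int migratetype)
{
    struct per_cpu_pages *pcp = &this_cpu_ptr(zone->pageset)->pcp;

    list_add(&page->lru, &pcp->lists[migratetype]);
    pcp->count++;

    if (pcp->count >= pcp->high) {
        free_pcppages_bulk(zone, pcp->batch, pcp);
        pcp->count -= pcp->batch;
    }
}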
struct page {
    /* First double word block */
    unsigned long flags;        /* Atomic flags, some possibly
                     * updated asynchronously */
    struct address_space *mapping;  /* If low bit clear, points to
                     * inode address_space, or NULL.
                     * If page mapped as anonymous
                     * memory, low bit is set, and
                     * it points to anon_vma object:
                     * see PAGE_MAPPING_ANON below.
                     */
    /* Second double word */
    struct {
        union {
            pgoff_t index;      /* Our offset within mapping. */
            void *freelist;     /* slub/slob first free object */
            bool pfmemalloc;    /* If set by the page allocator,
                         * ALLOC_NO_WATERMARKS was set
                         * and the low watermark was not
                         * met implying that the system
                         * is under some pressure. The
                         * caller should try ensure
                         * this page is only used to
                         * free other pages.
                         */
        };

        union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
            /* Used for cmpxchg_double in slub */
            unsigned long counters;
#else
            /*
             * Keep _count separate from slub cmpxchg_double data.
             * As the rest of the double word is protected by
             * slab_lock but _count is not.
             */
            unsigned counters;
#endif

            struct {

                union {
                    /*
                     * Count of ptes mapped in
                     * mms, to show when page is
                     * mapped & limit reverse map
                     * searches.
                     *
                     * Used also for tail pages
                     * refcounting instead of
                     * _count. Tail pages cannot
                     * be mapped and keeping the
                     * tail page _count zero at
                     * all times guarantees
                     * get_page_unless_zero() will
                     * never succeed on tail
                     * pages.
                     */
                    atomic_t _mapcount;

                    struct { /* SLUB */
                        unsigned inuse:16;
                        unsigned objects:15;
                        unsigned frozen:1;
                    };
                    int units;  /* SLOB */
                };
                atomic_t _count;        /* Usage count, see below. */
            };
        };
    };

    /* Third double word block */
    union {
        struct list_head lru;   /* Pageout list, eg. active_list
                     * protected by zone->lru_lock !
                     */
        struct {        /* slub per cpu partial pages */
            struct page *next;  /* Next partial slab */
#ifdef CONFIG_64BIT
            int pages;  /* Nr of partial slabs left */
            int pobjects;   /* Approximate # of objects */
#else
            short int pages;
            short int pobjects;
#endif
        };

        struct list_head list;  /* slobs list of pages */
        struct slab *slab_page; /* slab fields */
    };

    /* Remainder is not double word aligned */
    union {
        unsigned long private;      /* Mapping-private opaque data:
                         * usually used for buffer_heads
                         * if PagePrivate set; used for
                         * swp_entry_t if PageSwapCache;
                         * indicates order in the buddy
                         * system if PG_buddy is set.
                         */
#if USE_SPLIT_PTLOCKS
        spinlock_t ptl;
#endif
        struct kmem_cache *slab_cache;  /* SL[AU]B: Pointer to slab */
        struct page *first_page;    /* Compound tail pages */
    };

    /*
     * On machines where all RAM is mapped into kernel address space,
     * we can simply calculate the virtual address. On machines with
     * highmem some memory is mapped into kernel virtual memory
     * dynamically, so we need a place to store that address.
     * Note that this field could be 16 bits on x86 ... ;)
     *
     * Architectures with slow multiplication can define
     * WANT_PAGE_VIRTUAL in asm/page.h
     */
#if defined(WANT_PAGE_VIRTUAL)
    void *virtual;          /* Kernel virtual address (NULL if
                       not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
    unsigned long debug_flags;  /* Use atomic bitops on this */
#endif

#ifdef CONFIG_KMEMCHECK
    /*
     * kmemcheck wants to track the status of each byte in a page; this
     * is a pointer to such a status block. NULL if not tracked.
     */
    void *shadow;
#endif

#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
    int _last_nid;
#endif
};
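
Since node_mem_map/mem_map is just an array of these descriptors, a struct page, its page frame number (PFN), and its kernel virtual address are interconvertible. A minimal sketch (page_conversions_demo is a name chosen here for illustration):

#include <linux/mm.h>

/* Hypothetical demo: round-trip between struct page, PFN and kernel
 * virtual address (page_address() returns NULL for a highmem page
 * that is not currently kmapped). */
static void page_conversions_demo(struct page *page)
{
    unsigned long pfn = page_to_pfn(page);  /* index derived from mem_map */
    struct page *same = pfn_to_page(pfn);   /* back to the descriptor */
    void *vaddr = page_address(page);       /* linear address, or NULL */

    printk(KERN_INFO "page=%p pfn=%lu same=%p vaddr=%p\n",
           page, pfn, same, vaddr);
}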