A.1#【内存管理】——1.1.1 node:struct pglist_data(pg_data_t)

 

3 存储节点Node

node 详细讲解

为了支持NUMA模型,Linux系统把物理内存划分为多个Node,内核中通过pg_data_t来描述一个Node,每个Node关联到一个处理器。

对于PC、手机这种采用UMA模型的机器来说,系统只有一个全局的Node:contig_page_data。

在分配一个页面时,Linux采用节点局部分配的策略,从最靠近运行中的CPU的节点分配内存,

由于进程往往是在同一个CPU上运行,因此从当前节点得到的内存最可能被用到(it is likely the memory from the current node will be used)。

3.1  pglist_data(pg_data_t)数据结构


[include/linux/mmzone.h]
node的数据结构为pglist_data,每一个node对应一个struct pglist_data。

每个节点都由pg_data_t描述,而pg_data_t 由struct pglist_data 定义而来。
pg_data_t的数据结构:参看相关章节

在linux环境中我们可以使用numactl命令查看Node中的cpu和内存,以及各个node之间的distance

numactl --hardware

available: 2 nodes (0-1)
node 0 cpus: 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23
node 0 size: 131037 MB
node 0 free: 3019 MB
node 1 cpus: 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31
node 1 size: 131071 MB
node 1 free: 9799 MB
node distances:
node 0 1
 0: 10 20
 1: 20 10
 

所有的节点都由一个称为pgdat_list的链表维护,这些节点都放在该链表中,均由函数init_bootmem_core()初始化。

/*
 * Per-node memory descriptor (pg_data_t).
 *
 * One instance describes one memory node: its zones, the fallback zone
 * lists used for allocation, the page-frame range it covers, and the
 * per-node state used by the reclaim (kswapd) and compaction (kcompactd)
 * threads.
 *
 * NOTE(review): member order and the ZONE_PADDING() markers are part of
 * the layout; do not reorder members when editing.
 */
typedef struct pglist_data {
    /* The zones of each type that belong to this node. */
    struct zone node_zones[MAX_NR_ZONES];
    /* Fallback nodes and zone lists consulted when allocating for this node. */
    struct zonelist node_zonelists[MAX_ZONELISTS];
    /* Number of zones in this node. */
    int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP    /* means !SPARSEMEM */
    /*
     * Pointer to this node's array of struct page, one entry per physical
     * page. NOTE(review): the accompanying notes say pfn_to_page() and
     * page_to_pfn() are implemented on top of this array - confirm for the
     * memory model in use.
     */
    struct page *node_mem_map;
#ifdef CONFIG_PAGE_EXTENSION
    struct page_ext *node_page_ext;
#endif
#endif
#ifndef CONFIG_NO_BOOTMEM
    struct bootmem_data *bdata;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
    /*
     * Must be held any time you expect node_start_pfn, node_present_pages
     * or node_spanned_pages stay constant.  Holding this will also
     * guarantee that any pfn_valid() stays that way.
     *
     * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
     * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG.
     *
     * Nests above zone->lock and zone->span_seqlock
     */
    spinlock_t node_size_lock;
#endif
    /* First page frame number of the memory in this node. */
    unsigned long node_start_pfn;
    unsigned long node_present_pages; /* total number of physical pages */
    unsigned long node_spanned_pages; /* total size of physical page
                         range, including holes */
    /* Node identifier. */
    int node_id;
    /* Wait queue associated with this node's kswapd thread. */
    wait_queue_head_t kswapd_wait;
    wait_queue_head_t pfmemalloc_wait;
    struct task_struct *kswapd;    /* Protected by
                       mem_hotplug_begin/end() */
    /* NOTE(review): presumably the allocation order kswapd should reclaim for - confirm. */
    int kswapd_order;
    enum zone_type kswapd_classzone_idx;

    int kswapd_failures;        /* Number of 'reclaimed == 0' runs */

#ifdef CONFIG_COMPACTION
    /* Per-node compaction thread state; mirrors the kswapd members above. */
    int kcompactd_max_order;
    enum zone_type kcompactd_classzone_idx;
    wait_queue_head_t kcompactd_wait;
    struct task_struct *kcompactd;
#endif
#ifdef CONFIG_NUMA_BALANCING
    /* Lock serializing the migrate rate limiting window */
    spinlock_t numabalancing_migrate_lock;

    /* Rate limiting time interval */
    unsigned long numabalancing_migrate_next_window;

    /* Number of pages migrated during the rate limiting time interval */
    unsigned long numabalancing_migrate_nr_pages;
#endif
    /*
     * This is a per-node reserve of pages that are not available
     * to userspace allocations.
     */
    unsigned long        totalreserve_pages;

#ifdef CONFIG_NUMA
    /*
     * zone reclaim becomes active if more unmapped pages exist.
     */
    unsigned long        min_unmapped_pages;
    unsigned long        min_slab_pages;
#endif /* CONFIG_NUMA */

    /* Write-intensive fields used by page reclaim */
    /*
     * NOTE(review): per the accompanying notes, ZONE_PADDING() places the
     * members before and after it on different cache lines, trading space
     * for speed.
     */
    ZONE_PADDING(_pad1_)
    /* Protects concurrent access to the LRU lists. */
    spinlock_t        lru_lock;

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
    /*
     * If memory initialisation on large machines is deferred then this
     * is the first PFN that needs to be initialised.
     */
    unsigned long first_deferred_pfn;
    /* Number of non-deferred pages */
    unsigned long static_init_pgcnt;
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
    /* NOTE(review): split_queue_lock presumably guards split_queue and split_queue_len - confirm. */
    spinlock_t split_queue_lock;
    struct list_head split_queue;
    unsigned long split_queue_len;
#endif

    /* Fields commonly accessed by the page reclaim scanner */
    /* The set of LRU lists for this node. */
    struct lruvec        lruvec;

    /*
     * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
     * this node's LRU.  Maintained by the pageout code.
     */
    unsigned int inactive_ratio;

    /* State flags. NOTE(review): the valid flag values are not shown here - confirm against mmzone.h. */
    unsigned long        flags;

    ZONE_PADDING(_pad2_)

    /* Per-node vmstats */
    struct per_cpu_nodestat __percpu  *per_cpu_nodestats;
    /* Node-wide counters, one slot per node stat item. */
    atomic_long_t        vm_stat[NR_VM_NODE_STAT_ITEMS];
} pg_data_t;

node_zones[MAX_NR_ZONES]   对应该node包含的各个类型的zone
node_zonelists    该node的备选节点及内存域列表,见下一章讲解
node_mem_map    linux为每个物理页分配了一个struct page的管理结构体,并形成了一个结构体数组,node_mem_map即为指向该数组的指针;pfn_to_page和page_to_pfn都借助该数组实现。
nr_zones    该node包含的zone的个数
node_start_pfn    该node中内存的起始页帧号
node_present_pages     该node地址范围内的实际管理的页面数量
node_spanned_pages    该node地址范围内的所有page页数,包括空洞;当地址范围内存在空洞时,该值大于node_present_pages。
node_id    节点标识符
kswapd    负责回收该node内存的内核线程,每个node对应一个内核线程kswapd
kswapd_wait    node的等待队列,即交换守护进程kswapd的等待队列
kswapd_order    需要释放的区域的长度,以页阶为单位(上面结构体中的成员名为kswapd_order,旧版内核中称为kswapd_max_order)
lru_lock    用于对LRU链表并行访问时进行保护
lruvec     LRU链表的集合
flags    内存域的当前状态, 在mmzone.h定义了zone的所有可用zone_flag
vm_stat    node的计数
ZONE_PADDING宏作用是让前后的成员分布在不同的cache line中, 以空间换取时间。
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
npm WARN ERESOLVE overriding peer dependency npm WARN ERESOLVE overriding peer dependency [###...............] / idealTree:yargs: timing idealTree:node_modules/.pnpm/@babel+preset-modules@0.1.5_@babel+core@7[###...............] / idealTree:@commitlint/config-conventional: timing idealTree:node_modules/.pnpm/@commitlint+con[###...............] | idealTree:@commitlint/read: timing idealTree:node_modules/.pnpm/@commitlint+read@17.5.1/node_m[###...............] | idealTree:@commitlint/read: timing idealTree:node_modules/.pnpm/@commitlint+read@17.5.1/node_m[###...............] - idealTree:@commitlint/read: timing idealTree:node_modules/.pnpm/@commitlint+read@17.5.1/node_m[###...............] | idealTree:@commitlint/read: timing idealTree:node_modules/.pnpm/@commitlint+read@17.5.1/node_m[##................] \ idealTree:loader-utils: sill placeDep node_modules/.pnpm/@cspotcode+source-map-support@0.8.1 j[##................] - idealTree:karma-coverage: sill placeDep node_modules/.pnpm/@eslint+eslintrc@2.0.3/node_modules[##................] | idealTree:sinon: sill placeDep node_modules/.pnpm/@eslint+eslintrc@2.0.3 supports-color@7.2.0 [##................] / idealTree:espree: timing idealTree:node_modules/.pnpm/@gar+promisify@1.1.3/node_modules/@gar/p[##................] - idealTree:espree: timing idealTree:node_modules/.pnpm/@gar+promisify@1.1.3/node_modules/@gar/p[##................] \ idealTree:xo: sill placeDep node_modules/.pnpm/@gar+promisify@1.1.3/node_modules/@gar/promisif[##................] \ idealTree:xo: sill placeDep node_modules/.pnpm/@gar+promisify@1.1.3/node_modules/@gar/promisif[##................] / idealTree:xo: sill placeDep node_modules/.pnpm/@gar+promisify@1.1.3/node_modules/@gar/promisif[##................] / idealTree:xo: sill placeDep node_modules/.pnpm/@gar+promisify@1.1.3/node_modules/@gar/promisif[##................] | idealTree:xo: sill placeDep node_modules/.pnpm/@gar+promisify@1.1.3/node_modules/@gar/promisif[##................] 
\ idealTree:c8: timing idealTree:node_modules/.pnpm/@humanwhocodes+module-importer@1.0.1/node_mo[##................] / idealTree:@iconify-json/carbon: timing idealTree:node_modules/.pnpm/@iconify-json+carbon@1.1.1[##................] / idealTree:@iconify-json/carbon: timing idealTree:node_modules/.pnpm/@iconify-json+carbon@1.1.1
最新发布
07-04

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值