A VMA currently supports four memory allocation policies. The kernel comment in mm/mempolicy.c describes them as follows:
/*
 * Support four policies per VMA and per process:
 *
 * The VMA policy has priority over the process policy for a page fault.
 *
 * interleave     Allocate memory interleaved over a set of nodes,
 *                with normal fallback if it fails.
 *                For VMA based allocations this interleaves based on the
 *                offset into the backing object or offset into the mapping
 *                for anonymous memory. For process policy a process counter
 *                is used.
 *
 * bind           Only allocate memory on a specific set of nodes,
 *                no fallback.
 *                FIXME: memory is allocated starting with the first node
 *                to the last. It would be better if bind would truly restrict
 *                the allocation to memory nodes instead
 *
 * preferred      Try a specific node first before normal fallback.
 *                As a special case NUMA_NO_NODE here means do the allocation
 *                on the local CPU. This is normally identical to default,
 *                but useful to set in a VMA when you have a non default
 *                process policy.
 *
 * default        Allocate on the local node first, or when on a VMA
 *                use the process policy. This is what Linux always did
 *                in a NUMA aware kernel and still does by, ahem, default.
 */
In numa_policy_init(), every NUMA node's memory allocation policy is initialized to MPOL_PREFERRED, i.e. memory is preferentially allocated from the specified NUMA node.
void __init numa_policy_init(void)
{
	nodemask_t interleave_nodes;
	unsigned long largest = 0;
	int nid, prefer = 0;

	/* Set each NUMA node's memory policy to MPOL_PREFERRED */
	for_each_node(nid) {
		preferred_node_policy[nid] = (struct mempolicy) {
			.refcnt = ATOMIC_INIT(1),
			.mode = MPOL_PREFERRED,
			.flags = MPOL_F_MOF | MPOL_F_MORON,
			.v = { .preferred_node = nid, },
		};
	}

	/*
	 * Set interleaving policy for system init. Interleaving is only
	 * enabled across suitably sized nodes (default is >= 16MB), or
	 * fall back to the largest node if they're all smaller.
	 */
	/* The two cases below decide which nodes go into interleave_nodes */
	nodes_clear(interleave_nodes);
	for_each_node_state(nid, N_MEMORY) {
		unsigned long total_pages = node_present_pages(nid);

		/* Preserve the largest node */
		if (largest < total_pages) {
			largest = total_pages;
			prefer = nid;
		}

		/* Interleave this node? Only if its memory is >= 16MB */
		if ((total_pages << PAGE_SHIFT) >= (16 << 20))
			node_set(nid, interleave_nodes);
	}

	/*
	 * All too small, use the largest: if every node is smaller than
	 * 16MB, interleave over the largest node so the system still uses
	 * the memory of the biggest NUMA node.
	 */
	if (unlikely(nodes_empty(interleave_nodes)))
		node_set(prefer, interleave_nodes);

	/* Install the interleave_nodes mask built above as MPOL_INTERLEAVE */
	if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
		pr_err("%s: interleaving failed\n", __func__);

	/* Check whether NUMA balancing is enabled */
	check_numabalancing_enable();
}
At runtime, get_task_policy() is used to look up a task's memory allocation policy:
struct mempolicy *get_task_policy(struct task_struct *p)
{
	struct mempolicy *pol = p->mempolicy;
	int node;

	/* If the task carries its own mempolicy, return it directly */
	if (pol)
		return pol;

	/* numa_node_id() uses raw_cpu_read(numa_node) to get the NUMA
	 * node of the current CPU */
	node = numa_node_id();

	/* If the task's mempolicy is NULL, return the per-node mempolicy
	 * that numa_policy_init() set up for the current node */
	if (node != NUMA_NO_NODE) {
		pol = &preferred_node_policy[node];
		/* preferred_node_policy is not initialised early in boot */
		if (pol->mode)
			return pol;
	}

	return &default_policy;
}