Linux 3.10 memory management (4): the slab mechanism in detail, part 2 — initialization (continued)

Each kmem_cache contains a per-CPU array_cache that serves as that CPU's allocation cache. In addition, each kmem_cache has a kmem_cache_node instance (one per node) whose shared array can serve as an allocation cache for all CPUs using that kmem_cache. However, the initialization covered in the previous article did not reflect this caching idea: the per-CPU array_cache values were essentially placeholders, and kmem_cache_node.shared was not used at all. The purpose of kmem_cache_init_late is precisely to complete the slab allocator's caching machinery.

1 What is the shared cache for?

The shared cache effectively sits between the cpu_cache (the per-CPU array_cache) and the slabs. When the cpu_cache runs out of objects, the allocator first tries to take objects from the shared cache, and only goes to the slabs if the shared cache is also empty. Conversely, when an object is freed while the cpu_cache is full, objects are spilled into the shared cache, and only when the shared cache is also full are they returned to the slabs (a minimal sketch of this three-level path follows the list below).

The shared cache mechanism has several benefits:

    1 Releasing an object into the shared cache is faster than releasing it back to a slab, and allocating an object from the shared cache is likewise faster than allocating it from a slab, so the shared cache speeds up both allocation and free.
    2 It increases the hardware cache hit rate. Allocating and freeing objects of the same size repeatedly within a short time is very common. In that scenario an object freed into the shared cache is likely to be handed out again from the shared cache soon afterwards; since it was touched recently, the chance that it has already been evicted from the hardware cache is small and the hit probability is high. If the object were instead released back to a slab, the next allocation might well come from a different slab, the earlier object would be more likely to get evicted, and the newly allocated object would be more likely to miss.
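
To make this three-level path concrete, here is a minimal user-space sketch. The structure and function names are simplified stand-ins rather than the real kernel definitions, and it only shows the priority order; the real code moves objects in batches (batchcount at a time) via cache_flusharray and transfer_objects rather than one by one.

#include <stdio.h>

#define CAP 4

/* simplified stand-in for struct array_cache */
struct obj_array {
	int avail;         /* objects currently cached */
	int limit;         /* capacity of entry[]      */
	void *entry[CAP];  /* cached object pointers   */
};

/* Free path: per-CPU array first, then the per-node shared array, and
 * only when both are full does the object go back to its slab
 * (free_block() in the real code). */
static void sketch_free(struct obj_array *cpu_cache,
			struct obj_array *shared, void *objp)
{
	if (cpu_cache->avail < cpu_cache->limit)
		cpu_cache->entry[cpu_cache->avail++] = objp;
	else if (shared && shared->avail < shared->limit)
		shared->entry[shared->avail++] = objp;
	else
		printf("slow path: %p would go back to its slab\n", objp);
}

/* Alloc path mirrors it: per-CPU array, then shared array, then slabs. */
static void *sketch_alloc(struct obj_array *cpu_cache, struct obj_array *shared)
{
	if (cpu_cache->avail)
		return cpu_cache->entry[--cpu_cache->avail];
	if (shared && shared->avail)
		return shared->entry[--shared->avail];
	return NULL;	/* would fall through to cache_alloc_refill() */
}

int main(void)
{
	struct obj_array cpu = { .limit = CAP };
	struct obj_array shr = { .limit = CAP };
	int dummy;

	sketch_free(&cpu, &shr, &dummy);
	printf("got back: %p\n", sketch_alloc(&cpu, &shr));
	return 0;
}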

2 Shared cache initialization

start_kernel

         --------->kmem_cache_init_late

void __init kmem_cache_init_late(void)
{
	struct kmem_cache *cachep;

	slab_state = UP;

	/* 6) resize the head arrays to their final sizes */
	mutex_lock(&slab_mutex);
	list_for_each_entry(cachep, &slab_caches, list)// walk every kmem_cache created so far; they all hang off the slab_caches list
		if (enable_cpucache(cachep, GFP_NOWAIT))// enable_cpucache computes the shared value for the kmem_cache and allocates its shared cache
			BUG();
	mutex_unlock(&slab_mutex);

	/* Annotate slab for lockdep -- annotate the malloc caches */
	init_lock_keys();

	/* Done! */
	slab_state = FULL;

	/*
	 * Register a cpu startup notifier callback that initializes
	 * cpu_cache_get for all new cpus
	 */
	register_cpu_notifier(&cpucache_notifier);

#ifdef CONFIG_NUMA
	/*
	 * Register a memory hotplug callback that initializes and frees
	 * node.
	 */
	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif

	/*
	 * The reap timers are started later, with a module init call: That part
	 * of the kernel is not yet operational.
	 */
}

The function above enables the cpucache for every kmem_cache that has already been created, by calling enable_cpucache on each of them. The interesting part is enable_cpucache:

static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
	int err;
	int limit = 0;
	int shared = 0;
	int batchcount = 0;

	if (!is_root_cache(cachep)) {
		struct kmem_cache *root = memcg_root_cache(cachep);
		limit = root->limit;
		shared = root->shared;
		batchcount = root->batchcount;
	}

	if (limit && shared && batchcount)
		goto skip_setup;
	/*
	 * The head array serves three purposes:
	 * - create a LIFO ordering, i.e. return objects that are cache-warm
	 * - reduce the number of spinlock operations.
	 * - reduce the number of linked list operations on the slab and
	 *   bufctl chains: array operations are cheaper.
	 * The numbers are guessed, we should auto-tune as described by
	 * Bonwick.
	 */
	if (cachep->size > 131072)
		limit = 1;
	else if (cachep->size > PAGE_SIZE)
		limit = 8;
	else if (cachep->size > 1024)
		limit = 24;
	else if (cachep->size > 256)
		limit = 54;
	else
		limit = 120;

	/*
	 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
	 * allocation behaviour: Most allocs on one cpu, most free operations
	 * on another cpu. For these cases, an efficient object passing between
	 * cpus is necessary. This is provided by a shared array. The array
	 * replaces Bonwick's magazine layer.
	 * On uniprocessor, it's functionally equivalent (but less efficient)
	 * to a larger limit. Thus disabled by default.
	 */
	shared = 0;
	if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
		shared = 8;
// shared is set only when cachep->size <= PAGE_SIZE and there is more than one possible CPU; the shared cache only takes effect on multi-CPU systems
#if DEBUG
	/*
	 * With debugging enabled, large batchcount lead to excessively long
	 * periods with disabled local interrupts. Limit the batchcount
	 */
	if (limit > 32)
		limit = 32;
#endif
	batchcount = (limit + 1) / 2;
skip_setup:
	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
	if (err)
		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
		       cachep->name, -err);
	return err;
}
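
Before moving on, a hedged worked example of the heuristic above may help. The snippet below is a plain user-space restatement of it (PAGE_SIZE assumed to be 4096 and a 4-CPU machine assumed for this example; none of this is kernel code), showing what a small 192-byte cache ends up with.

#include <stdio.h>

int main(void)
{
	unsigned long size = 192;	/* example: a 192-byte object cache  */
	unsigned long page_size = 4096;	/* assumption for this example       */
	int num_possible_cpus = 4;	/* assumption: multi-CPU machine     */
	int limit, shared, batchcount;

	/* same thresholds as in enable_cpucache() above */
	if (size > 131072)
		limit = 1;
	else if (size > page_size)
		limit = 8;
	else if (size > 1024)
		limit = 24;
	else if (size > 256)
		limit = 54;
	else
		limit = 120;

	shared = 0;
	if (size <= page_size && num_possible_cpus > 1)
		shared = 8;

	batchcount = (limit + 1) / 2;

	/* prints: limit=120 batchcount=60 shared=8 -> shared capacity 480 */
	printf("limit=%d batchcount=%d shared=%d -> shared capacity %d\n",
	       limit, batchcount, shared, shared * batchcount);
	return 0;
}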

enable_cpucache computes the shared value of the kmem_cache instance; shared * batchcount is the maximum number of objects the shared cache can hold. Next, do_tune_cpucache:

do_tune_cpucache

          -------------->__do_tune_cpucache

static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	struct ccupdate_struct *new;
	int i;
        // allocate a temporary ccupdate_struct to hold the newly allocated array_caches
	new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
		      gfp);
	if (!new)
		return -ENOMEM;

	for_each_online_cpu(i) {
		new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
						batchcount, gfp);
                // build a new array_cache instance from the limit and batchcount values.
                // Because the array_cache in kmem_cache is per-CPU, we loop here and
                // build one array_cache instance for every online CPU.
		if (!new->new[i]) {
			for (i--; i >= 0; i--)
				kfree(new->new[i]);
			kfree(new);
			return -ENOMEM;
		}
	}
	new->cachep = cachep;

	on_each_cpu(do_ccupdate_local, (void *)new, 1);
// replace each CPU's array_cache[i] in the kmem_cache with new->new[i]

	check_irq_on();
	cachep->batchcount = batchcount;
	cachep->limit = limit;
	cachep->shared = shared;

        // The per-CPU array_cache[i] of the kmem_cache has been swapped above,
        // so the old array_caches must now be freed.
	for_each_online_cpu(i) {
		struct array_cache *ccold = new->new[i];
		if (!ccold)
			continue;
		spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
		free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
               // free_block releases the memory pointed to by the ccold->avail entries
               // of ccold->entry back to the slab lists; here ccold->avail is 0, though.

		spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
		kfree(ccold);
                // kfree here behaves much like free_block: slab-managed memory lives in
                // lowmem, where physical and virtual addresses differ by a fixed offset,
                // so the struct page is easy to find from the virtual address; the page's
                // lru field is reused by slab and leads back to the kmem_cache instance,
                // so kfree is essentially equivalent to free_block. The important
                // difference is that free_block hands memory straight back to the slab
                // lists, whereas kfree first tries to put it into the per-CPU array_cache.
	}
	kfree(new);
	return alloc_kmemlist(cachep, gfp);
}

1 First look at alloc_arraycache, which allocates an array_cache structure and initializes it

static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount, gfp_t gfp)
{
	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
	struct array_cache *nc = NULL;

	nc = kmalloc_node(memsize, gfp, node);
	/*
	 * The array_cache structures contain pointers to free object.
	 * However, when such objects are allocated or transferred to another
	 * cache the pointers are not cleared and they could be counted as
	 * valid references during a kmemleak scan. Therefore, kmemleak must
	 * not scan such objects.
	 */
	kmemleak_no_scan(nc);
	if (nc) {
		nc->avail = 0;
		nc->limit = entries;
		nc->batchcount = batchcount;
		nc->touched = 0;
		spin_lock_init(&nc->lock);
	}
	return nc;
}
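
One detail worth noting about the allocation above: memsize covers the array_cache header plus the pointer slots in a single allocation. In 3.10's mm/slab.c, entry[] is declared as a flexible array member at the end of struct array_cache, so the one kmalloc_node() call returns the header with the pointer array laid out directly behind it, and nc->entry[] can be used immediately.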

2 Next look at on_each_cpu(do_ccupdate_local, (void *)new, 1); what actually runs on every CPU is do_ccupdate_local:

static void do_ccupdate_local(void *info)
{
	struct ccupdate_struct *new = info;
	struct array_cache *old;

	check_irq_off();
	old = cpu_cache_get(new->cachep);

	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
	new->new[smp_processor_id()] = old;
}

This function swaps the newly allocated array_cache into the kmem_cache in place of the old one. Because it runs on every CPU via on_each_cpu (with interrupts off), each CPU replaces only its own array slot, and the old pointer is handed back through new->new[smp_processor_id()] so that __do_tune_cpucache can drain and free it afterwards.

free_block is then called to release the objects still held in the old array_cache back to the slab lists. Its implementation:

3 free_block

static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
		       int node)
{
	int i;
	struct kmem_cache_node *n;

	for (i = 0; i < nr_objects; i++) {
		void *objp;
		struct slab *slabp;

		clear_obj_pfmemalloc(&objpp[i]);
		objp = objpp[i];

		slabp = virt_to_slab(objp);// a slab normally does not span more than one page, so rounding the object address down gives its page frame; from the frame we get the struct page, and since the page has a field pointing back to the slab we finally obtain slabp
		n = cachep->node[node];
		list_del(&slabp->list);// detach the slab from whichever kmem_cache_node list it currently sits on
		check_spinlock_acquired_node(cachep, node);
		check_slabp(cachep, slabp);
		slab_put_obj(cachep, slabp, objp, node);// put the object being freed back into the slab
		STATS_DEC_ACTIVE(cachep);
		n->free_objects++;// bump the free-object count
		check_slabp(cachep, slabp);

		/* fixup slab chains */
		if (slabp->inuse == 0) { // every object in this slab is now free
			if (n->free_objects > n->free_limit) {
                        // if the number of free objects exceeds the configured limit, give some memory back to the buddy system
				n->free_objects -= cachep->num;
				/* No need to drop any previously held
				 * lock here, even if we have a off-slab slab
				 * descriptor it is guaranteed to come from
				 * a different cache, refer to comments before
				 * alloc_slabmgmt.
				 */
				slab_destroy(cachep, slabp);
			} else {
				list_add(&slabp->list, &n->slabs_free);// put the slab on the free list
			}
		} else {
			/* Unconditionally move a slab to the end of the
			 * partial list on free - maximum time for the
			 * other objects to be freed, too.
			 */
                        // not completely free: move it onto the slabs_partial list
			list_add_tail(&slabp->list, &n->slabs_partial);
		}
	}
}

4 Now look at kfree

void kfree(const void *objp)
{
	struct kmem_cache *c;
	unsigned long flags;

	trace_kfree(_RET_IP_, objp);

	if (unlikely(ZERO_OR_NULL_PTR(objp)))
		return;
	local_irq_save(flags);
	kfree_debugcheck(objp);
	c = virt_to_cache(objp);
	debug_check_no_locks_freed(objp, c->object_size);

	debug_check_no_obj_freed(objp, c->object_size);
	__cache_free(c, (void *)objp, _RET_IP_);
	local_irq_restore(flags);
}

Its core is __cache_free:

static inline void __cache_free(struct kmem_cache *cachep, void *objp,
				unsigned long caller)
{
	struct array_cache *ac = cpu_cache_get(cachep);

	check_irq_off();
	kmemleak_free_recursive(objp, cachep->flags);
	objp = cache_free_debugcheck(cachep, objp, caller);

	kmemcheck_slab_free(cachep, objp, cachep->object_size);

	/*
	 * Skip calling cache_free_alien() when the platform is not numa.
	 * This will avoid cache misses that happen while accessing slabp (which
	 * is per page memory  reference) to get nodeid. Instead use a global
	 * variable to skip the call, which is mostly likely to be present in
	 * the cache.
	 */
	if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
		return;

	if (likely(ac->avail < ac->limit)) {
		STATS_INC_FREEHIT(cachep);
	} else {
		STATS_INC_FREEMISS(cachep);
		cache_flusharray(cachep, ac);// the current CPU's array_cache holds too many free objects, so flush part of them into the shared array_cache
	}

	ac_put_obj(cachep, ac, objp);// put the object being freed into the current CPU's array_cache
}

5 alloc_kmemlist allocates the shared array_cache for the kmem_cache

static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
{
	int node;
	struct kmem_cache_node *n;
	struct array_cache *new_shared;
	struct array_cache **new_alien = NULL;

	for_each_online_node(node) {

                if (use_alien_caches) {
                        new_alien = alloc_alien_cache(node, cachep->limit, gfp);
                        if (!new_alien)
                                goto fail;
                }

		new_shared = NULL;
		if (cachep->shared) {// allocate a shared array_cache for this node
			new_shared = alloc_arraycache(node,
				cachep->shared*cachep->batchcount,
					0xbaadf00d, gfp);
			if (!new_shared) {
				free_alien_cache(new_alien);
				goto fail;
			}
		}

		n = cachep->node[node];
		if (n) {
			struct array_cache *shared = n->shared;

			spin_lock_irq(&n->list_lock);

			if (shared)
				free_block(cachep, shared->entry,
						shared->avail, node);

			n->shared = new_shared;
			if (!n->alien) {
				n->alien = new_alien;
				new_alien = NULL;
			}
			n->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
			spin_unlock_irq(&n->list_lock);
			kfree(shared);
			free_alien_cache(new_alien);
			continue;
		}
		n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
		if (!n) {
			free_alien_cache(new_alien);
			kfree(new_shared);
			goto fail;
		}

		kmem_cache_node_init(n);
		n->next_reap = jiffies + REAPTIMEOUT_LIST3 +
				((unsigned long)cachep) % REAPTIMEOUT_LIST3;
		n->shared = new_shared;
		n->alien = new_alien;
		n->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
		cachep->node[node] = n;
	}
	return 0;

fail:
	if (!cachep->list.next) {
		/* Cache is not active yet. Roll back what we did */
		node--;
		while (node >= 0) {
			if (cachep->node[node]) {
				n = cachep->node[node];

				kfree(n->shared);
				free_alien_cache(n->alien);
				kfree(n);
				cachep->node[node] = NULL;
			}
			node--;
		}
	}
	return -ENOMEM;
}

For the given kmem_cache, the main job of this function is to allocate a shared array_cache for each kmem_cache_node (one per online node) and install it.
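
A rough worked example with hypothetical numbers: on a node with 4 CPUs, with batchcount = 60 and cachep->num = 32 objects per slab, free_limit = (1 + 4) * 60 + 32 = 332, while the shared array itself is sized for shared * batchcount = 8 * 60 = 480 object pointers. The 0xbaadf00d passed as the batchcount argument is presumably just a poison value, since the shared array's own batchcount field is never consulted.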

With that, initialization is complete.

3 Allocating and freeing objects via the shared cache

3.1 Freeing objects into the shared cache

In __cache_free, if the cpu_cache is full, cache_flusharray is called to move at most batchcount objects from the cpu cache into the shared cache:

__cache_free

          ---------->cache_flusharray

static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{
	int batchcount;
	struct kmem_cache_node *n;
	int node = numa_mem_id();

	batchcount = ac->batchcount;
#if DEBUG
	BUG_ON(!batchcount || batchcount > ac->avail);
#endif
	check_irq_off();
	n = cachep->node[node];
	spin_lock(&n->list_lock);
	if (n->shared) {
		struct array_cache *shared_array = n->shared;
		int max = shared_array->limit - shared_array->avail;
		if (max) {
			if (batchcount > max)
				batchcount = max;
                        // copy batchcount object pointers into shared_array
			memcpy(&(shared_array->entry[shared_array->avail]),
			       ac->entry, sizeof(void *) * batchcount);
			shared_array->avail += batchcount;
			goto free_done;
		}
	}

	free_block(cachep, ac->entry, batchcount, node);
free_done:
#if STATS
	{
		int i = 0;
		struct list_head *p;

		p = n->slabs_free.next;
		while (p != &(n->slabs_free)) {
			struct slab *slabp;

			slabp = list_entry(p, struct slab, list);
			BUG_ON(slabp->inuse);

			i++;
			p = p->next;
		}
		STATS_SET_FREEABLE(cachep, i);
	}
#endif
	spin_unlock(&n->list_lock);
	ac->avail -= batchcount; // subtract the moved objects from the current CPU's array_cache
	// the memmove below removes the pointers already copied into shared_array from the current CPU's array_cache
	memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}
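
A worked example with hypothetical numbers: if ac->batchcount = 60, ac->avail = 120 (the per-CPU array is full) and the shared array still has room for at least 60 more pointers, then 60 pointers are memcpy'd from the front of ac->entry[] into the shared array, ac->avail drops to 60, and the final memmove shifts the remaining 60 pointers back to the front of ac->entry[].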

3.2 Allocating objects from the shared cache

When the cpu cache has no objects left, cache_alloc_refill is called to fetch objects and refill the cpu cache.

cache_alloc_refill checks whether the shared cache has objects available; if so, transfer_objects moves objects from the shared cache into the cpu cache; if the shared cache has nothing to offer, objects are taken from the slabs instead.

static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
							bool force_refill)
{
	int batchcount;
	struct kmem_cache_node *n;
	struct array_cache *ac;
	int node;

	check_irq_off();
	node = numa_mem_id();
	if (unlikely(force_refill))
		goto force_grow;
retry:
	ac = cpu_cache_get(cachep);
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/*
		 * If there was little recent activity on this cache, then
		 * perform only a partial refill.  Otherwise we could generate
		 * refill bouncing.
		 */
		batchcount = BATCHREFILL_LIMIT;
	}
	n = cachep->node[node];

	BUG_ON(ac->avail > 0 || !n);
	spin_lock(&n->list_lock);

	/* See if we can refill from the shared array */
	if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
		n->shared->touched = 1;
		goto alloc_done;
	} // if a shared array exists, move objects from it into the current CPU's array_cache

	while (batchcount > 0) {
		struct list_head *entry;
		struct slab *slabp;
		/* Get slab alloc is to come from. */
		entry = n->slabs_partial.next;
		if (entry == &n->slabs_partial) {
			n->free_touched = 1;
			entry = n->slabs_free.next;
			if (entry == &n->slabs_free)
				goto must_grow;
		}

		slabp = list_entry(entry, struct slab, list);
		check_slabp(cachep, slabp);
		check_spinlock_acquired(cachep);

		/*
		 * The slab was either on partial or free list so
		 * there must be at least one object available for
		 * allocation.
		 */
		BUG_ON(slabp->inuse >= cachep->num);

		while (slabp->inuse < cachep->num && batchcount--) {
			STATS_INC_ALLOCED(cachep);
			STATS_INC_ACTIVE(cachep);
			STATS_SET_HIGH(cachep);

			ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
									node));
		}
		check_slabp(cachep, slabp);

		/* move slabp to correct slabp list: */
		list_del(&slabp->list);
		if (slabp->free == BUFCTL_END)
			list_add(&slabp->list, &n->slabs_full);
		else
			list_add(&slabp->list, &n->slabs_partial);
	}

must_grow:
	n->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&n->list_lock);

	if (unlikely(!ac->avail)) {
		int x;
force_grow:
		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

		/* cache_grow can reenable interrupts, then ac could change. */
		ac = cpu_cache_get(cachep);
		node = numa_mem_id();

		/* no objects in sight? abort */
		if (!x && (ac->avail == 0 || force_refill))
			return NULL;

		if (!ac->avail)		/* objects refilled by interrupt? */
			goto retry;
	}
	ac->touched = 1;
            // after refilling from the shared array (or the slabs), the allocation can now succeed from the current CPU's array_cache
	return ac_get_obj(cachep, ac, flags, force_refill);
}
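
So the refill order is: the per-node shared array first, then the partially used slabs, then the completely free slabs; only if all of these are empty does cache_grow() ask the buddy allocator for fresh pages, after which the function retries.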

Finally, transfer_objects:

static int transfer_objects(struct array_cache *to,
		struct array_cache *from, unsigned int max)
{
	/* Figure out how many entries to transfer */
	int nr = min3(from->avail, max, to->limit - to->avail);

	if (!nr)
		return 0;

	memcpy(to->entry + to->avail, from->entry + from->avail -nr,
			sizeof(void *) *nr);

	from->avail -= nr;
	to->avail += nr;
	return nr;
}

It moves objects from the shared array into the current CPU's array_cache.
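
A worked example with hypothetical numbers: if from->avail = 30, the requested max (the cpu cache's batchcount) is 60, and the cpu cache still has room for 120 more entries, then nr = min3(30, 60, 120) = 30, and the last 30 pointers of the shared array are copied onto the tail of the cpu cache's entry[] array.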
