Slab object allocation:
Slab objects are allocated with kmem_cache_alloc():
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
    void *ret = slab_alloc(cachep, flags, _RET_IP_);

    trace_kmem_cache_alloc(_RET_IP_, ret,
                           cachep->object_size, cachep->size, flags);

    return ret;
}
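Before diving into the internals, it may help to see how this API is used from the caller's side. Below is a minimal sketch of a kernel module using a dedicated slab cache; the cache name my_obj_cache and struct my_obj are made up for illustration.

#include <linux/module.h>
#include <linux/slab.h>

struct my_obj {                 /* hypothetical 64-byte object */
    int id;
    char payload[60];
};

static struct kmem_cache *my_cache;

static int __init my_init(void)
{
    struct my_obj *obj;

    /* Create a dedicated slab descriptor; it is linked into the same
     * global slab_caches list as the kmalloc-xxx caches. */
    my_cache = kmem_cache_create("my_obj_cache", sizeof(struct my_obj),
                                 0, SLAB_HWCACHE_ALIGN, NULL);
    if (!my_cache)
        return -ENOMEM;

    /* Fast path: ____cache_alloc() pops an object from the per-CPU
     * array_cache; on a miss it falls back to cache_alloc_refill(). */
    obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
    if (!obj) {
        kmem_cache_destroy(my_cache);
        return -ENOMEM;
    }

    kmem_cache_free(my_cache, obj);   /* return the object to the cache */
    return 0;
}

static void __exit my_exit(void)
{
    kmem_cache_destroy(my_cache);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");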
The real work is done along the call chain slab_alloc() -> __do_cache_alloc() -> ____cache_alloc():
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
    void *objp;
    struct array_cache *ac;
    bool force_refill = false;

    check_irq_off();

    ac = cpu_cache_get(cachep);---------------(1)
    if (likely(ac->avail)) {------------------(2)
        ac->touched = 1;
        objp = ac_get_obj(cachep, ac, flags, false);------------(3)

        /*
         * Allow for the possibility all avail objects are not allowed
         * by the current flags
         */
        if (objp) {
            STATS_INC_ALLOCHIT(cachep);
            goto out;
        }
        force_refill = true;----------------(4)
    }

    STATS_INC_ALLOCMISS(cachep);
    objp = cache_alloc_refill(cachep, flags, force_refill);--------------(5)
    /*
     * the 'ac' may be updated by cache_alloc_refill(),
     * and kmemleak_erase() requires its correct value.
     */
    ac = cpu_cache_get(cachep);

out:
    /*
     * To avoid a false negative, if an object that is in one of the
     * per-CPU caches is leaked, we need to make sure kmemleak doesn't
     * treat the array pointers as a reference to the object.
     */
    if (objp)
        kmemleak_erase(&ac->entry[ac->avail]);
    return objp;
}
(1) Get the per-CPU array_cache. It was already set up when the slab descriptor was initialized, so this lookup is guaranteed to succeed.
(2) If the array_cache's avail count is non-zero, the CPU's local object cache still holds free objects, so call ac_get_obj() to take one.
(3) Fetching the slab object (see the sketch after the function):
static inline void *ac_get_obj(struct kmem_cache *cachep,
            struct array_cache *ac, gfp_t flags, bool force_refill)
{
    void *objp;

    if (unlikely(sk_memalloc_socks()))
        objp = __ac_get_obj(cachep, ac, flags, force_refill);
    else
        objp = ac->entry[--ac->avail]; /* pop the last element of the array_cache entry array */

    return objp;
}
(4) If no object could be taken even though avail was non-zero (for example, none of the available objects are allowed by the current flags), set force_refill and fall through to cache_alloc_refill() to repopulate the object cache.
(5) If the array_cache has no free objects at all, call the core function cache_alloc_refill() directly to obtain a slab object:
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
                                bool force_refill)
{
    int batchcount;
    struct kmem_cache_node *n;
    struct array_cache *ac;
    int node;

    check_irq_off();

    node = numa_mem_id();
    if (unlikely(force_refill))
        goto force_grow;
retry:
    ac = cpu_cache_get(cachep);--------------(1)
    batchcount = ac->batchcount;
    if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
        /*
         * If there was little recent activity on this cache, then
         * perform only a partial refill. Otherwise we could generate
         * refill bouncing.
         */
        batchcount = BATCHREFILL_LIMIT;
    }
    n = get_node(cachep, node);--------------(2)

    BUG_ON(ac->avail > 0 || !n);
    spin_lock(&n->list_lock);

    /* See if we can refill from the shared array */
    if (n->shared && transfer_objects(ac, n->shared, batchcount)) {---------(3)
        n->shared->touched = 1;
        goto alloc_done;
    }

    while (batchcount > 0) {
        struct list_head *entry;
        struct page *page;

        /* Get slab alloc is to come from. */
        entry = n->slabs_partial.next;----------------(4)
        if (entry == &n->slabs_partial) {
            n->free_touched = 1;
            entry = n->slabs_free.next;
            if (entry == &n->slabs_free)
                goto must_grow;
        }

        page = list_entry(entry, struct page, lru);--------------(5)
        check_spinlock_acquired(cachep);

        /*
         * The slab was either on partial or free list so
         * there must be at least one object available for
         * allocation.
         */
        BUG_ON(page->active >= cachep->num);------------(6)

        while (page->active < cachep->num && batchcount--) {------------(7)
            STATS_INC_ALLOCED(cachep);
            STATS_INC_ACTIVE(cachep);
            STATS_SET_HIGH(cachep);

            ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
                                                node));---------------(8)
        }

        /* move slabp to correct slabp list: */
        list_del(&page->lru);-------------------------(9)
        if (page->active == cachep->num)-----------------(10)
            list_add(&page->lru, &n->slabs_full);
        else
            list_add(&page->lru, &n->slabs_partial);
    }

must_grow:
    n->free_objects -= ac->avail;
alloc_done:
    spin_unlock(&n->list_lock);

    if (unlikely(!ac->avail)) {
        int x;
force_grow:
        x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);--------------(11)

        /* cache_grow can reenable interrupts, then ac could change. */
        ac = cpu_cache_get(cachep);
        node = numa_mem_id();

        /* no objects in sight? abort */
        if (!x && (ac->avail == 0 || force_refill))
            return NULL;

        if (!ac->avail) /* objects refilled by interrupt? */-------------(12)
            goto retry;
    }
    ac->touched = 1;

    return ac_get_obj(cachep, ac, flags, force_refill);----------------(13)
}
(1) Get the CPU's local object cache descriptor.
(2) Get the slab node (kmem_cache_node) descriptor, which was initialized earlier.
(3) If the node's shared object cache has been initialized and batchcount objects can be transferred from it, go to alloc_done.
(4) If the shared cache cannot supply objects, enter the while loop: check the node's slabs_partial list first and then slabs_free; if both are empty, go to must_grow.
(5) If the partial or free list is not empty, use list_entry() to get the page structure that embeds the list entry via its lru field.
(6) If page->active exceeds the maximum number of objects per slab recorded in the slab descriptor (cachep->num), this is a bug.
(7) The key step: the while loop takes batchcount objects out of the slab whose first page is page and adds them to the CPU's local object cache.
(8) slab_get_obj() takes one object from the slab, incrementing page->active, and ac_put_obj() pushes it into the CPU's local object cache. If active reaches num while batchcount is still greater than 0, the outer loop looks for another partial or free slab, until batchcount objects have been migrated into the local cache.
(9) Remove the page from its original list.
(10) If page->active now equals cachep->num, move the page to the full list; otherwise move it to the partial list.
(11) If both the partial and free lists are empty, there are no free slab objects left, so a new slab (object cache) must be created via cache_grow().
(12) cache_grow() only allocates a new slab and adds it to the slabs_free list; avail is still 0 at this point, so retry: the retry pass then moves batchcount objects into the CPU local object cache, after which avail is non-zero.
(13) Take one object from the CPU local object cache, completing the slab object allocation. (A condensed model of this refill order is sketched below.)
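To summarize annotations (3), (4) and (11): the refill logic tries its sources in a fixed order. A condensed userspace model of that order (function and type names are illustrative, not the kernel's):

#include <stdio.h>
#include <stdbool.h>

enum refill_source { FROM_SHARED, FROM_PARTIAL, FROM_FREE, MUST_GROW };

/* Order in which cache_alloc_refill() looks for objects. */
static enum refill_source pick_source(bool shared_has_objs,
                                      bool partial_empty, bool free_empty)
{
    if (shared_has_objs)      /* (3) per-node shared object cache  */
        return FROM_SHARED;
    if (!partial_empty)       /* (4) partially used slabs          */
        return FROM_PARTIAL;
    if (!free_empty)          /* (4) completely free slabs         */
        return FROM_FREE;
    return MUST_GROW;         /* (11) grow a brand-new slab        */
}

int main(void)
{
    static const char *names[] = {
        "shared cache", "partial list", "free list", "cache_grow()"
    };

    /* Everything empty: only cache_grow() can help. */
    printf("%s\n", names[pick_source(false, true, true)]);
    return 0;
}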
Now look at the cache_grow() function:
static int cache_grow(struct kmem_cache *cachep,
                      gfp_t flags, int nodeid, struct page *page)
{
    void *freelist;
    size_t offset;
    gfp_t local_flags;
    struct kmem_cache_node *n;

    /*
     * Be lazy and only check for valid flags here, keeping it out of the
     * critical path in kmem_cache_alloc().
     */
    if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
        pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
        BUG();
    }
    local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

    /* Take the node list lock to change the colour_next on this node */
    check_irq_off();
    n = get_node(cachep, nodeid);
    spin_lock(&n->list_lock);

    /* Get colour for the slab, and cal the next value. */
    offset = n->colour_next;------------------(1)
    n->colour_next++;
    if (n->colour_next >= cachep->colour)
        n->colour_next = 0;
    spin_unlock(&n->list_lock);

    offset *= cachep->colour_off;

    if (gfpflags_allow_blocking(local_flags))
        local_irq_enable();

    /*
     * The test for missing atomic flag is performed here, rather than
     * the more obvious place, simply to reduce the critical path length
     * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
     * will eventually be caught here (where it matters).
     */
    kmem_flagcheck(cachep, flags);

    /*
     * Get mem for the objs. Attempt to allocate a physical page from
     * 'nodeid'.
     */
    if (!page)
        page = kmem_getpages(cachep, local_flags, nodeid);-----------(2)
    if (!page)
        goto failed;

    /* Get slab management. */
    freelist = alloc_slabmgmt(cachep, page, offset,
                              local_flags & ~GFP_CONSTRAINT_MASK, nodeid);---------(3)
    if (!freelist)
        goto opps1;

    slab_map_pages(cachep, page, freelist);---------------(4)

    cache_init_objs(cachep, page);----------------(5)

    if (gfpflags_allow_blocking(local_flags))
        local_irq_disable();
    check_irq_off();
    spin_lock(&n->list_lock);

    /* Make slab active. */
    list_add_tail(&page->lru, &(n->slabs_free));---------------(6)
    STATS_INC_GROWN(cachep);
    n->free_objects += cachep->num;-----------------------(7)
    spin_unlock(&n->list_lock);
    return 1;
opps1:
    kmem_freepages(cachep, page);
failed:
    if (gfpflags_allow_blocking(local_flags))
        local_irq_disable();
    return 0;
}
(1) colour_next records the colour index the next slab created on this node will use. It starts at 0 and is incremented by 1 for each new slab until it reaches the slab descriptor's maximum, cachep->colour, after which it wraps back to 0. (See the colouring sketch after this list.)
(2) Allocate 2^gfporder pages for the slab's object cache.
(3) Initialize freelist, page->active and page->s_mem: freelist is the virtual address of the slab's first page plus the colour offset; page->active is initialized to 0; page->s_mem is the first page's virtual address plus the colour offset plus cachep->freelist_size.
(4) Set page->slab_cache = cachep and page->freelist = freelist.
(5) Initialize each object's state to OBJECT_FREE and fill in the freelist array.
(6) Add the newly created slab (object cache) to the slab node's slabs_free list.
(7) Increase the node's total free_objects count by cachep->num.
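A small model of the colouring logic in annotation (1): each new slab places its objects at a different cache-line offset, so objects from different slabs do not all compete for the same cache lines. The concrete values below are illustrative:

#include <stdio.h>

int main(void)
{
    unsigned int colour = 4;       /* cachep->colour: number of colours */
    unsigned int colour_off = 64;  /* cachep->colour_off: L1 line size  */
    unsigned int colour_next = 0;  /* n->colour_next, per slab node     */

    for (int slab = 0; slab < 6; slab++) {
        /* offset = n->colour_next * cachep->colour_off, wrapping at
         * cachep->colour, exactly as cache_grow() computes it. */
        unsigned int offset = colour_next * colour_off;

        if (++colour_next >= colour)
            colour_next = 0;

        printf("slab %d: colour offset %u bytes\n", slab, offset);
    }
    return 0;
}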
This is the full slab object allocation path; at this point the overall slab framework can be drawn as the following diagram.
The above used kmalloc-64, one node on the global slab_caches list, as an example to describe the memory layout of a single slab node; other nodes follow the same pattern. Note that the global slab_caches list links together all slab descriptors: the general-purpose ones (kmalloc-xxx) as well as the special-purpose ones (e.g. slabs created and named specifically by drivers).
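The caches on this list can be observed at runtime through /proc/slabinfo. A small sketch that prints the kmalloc-64 line (reading slabinfo usually requires root):

#include <stdio.h>
#include <string.h>

int main(void)
{
    char line[512];
    FILE *f = fopen("/proc/slabinfo", "r");  /* usually needs root */

    if (!f) {
        perror("fopen");
        return 1;
    }
    while (fgets(line, sizeof(line), f)) {
        /* Every slab descriptor on slab_caches shows up here,
         * general-purpose (kmalloc-xxx) and driver-specific alike. */
        if (strncmp(line, "kmalloc-64", 10) == 0)
            fputs(line, stdout);
    }
    fclose(f);
    return 0;
}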