The Kernel Hacker's Path: Memory Management - SLUB (Allocating a SLAB Object with kmem_cache_alloc())

Allocating a SLAB object: kmem_cache_alloc()

Author: Li Wanpeng, Beijing, borqs


void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
        void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);

        trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);

        return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
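For orientation, here is a minimal caller-side sketch of how this API is normally used. The cache name, struct my_object, and the my_* functions are hypothetical and only for illustration; the slab calls themselves (kmem_cache_create/alloc/free/destroy) are the standard interface that ends up in the code walked through below.

/* Illustrative usage sketch; "struct my_object" and the cache name are
 * hypothetical, not part of the kernel sources quoted in this article. */
#include <linux/slab.h>
#include <linux/errno.h>

struct my_object {
        int  id;
        char name[16];
};

static struct kmem_cache *my_cache;

static int my_init(void)
{
        /* create a cache of fixed-size objects managed by SLUB */
        my_cache = kmem_cache_create("my_object_cache",
                                     sizeof(struct my_object), 0,
                                     SLAB_HWCACHE_ALIGN, NULL);
        if (!my_cache)
                return -ENOMEM;
        return 0;
}

static void my_use(void)
{
        /* this call goes through slab_alloc() shown below */
        struct my_object *obj = kmem_cache_alloc(my_cache, GFP_KERNEL);

        if (!obj)
                return;
        obj->id = 1;
        kmem_cache_free(my_cache, obj);
}

static void my_exit(void)
{
        kmem_cache_destroy(my_cache);
}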

static __always_inline void *slab_alloc(struct kmem_cache *s,
                gfp_t gfpflags, int node, unsigned long addr)
{
        void **object;
        struct kmem_cache_cpu *c;
#ifdef CONFIG_CMPXCHG_LOCAL
        unsigned long tid;
#else
        unsigned long flags;
#endif

        if (slab_pre_alloc_hook(s, gfpflags))
                return NULL;

#ifndef CONFIG_CMPXCHG_LOCAL
        local_irq_save(flags);
#else
redo:
#endif

        /* get the cpu-local slab */
        c = __this_cpu_ptr(s->cpu_slab);

#ifdef CONFIG_CMPXCHG_LOCAL
        /*
         * The transaction ids are globally unique per cpu and per operation on
         * a per cpu queue. Thus they guarantee that the cmpxchg_double
         * occurs on the right processor and that there was no operation on the
         * linked list in between.
         */
        tid = c->tid;
        barrier();
#endif

        /* fastpath: take a free object straight from c->freelist */
        object = c->freelist;
        /*
         * If c->freelist has no free object, or the cpu slab does not match
         * the requested node, we have to take the slowpath.
         */
        if (unlikely(!object || !node_match(c, node)))
                object = __slab_alloc(s, gfpflags, node, addr, c);
        else {
#ifdef CONFIG_CMPXCHG_LOCAL
                if (unlikely(!irqsafe_cpu_cmpxchg_double(
                                s->cpu_slab->freelist, s->cpu_slab->tid,
                                object, tid,
                                get_freepointer_safe(s, object), next_tid(tid)))) {

                        note_cmpxchg_failure("slab_alloc", s, tid);
                        goto redo;
                }
#else
                /* advance c->freelist to the next free object */
                c->freelist = get_freepointer(s, object);
#endif
                stat(s, ALLOC_FASTPATH);
        }

#ifndef CONFIG_CMPXCHG_LOCAL
        local_irq_restore(flags);
#endif
        /* zero the object if the caller asked for it */
        if (unlikely(gfpflags & __GFP_ZERO) && object)
                memset(object, 0, s->objsize);

        slab_post_alloc_hook(s, gfpflags, object);

        return object;
}
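A side note on the __GFP_ZERO handling at the end of slab_alloc(): callers who want a zeroed object usually go through kmem_cache_zalloc() rather than calling memset() themselves. In include/linux/slab.h of the same era the wrapper is essentially the following (quoted from memory, so treat it as a sketch):

/* sketch of kmem_cache_zalloc(): let slab_alloc() do the zeroing via __GFP_ZERO */
static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
{
        return kmem_cache_alloc(k, flags | __GFP_ZERO);
}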

The slowpath:
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
                          unsigned long addr, struct kmem_cache_cpu *c)
{
        void **object;
        struct page *new;
#ifdef CONFIG_CMPXCHG_LOCAL
        unsigned long flags;

        local_irq_save(flags);
#ifdef CONFIG_PREEMPT
        /*
         * We may have been preempted and rescheduled on a different
         * cpu before disabling interrupts. Need to reload cpu area
         * pointer.
         */
        /* reload the cpu-local slab pointer */
        c = this_cpu_ptr(s->cpu_slab);
#endif
#endif

        /* We handle __GFP_ZERO in the caller */
        gfpflags &= ~__GFP_ZERO;

        /* if c->page, i.e. the cpu-local slab, does not exist, get a new one */
        if (!c->page)
                goto new_slab;

        slab_lock(c->page);
        /* if the node does not match, deactivate the cpu-local slab */
        if (unlikely(!node_match(c, node)))
                goto another_slab;

        stat(s, ALLOC_REFILL);

/* load c->freelist from c->page->freelist */
load_freelist:
        object = c->page->freelist;
        /* if the page's freelist is empty too, this slab is exhausted; find another one */
        if (unlikely(!object))
                goto another_slab;
        if (kmem_cache_debug(s))
                goto debug;

        /* c->freelist now points to the remaining free-object list */
        c->freelist = get_freepointer(s, object);
        c->page->inuse = c->page->objects;
        /* c->page->freelist has been handed over to c->freelist in full, so clear it */
        c->page->freelist = NULL;
        c->node = page_to_nid(c->page);
unlock_out:
        slab_unlock(c->page);
#ifdef CONFIG_CMPXCHG_LOCAL
        c->tid = next_tid(c->tid);
        local_irq_restore(flags);
#endif
        stat(s, ALLOC_SLOWPATH);
        return object;

another_slab:
        /* deactivate the cpu-local slab, i.e. give it back to the node's partial list */
        deactivate_slab(s, c);

new_slab:
        /* try to get a new slab from the partial list */
        new = get_partial(s, gfpflags, node);
        if (new) {
                c->page = new;
                stat(s, ALLOC_FROM_PARTIAL);
                /* on success, retry loading c->freelist */
                goto load_freelist;
        }

        gfpflags &= gfp_allowed_mask;
        if (gfpflags & __GFP_WAIT)
                local_irq_enable();

        /* getting a slab from the partial list failed, so allocate a fresh slab from the buddy system */
        new = new_slab(s, gfpflags, node);

        if (gfpflags & __GFP_WAIT)
                local_irq_disable();

        if (new) {
                c = __this_cpu_ptr(s->cpu_slab);
                stat(s, ALLOC_SLAB);
                /* flush the old local slab, if any */
                if (c->page)
                        flush_slab(s, c);
                slab_lock(new);
                /* freeze the slab, marking it as some cpu's local slab */
                __SetPageSlubFrozen(new);
                c->page = new;
                /* on success, retry loading c->freelist */
                goto load_freelist;
        }
        if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
                slab_out_of_memory(s, gfpflags, node);
#ifdef CONFIG_CMPXCHG_LOCAL
        local_irq_restore(flags);
#endif
        return NULL;
debug:
        if (!alloc_debug_processing(s, c->page, object, addr))
                goto another_slab;

        c->page->inuse++;
        c->page->freelist = get_freepointer(s, object);
        c->node = NUMA_NO_NODE;
        goto unlock_out;
}

static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
        return *(void **)(object + s->offset);
}
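get_freepointer() is the heart of SLUB's freelist: a free object stores the address of the next free object inside itself, at byte offset s->offset. The following userspace model is my own sketch (the model_* names, struct fake_cache and the fixed offset of 0 are assumptions, not kernel code); it shows how popping an object off such a list works, which is exactly what the fastpath does:

#include <stdio.h>
#include <stdlib.h>

/* Model: each free object stores the next free object's address at "offset". */
struct fake_cache {
        size_t size;     /* object size                          */
        size_t offset;   /* where the free pointer lives         */
        void  *freelist; /* first free object, like c->freelist  */
};

static void *model_get_freepointer(struct fake_cache *s, void *object)
{
        return *(void **)((char *)object + s->offset);
}

static void model_set_freepointer(struct fake_cache *s, void *object, void *fp)
{
        *(void **)((char *)object + s->offset) = fp;
}

/* The fastpath of slab_alloc(), stripped of per-cpu and locking details. */
static void *model_alloc(struct fake_cache *s)
{
        void *object = s->freelist;

        if (!object)
                return NULL;
        s->freelist = model_get_freepointer(s, object);
        return object;
}

int main(void)
{
        struct fake_cache s = { .size = 64, .offset = 0, .freelist = NULL };
        char *slab = calloc(4, s.size);

        /* chain a freelist through the four objects, like a fresh slab */
        for (int i = 3; i >= 0; i--) {
                void *obj = slab + i * s.size;
                model_set_freepointer(&s, obj, s.freelist);
                s.freelist = obj;
        }

        for (int i = 0; i < 5; i++)
                printf("alloc -> %p\n", model_alloc(&s));

        free(slab);
        return 0;
}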

static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
        __releases(bitlock)
{
        /* the cpu-local slab */
        struct page *page = c->page;
        int tail = 1;

        if (page->freelist)
                stat(s, DEACTIVATE_REMOTE_FREES);
        /*
         * Merge cpu freelist into slab freelist. Typically we get here
         * because both freelists are empty. So this is unlikely
         * to occur.
         */
        /*
         * If we got here because of a node mismatch rather than because
         * c->page ran out of objects, c->freelist may still be non-empty;
         * give that free-object list back to c->page->freelist.
         */
        while (unlikely(c->freelist)) {
                void **object;

                tail = 0; /* Hot objects. Put the slab first */

                /* Retrieve object from cpu_freelist */
                object = c->freelist;
                c->freelist = get_freepointer(s, c->freelist);

                /* And put onto the regular freelist */
                set_freepointer(s, object, page->freelist);
                page->freelist = object;
                page->inuse--;
        }
        c->page = NULL;
#ifdef CONFIG_CMPXCHG_LOCAL
        c->tid = next_tid(c->tid);
#endif
        /* unfreeze the local slab */
        unfreeze_slab(s, page, tail);
}
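Continuing the userspace model sketched after get_freepointer() above (so struct fake_cache and the model_* helpers are assumed from that sketch), the while (unlikely(c->freelist)) loop boils down to draining one freelist into the other:

/* Drain the cpu freelist into the page freelist, as deactivate_slab() does. */
static void model_drain(struct fake_cache *s, void **cpu_freelist, void **page_freelist)
{
        while (*cpu_freelist) {
                void *object = *cpu_freelist;                      /* pop from cpu freelist   */

                *cpu_freelist  = model_get_freepointer(s, object);
                model_set_freepointer(s, object, *page_freelist);  /* push onto page freelist */
                *page_freelist = object;
        }
}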

static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
        __releases(bitlock)
{
        struct kmem_cache_node *n = get_node(s, page_to_nid(page));

        /* clear the page's frozen flag */
        __ClearPageSlubFrozen(page);
        /* if some objects in this slab are still in use */
        if (page->inuse) {
                /* if page->freelist is not empty, add this slab to the partial list */
                if (page->freelist) {
                        add_partial(n, page, tail);
                        stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
                } else {
                        stat(s, DEACTIVATE_FULL);
                        if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER))
                                add_full(n, page);
                }
                slab_unlock(page);
        } else {
                /*
                 * No objects are in use. If the number of slabs on the node's
                 * partial list is below the cache's minimum, keep this empty
                 * slab on the partial list; otherwise discard it.
                 */
                stat(s, DEACTIVATE_EMPTY);
                if (n->nr_partial < s->min_partial) {
                        /*
                         * Adding an empty slab to the partial slabs in order
                         * to avoid page allocator overhead. This slab needs
                         * to come after the other slabs with objects in
                         * so that the others get filled first. That way the
                         * size of the partial list stays small.
                         *
                         * kmem_cache_shrink can reclaim any empty slabs from
                         * the partial list.
                         */
                        add_partial(n, page, 1);
                        slab_unlock(page);
                } else {
                        /* otherwise destroy the slab */
                        slab_unlock(page);
                        stat(s, FREE_SLAB);
                        discard_slab(s, page);
                }
        }
}

static void discard_slab(struct kmem_cache *s, struct page *page)
{
        dec_slabs_node(s, page_to_nid(page), page->objects);
        /* release the pages back to the buddy system */
        free_slab(s, page);
}
