slub分配流程-kmem_cache_alloc函数
/*
 * Fastpath entry of the SLUB allocator (reached from kmem_cache_alloc).
 * Returns a free object from cache @s, or NULL on failure.
 * @gfpflags: allocation flags; @node: preferred NUMA node;
 * @addr: caller address, used for tracing/debugging.
 */
static __always_inline void *slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr)
{
void *object;
struct kmem_cache_cpu *c;
struct page *page;
unsigned long tid;
s = slab_pre_alloc_hook(s, gfpflags);
if (!s)
return NULL;
redo:
/*
 * Make sure tid and the kmem_cache_cpu pointer were fetched on the
 * same CPU, so that no CPU migration happened in between: if they do
 * not match, the do-loop re-reads both until they agree.
 */
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab);
} while (IS_ENABLED(CONFIG_PREEMPTION) &&
unlikely(tid != READ_ONCE(c->tid)));
barrier();
object = c->freelist;
page = c->page;
/*
 * Decide here between the slowpath and the fastpath.
 * Fastpath requires both:
 * 1. the cpu slab page matches the requested node
 * 2. c->freelist != NULL
 */
if (unlikely(!object || !node_match(page, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
stat(s, ALLOC_SLOWPATH);/* account a slowpath allocation */
} else {
void *next_object = get_freepointer_safe(s, object);
if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
object, tid,
next_object, next_tid(tid)))) {
note_cmpxchg_failure("slab_alloc", s, tid);
goto redo;
}
prefetch_freepointer(s, next_object);
stat(s, ALLOC_FASTPATH);/* account a fastpath allocation */
}
maybe_wipe_obj_freeptr(s, object);
if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
memset(object, 0, s->object_size);
slab_post_alloc_hook(s, gfpflags, 1, &object);
return object;
}
从图中大致可以分成4种情况:
1、c->freelist != NULL;
2、c->freelist == NULL,c->partial != NULL;
3、c->partial == NULL,n->partial != NULL;
4、n->partial == NULL的情况;
第一种情况,c->freelist != NULL,即本地CPU的slab缓存池不为空,这种分配即fastpath操作,精简代码后如下所示:
void *next_object = get_freepointer_safe(s, object); /*(1)*/
if (unlikely(!this_cpu_cmpxchg_double( /*(2)*/
s->cpu_slab->freelist, s->cpu_slab->tid,
object, tid,
next_object, next_tid(tid)))) {
note_cmpxchg_failure("slab_alloc", s, tid);
goto redo;
}
prefetch_freepointer(s, next_object); /*(3)*/
stat(s, ALLOC_FASTPATH);/* 标记fastpath状态 */
/*
* (1)获取下一个空闲的object赋值给next_object
* (2)原子操作,主要做了三件事:
**重定向首指针指向当前CPU空间;
**判断tid和freelist未被修改;
**用新的tid和freelist覆盖旧的数据:
s->cpu_slab->freelist = next_object
s->cpu_slab->tid = next_tid(tid)
此时c->freelist就指向了新的下一个空闲对象;
* (3)刷新数据,主要是下一个空闲object的地址指向;
*/
图解如下图所示:
其余的几种情况均属于slowpath,在__slab_alloc中执行,
当kmem_cache_cpu的freelist没有可用的object时,需要去kmem_cache_cpu的partial链表中寻找可用的object,如果c->partial存在,则可找到对应的object,对应的函数主体如下:
if (slub_percpu_partial(c)) {
page = c->page = slub_percpu_partial(c);/*(1)*/
slub_set_percpu_partial(c, page); /*(2)*/
stat(s, CPU_PARTIAL_ALLOC);
goto redo; /*(3)*/
}
/*
* (1)slub_percpu_partial(c)函数实际上是返回c->partial,赋值给c->page;
* (2)更新c->partial的链表,将c->partial指向下一个page链表中的对象;
实际上就是:c->partial = page->next;
(3)返回redo标签,重新进行freelist的获取和更新;
*/
图解如下图所示:
当kmem_cache_cpu的freelist和partial链表都没有可用的object的时候,就需要去kmem_cache_node中去寻找可用的object。主体函数为get_partial_node:
/*
 * Try to take objects from node @n's partial list. The first slab
 * acquired becomes the cpu slab (c->page); further slabs are stashed
 * on the per-cpu partial list until roughly cpu_partial/2 free objects
 * are cached. Returns the freelist of the first acquired slab, or NULL.
 * Called with preemption disabled; takes n->list_lock internally.
 */
static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
struct kmem_cache_cpu *c, gfp_t flags)
{
struct page *page, *page2;
void *object = NULL;
unsigned int available = 0;
int objects;
if (!n || !n->nr_partial)
return NULL;
spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
void *t;
if (!pfmemalloc_match(page, flags))
continue;
/* (1) grab this slab's freelist; objects returns its free count */
t = acquire_slab(s, n, page, object == NULL, &objects);
if (!t)
break;
available += objects;
if (!object) {
c->page = page;
stat(s, ALLOC_FROM_PARTIAL);
object = t;
} else {
put_cpu_partial(s, page, 0);
stat(s, CPU_PARTIAL_NODE);
}
/* (2) stop once enough free objects are cached per-cpu */
if (!kmem_cache_has_cpu_partial(s)
|| available > slub_cpu_partial(s) / 2)
break;
}
spin_unlock(&n->list_lock);
return object;
}
/*
 * (1)遍历partial链表,acquire_slab获取每个slab上空闲object的数量;对于第一个获取到的slab,挂到c->page上;
 * (2)然后将后续slab挂到c->partial,直到累计空闲object数量大于cpu_partial的一半为止;
 */
如果在kmem_cache_cpu的freelist、partial链表以及kmem_cache_node的partial链表中都没有可用的object时,则需要调用new_slab函数从伙伴系统重新申请一页内存,此时函数主体如下:
page = new_slab(s, flags, node); /*(1)*/
if (page) {
c = raw_cpu_ptr(s->cpu_slab);/*(2)*/
if (c->page) /*(3)*/
flush_slab(s, c);
freelist = page->freelist; /*(4)*/
page->freelist = NULL; /*(5)*/
stat(s, ALLOC_SLAB);
c->page = page; /*(6)*/
*pc = c;
}
/*
* (1)从伙伴系统以页为单位分配内存,保存在page中;
* (2)获取当前cpu的kmem_cache_cpu结构;
* (3)如果c->page已存在,说明当前cpu本地还缓存着旧的slab,需要先通过flush_slab将其归还,再换上新申请的page;
* (4)返回page中的freelist;
* (5)将page中的freelist指向NULL;
* (6)将新申请的page加入cpu本地缓存(c->page = page);其中(4)(5)(6)步主要是将申请的page 加入到该CPU的本地缓存中;
*/
当调用new_slab失败,无法从伙伴系统获取内存时,说明系统内存不足,此时就会发生OOM;