linux内核的slab

最新推荐文章于 2023-03-14 15:34:16 发布

VIV777

最新推荐文章于 2023-03-14 15:34:16 发布

阅读量1.8k

点赞数

文章标签： linux内核 cache struct 数据结构 list locking

本文链接：https://blog.csdn.net/VIV777/article/details/1680364

版权

Slab的数据结构：

/*
 * struct slab - bookkeeping descriptor for one slab.
 *
 * NOTE(review): where a field has no original inline comment, the remark
 * below is either taken from how cache_alloc_refill() uses the field or is
 * explicitly marked as an assumption to confirm.
 */
struct slab {

/* List linkage; cache_alloc_refill() moves the slab between the node's slab lists through it. */
struct list_head list;

/* presumably the colour offset applied to this slab - TODO confirm */
unsigned long colouroff;

void *s_mem; /* including colour offset */

unsigned int inuse; /* num of objs active in slab */

/* Compared against BUFCTL_END in cache_alloc_refill() to decide whether the slab goes on the full list. */
kmem_bufctl_t free;

/* presumably the NUMA node this slab belongs to - TODO confirm */
unsigned short nodeid;

};

Kmem_cache的数据结构：

/*
 * struct kmem_cache - descriptor of one object cache.
 *
 * The fields are grouped by how often and from where they are touched
 * (groups 1-6 below). The statistics and debug fields only exist when
 * STATS / DEBUG are enabled.
 */
struct kmem_cache {
	/* 1) per-cpu data, touched during every alloc/free */
	struct array_cache *array[NR_CPUS];

	/* 2) Cache tunables. Protected by cache_chain_mutex */
	unsigned int batchcount;
	unsigned int limit;
	unsigned int shared;
	unsigned int buffer_size;

	/* 3) touched by every alloc & free from the backend */
	struct kmem_list3 *nodelists[MAX_NUMNODES];
	unsigned int flags;		/* constant flags */
	unsigned int num;		/* # of objs per slab */

	/* 4) cache_grow/shrink */
	/* order of pgs per slab (2^n) */
	unsigned int gfporder;

	/* force GFP flags, e.g. GFP_DMA */
	gfp_t gfpflags;

	size_t colour;			/* cache colouring range */
	unsigned int colour_off;	/* colour offset */
	struct kmem_cache *slabp_cache;
	unsigned int slab_size;
	unsigned int dflags;		/* dynamic flags */

	/* constructor func */
	void (*ctor) (void *, struct kmem_cache *, unsigned long);

	/* de-constructor func */
	void (*dtor) (void *, struct kmem_cache *, unsigned long);

	/* 5) cache creation/removal */
	const char *name;
	struct list_head next;

	/* 6) statistics */
#if STATS
	unsigned long num_active;
	unsigned long num_allocations;
	unsigned long high_mark;
	unsigned long grown;
	unsigned long reaped;
	unsigned long errors;
	unsigned long max_freeable;
	unsigned long node_allocs;
	unsigned long node_frees;
	unsigned long node_overflow;
	atomic_t allochit;
	atomic_t allocmiss;
	atomic_t freehit;
	atomic_t freemiss;
#endif
#if DEBUG
	/*
	 * If debugging is enabled, then the allocator can add additional
	 * fields and/or padding to every object. buffer_size contains the total
	 * object size including these internal fields, the following two
	 * variables contain the offset to the user object and its size.
	 */
	int obj_offset;
	int obj_size;
#endif
};

/*
 * struct kmem_list3 - the three slab lists (partial, full, free) of a cache,
 * plus related bookkeeping. struct kmem_cache holds one pointer to such a
 * structure per entry of nodelists[MAX_NUMNODES], and cache_alloc_refill()
 * picks the one for numa_node_id().
 */
struct kmem_list3 {

struct list_head slabs_partial; /* partial list first, better asm code */

struct list_head slabs_full;

struct list_head slabs_free;

/* cache_alloc_refill() subtracts the number of objects it moved into the per-cpu array. */
unsigned long free_objects;

unsigned int free_limit;

unsigned int colour_next; /* Per-node cache coloring */

/* Held by cache_alloc_refill() while it walks and relinks the lists above. */
spinlock_t list_lock;

struct array_cache *shared; /* shared per node */

struct array_cache **alien; /* on other nodes */

unsigned long next_reap; /* updated without locking */

/* cache_alloc_refill() sets this to 1 when the partial list is empty and it falls back to slabs_free. */
int free_touched; /* updated without locking */

};

Slab的目的是用来做cache的，就是保留一系列内核经常用的数据结构，这些对象（这些数据结构的内核另称）的组织是阶梯形的。

一个slab里面可以保存多个同类型对象，也可以只有一个。一个slab占的内存页的个数也在kmem_cache中保存着。

在kmem_cache数据结构里保存着关于这种对象的slab的信息。在slab数据结构中保存的是这个slab的对象的地址等等。

对于每一种对象，都有一个kmem_cache数据结构与之对应，所有不同类型对象的kmem_cache数据结构通过kmem_cache中的next相连，kmem_cache中的struct kmem_list3 *nodelists[MAX_NUMNODES];中保存着这种对象的slab的连接结构。同时数据结构kmem_cache本身也是通过slab来分配的：cache_cache保存着kmem_cache的kmem_cache描述表。

另外在数据结构Kmem_cache看到有一项

struct array_cache *array[NR_CPUS];

其为每一个cpu都保留了一个这种对象的cache，这个cache在没有对象时填补，在对象多余时释放一些。

看几个函数：

里面最核心的函数是static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)。

/**
 * kmem_cache_alloc - allocate one object from a cache
 * @cachep: the cache to allocate from
 * @flags: allocation flags, handed through unchanged
 *
 * Thin wrapper: captures this function's return address and delegates to
 * __cache_alloc(), returning whatever that returns.
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *caller = __builtin_return_address(0);

	return __cache_alloc(cachep, flags, caller);
}

/**

* kmem_cache_alloc_node - Allocate an object on the specified node

* @cachep: The cache to allocate from.

* @flags: See kmalloc().

* @nodeid: node number of the target node.

* Identical to kmem_cache_alloc, except that this function is slow

* and can sleep. And it will allocate memory on the given node, which

* can improve the performance for cpu bound structures.

* New and improved: it will now make sure that the object gets

* put on the correct node list so that there is no false sharing.

void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)

{

unsigned long save_flags;

void *ptr;

cache_alloc_debugcheck_before(cachep, flags);

local_irq_save(save_flags);

if (nodeid == -1 || nodeid == numa_node_id() ||

!cachep->nodelists[nodeid])

ptr = ____cache_alloc(cachep, flags);

else

ptr = __cache_alloc_node(cachep, flags, nodeid);

local_irq_restore(save_flags);

ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,

__builtin_return_address(0));

return ptr;

}

/*
 * __cache_alloc - common allocation entry used by kmem_cache_alloc() and
 * __do_kmalloc().
 * @cachep: cache to allocate from
 * @flags: allocation flags
 * @caller: caller address, forwarded to cache_alloc_debugcheck_after()
 *
 * Returns the pointer produced by ____cache_alloc() after it has been passed
 * through cache_alloc_debugcheck_after().
 */
static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
					   gfp_t flags, void *caller)
{
	unsigned long irq_state;
	void *obj;

	cache_alloc_debugcheck_before(cachep, flags);

	/* ____cache_alloc() calls check_irq_off(), so run it with local irqs disabled. */
	local_irq_save(irq_state);
	obj = ____cache_alloc(cachep, flags);
	local_irq_restore(irq_state);

	obj = cache_alloc_debugcheck_after(cachep, flags, obj, caller);
	prefetchw(obj);

	return obj;
}

这个函数首先得到一个这个cpu的struct array_cache结构，重要的是对这个结构的理解，

/*
 * struct array_cache - array of object pointers used as a per-cpu cache
 * (kmem_cache.array[]) and as the per-node shared/alien caches
 * (kmem_list3.shared / kmem_list3.alien).
 */
struct array_cache {
	unsigned int avail;		/* number of available entries; doubles as the index, entries are taken from the end */
	unsigned int limit;		/* upper limit */
	unsigned int batchcount;	/* number of objects added/removed per batch */
	unsigned int touched;		/* set to 1 when an object was taken recently */
	spinlock_t lock;
	void *entry[0];
	/*
	 * Declared with zero elements so that it marks the start of the
	 * object pointer array that immediately follows struct array_cache.
	 *
	 * Must have this definition in here for the proper
	 * alignment of array_cache. Also simplifies accessing
	 * the entries.
	 * [0] is for gcc 2.95. It should really be [].
	 */
};

/*
 * ____cache_alloc - take one object from this cpu's array cache.
 * @cachep: cache to allocate from
 * @flags: allocation flags, forwarded to the slower paths
 *
 * Returns the last entry of the per-cpu array when it is non-empty;
 * otherwise returns whatever cache_alloc_refill() returns. With CONFIG_NUMA,
 * tasks flagged PF_SPREAD_SLAB or PF_MEMPOLICY first try
 * alternate_node_alloc() and use its result when it is non-NULL.
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	struct array_cache *cpu_ac;
	void *obj;

#ifdef CONFIG_NUMA
	if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
		obj = alternate_node_alloc(cachep, flags);
		if (obj)
			return obj;
	}
#endif

	check_irq_off();

	/* Per the article's note, this is cachep->array[smp_processor_id()]. */
	cpu_ac = cpu_cache_get(cachep);

	if (unlikely(!cpu_ac->avail)) {
		/* Per-cpu array is empty: go and refill it. */
		STATS_INC_ALLOCMISS(cachep);
		return cache_alloc_refill(cachep, flags);
	}

	STATS_INC_ALLOCHIT(cachep);
	cpu_ac->touched = 1;

	/* Entries are handed out from the back of the array. */
	return cpu_ac->entry[--cpu_ac->avail];
}

/*
 * cache_alloc_refill - refill this cpu's empty array cache and return one
 * object from it.
 * @cachep: cache being allocated from
 * @flags: allocation flags, forwarded to cache_grow()
 *
 * Objects are taken first from the node's shared array, then from the
 * node's partial and free slabs. If nothing could be obtained, the cache is
 * grown with cache_grow() and the refill is retried.
 *
 * Returns an object pointer, or NULL when cache_grow() returned 0 and the
 * array cache is still empty.
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
	int batchcount;
	struct kmem_list3 *l3;
	struct array_cache *ac;

	check_irq_off();
	/* Per the article's note, this is cachep->array[smp_processor_id()]. */
	ac = cpu_cache_get(cachep);
retry:
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/*
		 * If there was little recent activity on this cache, then
		 * perform only a partial refill. Otherwise we could generate
		 * refill bouncing.
		 */
		batchcount = BATCHREFILL_LIMIT;
	}

	/*
	 * Objects come either from the shared array or from this cache's
	 * slabs; start by fetching the slab list structure of this node.
	 */
	l3 = cachep->nodelists[numa_node_id()];

	BUG_ON(ac->avail > 0 || !l3);
	spin_lock(&l3->list_lock);

	/* See if we can refill from the shared array */
	/* l3->shared points to the shared array */
	if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
		goto alloc_done;

	while (batchcount > 0) {
		struct list_head *entry;
		struct slab *slabp;

		/* Get slab alloc is to come from. */
		entry = l3->slabs_partial.next;
		if (entry == &l3->slabs_partial) {
			/* No partial slab left: fall back to the free list. */
			l3->free_touched = 1;
			entry = l3->slabs_free.next;
			if (entry == &l3->slabs_free)
				goto must_grow;
		}

		slabp = list_entry(entry, struct slab, list);
		check_slabp(cachep, slabp);
		check_spinlock_acquired(cachep);

		/* Drain this slab until it is full or the batch is satisfied. */
		while (slabp->inuse < cachep->num && batchcount--) {
			STATS_INC_ALLOCED(cachep);
			STATS_INC_ACTIVE(cachep);
			STATS_SET_HIGH(cachep);

			ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
							      numa_node_id());
		}
		check_slabp(cachep, slabp);

		/* move slabp to correct slabp list: */
		list_del(&slabp->list);
		if (slabp->free == BUFCTL_END)
			list_add(&slabp->list, &l3->slabs_full);
		else
			list_add(&slabp->list, &l3->slabs_partial);
	}

must_grow:
	/* Objects moved into the array cache no longer count as free on the lists. */
	l3->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&l3->list_lock);

	if (unlikely(!ac->avail)) {
		int x;

		x = cache_grow(cachep, flags, numa_node_id());

		/* cache_grow can reenable interrupts, then ac could change. */
		ac = cpu_cache_get(cachep);
		if (!x && ac->avail == 0)	/* no objects in sight? abort */
			return NULL;

		if (!ac->avail)		/* objects refilled by interrupt? */
			goto retry;
	}
	ac->touched = 1;
	return ac->entry[--ac->avail];
}

kmalloc is for general-purpose objects.

/*
 * __do_kmalloc - allocate a general-purpose object of at least @size bytes.
 * @size: requested size
 * @flags: allocation flags
 * @caller: caller address, forwarded to __cache_alloc()
 *
 * Looks up the general cache for @size with __find_general_cachep() and
 * allocates from it. Returns NULL when no such cache exists.
 */
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, void *caller)
{
	struct kmem_cache *cachep;

	/* If you want to save a few bytes .text space: replace
	 * __ with kmem_.
	 * Then kmalloc uses the uninlined functions instead of the inline
	 * functions.
	 */
	cachep = __find_general_cachep(size, flags);
	if (unlikely(cachep == NULL))
		return NULL;
	return __cache_alloc(cachep, flags, caller);
}

/*
 * __find_general_cachep - pick the general (kmalloc) cache for a request.
 * @size: requested object size
 * @gfpflags: allocation flags; GFP_DMA selects the DMA variant of the cache
 *
 * Walks malloc_sizes[] to the first entry whose cs_size is >= @size and
 * returns that entry's cs_dmacachep or cs_cachep. The result is NULL for
 * requests that only fit the terminating ULONG_MAX entry.
 */
static inline struct kmem_cache *__find_general_cachep(size_t size,
							gfp_t gfpflags)
{
	struct cache_sizes *csizep = malloc_sizes;

#if DEBUG
	/* This happens if someone tries to call
	 * kmem_cache_create(), or __kmalloc(), before
	 * the generic caches are initialized.
	 */
	BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
#endif
	while (size > csizep->cs_size)
		csizep++;

	/*
	 * Really subtle: The last entry with cs->cs_size==ULONG_MAX
	 * has cs_{dma,}cachep==NULL. Thus no special case
	 * for large kmalloc calls required.
	 */
	if (unlikely(gfpflags & GFP_DMA))
		return csizep->cs_dmacachep;
	return csizep->cs_cachep;
}

/*
 * Table of general cache sizes, searched linearly by __find_general_cachep().
 * The entries are generated by expanding CACHE(x) for every size listed in
 * <linux/kmalloc_sizes.h>; only .cs_size is initialised here.
 */
struct cache_sizes malloc_sizes[] = {

#define CACHE(x) { .cs_size = (x) },

#include <linux/kmalloc_sizes.h>

/* Terminating entry: its ULONG_MAX size stops the search loop in __find_general_cachep(). */
CACHE(ULONG_MAX)

#undef CACHE

};

VIV777

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫