The slab allocator in the Linux kernel

The slab data structure:

struct slab {
        struct list_head list;
        unsigned long colouroff;
        void *s_mem;            /* including colour offset */
        unsigned int inuse;     /* num of objs active in slab */
        kmem_bufctl_t free;
        unsigned short nodeid;
};

 

The kmem_cache data structure:

struct kmem_cache {
/* 1) per-cpu data, touched during every alloc/free */
        struct array_cache *array[NR_CPUS];
/* 2) Cache tunables. Protected by cache_chain_mutex */
        unsigned int batchcount;
        unsigned int limit;
        unsigned int shared;
        unsigned int buffer_size;
/* 3) touched by every alloc & free from the backend */
        struct kmem_list3 *nodelists[MAX_NUMNODES];

        unsigned int flags;             /* constant flags */
        unsigned int num;               /* # of objs per slab */

/* 4) cache_grow/shrink */
        /* order of pgs per slab (2^n) */
        unsigned int gfporder;

        /* force GFP flags, e.g. GFP_DMA */
        gfp_t gfpflags;

        size_t colour;                  /* cache colouring range */
        unsigned int colour_off;        /* colour offset */
        struct kmem_cache *slabp_cache;
        unsigned int slab_size;
        unsigned int dflags;            /* dynamic flags */

        /* constructor func */
        void (*ctor) (void *, struct kmem_cache *, unsigned long);

        /* de-constructor func */
        void (*dtor) (void *, struct kmem_cache *, unsigned long);

/* 5) cache creation/removal */
        const char *name;
        struct list_head next;

/* 6) statistics */
#if STATS
        unsigned long num_active;
        unsigned long num_allocations;
        unsigned long high_mark;
        unsigned long grown;
        unsigned long reaped;
        unsigned long errors;
        unsigned long max_freeable;
        unsigned long node_allocs;
        unsigned long node_frees;
        unsigned long node_overflow;
        atomic_t allochit;
        atomic_t allocmiss;
        atomic_t freehit;
        atomic_t freemiss;
#endif
#if DEBUG
        /*
         * If debugging is enabled, then the allocator can add additional
         * fields and/or padding to every object. buffer_size contains the total
         * object size including these internal fields, the following two
         * variables contain the offset to the user object and its size.
         */
        int obj_offset;
        int obj_size;
#endif
};

 

struct kmem_list3 {
        struct list_head slabs_partial; /* partial list first, better asm code */
        struct list_head slabs_full;
        struct list_head slabs_free;
        unsigned long free_objects;
        unsigned int free_limit;
        unsigned int colour_next;       /* Per-node cache coloring */
        spinlock_t list_lock;
        struct array_cache *shared;     /* shared per node */
        struct array_cache **alien;     /* on other nodes */
        unsigned long next_reap;        /* updated without locking */
        int free_touched;               /* updated without locking */
};

 

 

The purpose of the slab layer is caching: it keeps around a pool of data structures that the kernel uses frequently. The kernel calls these data structures objects, and the general-purpose caches are organized as a staircase of increasing sizes.

A slab may hold several objects of the same type, or just one. The number of memory pages a slab occupies is also recorded in the kmem_cache.

The kmem_cache structure holds the information about the slabs of one object type, while the slab structure records the addresses of the objects inside that particular slab, and so on.

 

For each object type there is one corresponding kmem_cache structure, and the kmem_cache structures of all the different object types are linked together through the next field of kmem_cache. Inside kmem_cache, struct kmem_list3 *nodelists[MAX_NUMNODES] holds the per-node list structure that links this type's slabs together. The kmem_cache structures themselves are also managed through a slab: the static cache_cache is the kmem_cache that describes the kmem_cache objects.

In addition, kmem_cache contains the field

        struct array_cache *array[NR_CPUS];

which keeps a per-CPU cache of objects of this type: it is refilled when it runs empty and partially drained when it holds too many.
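To make the relationship between a kmem_cache, its slabs, and the per-CPU arrays concrete, here is a minimal usage sketch (mine, not from the original post). The object type my_obj, the cache name, and the module functions are invented for illustration, and the kmem_cache_create() call assumes the 2.6-era six-argument signature that matches the ctor/dtor fields shown above.

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>

/* Hypothetical object type; one kmem_cache describes all of its slabs. */
struct my_obj {
        int id;
        char name[16];
};

static struct kmem_cache *my_cache;

static int __init my_slab_demo_init(void)
{
        struct my_obj *obj;

        /* Creates a kmem_cache and links it into the global cache list
         * via ->next.  2.6-era signature: name, size, align, flags,
         * ctor, dtor. */
        my_cache = kmem_cache_create("my_obj", sizeof(struct my_obj),
                                     0, 0, NULL, NULL);
        if (!my_cache)
                return -ENOMEM;

        /* Normally served from the per-CPU array_cache (fast path below). */
        obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
        if (!obj) {
                kmem_cache_destroy(my_cache);
                return -ENOMEM;
        }

        /* Freeing puts the object back into the per-CPU array for reuse. */
        kmem_cache_free(my_cache, obj);
        return 0;
}

static void __exit my_slab_demo_exit(void)
{
        kmem_cache_destroy(my_cache);
}

module_init(my_slab_demo_init);
module_exit(my_slab_demo_exit);
MODULE_LICENSE("GPL");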

 

Let's look at a few functions.

 

The most central function here is static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags).

 

 

void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
        return __cache_alloc(cachep, flags, __builtin_return_address(0));
}

 

/**
 * kmem_cache_alloc_node - Allocate an object on the specified node
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 * @nodeid: node number of the target node.
 *
 * Identical to kmem_cache_alloc, except that this function is slow
 * and can sleep. And it will allocate memory on the given node, which
 * can improve the performance for cpu bound structures.
 * New and improved: it will now make sure that the object gets
 * put on the correct node list so that there is no false sharing.
 */
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
        unsigned long save_flags;
        void *ptr;

        cache_alloc_debugcheck_before(cachep, flags);
        local_irq_save(save_flags);

        if (nodeid == -1 || nodeid == numa_node_id() ||
                        !cachep->nodelists[nodeid])
                ptr = ____cache_alloc(cachep, flags);
        else
                ptr = __cache_alloc_node(cachep, flags, nodeid);
        local_irq_restore(save_flags);

        ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,
                                           __builtin_return_address(0));

        return ptr;
}
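As a hedged illustration of when the node-aware variant is useful, the sketch below allocates one hypothetical per-node control structure on its own node; per_node_ctl and alloc_per_node are made-up names, and for_each_online_node() is assumed to come from <linux/nodemask.h>.

#include <linux/nodemask.h>
#include <linux/slab.h>

/* Hypothetical: keep one control structure per NUMA node, allocated on
 * that node so that CPU-bound accesses stay local. */
static void *per_node_ctl[MAX_NUMNODES];

static int alloc_per_node(struct kmem_cache *cachep)
{
        int nid;

        for_each_online_node(nid) {
                /* Slow path: may sleep, but the object lands on node nid. */
                per_node_ctl[nid] = kmem_cache_alloc_node(cachep,
                                                          GFP_KERNEL, nid);
                if (!per_node_ctl[nid])
                        return -ENOMEM;
        }
        return 0;
}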

 

static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
                                           gfp_t flags, void *caller)
{
        unsigned long save_flags;
        void *objp;

        cache_alloc_debugcheck_before(cachep, flags);

        local_irq_save(save_flags);
        objp = ____cache_alloc(cachep, flags);
        local_irq_restore(save_flags);
        objp = cache_alloc_debugcheck_after(cachep, flags, objp,
                                            caller);
        prefetchw(objp);
        return objp;
}

 

____cache_alloc first obtains the current CPU's struct array_cache; understanding that structure is the important part:

struct array_cache {
        unsigned int avail;       /* number of available objects; also an index: objects are taken from the back */
        unsigned int limit;       /* upper limit */
        unsigned int batchcount;  /* how many objects are added or removed at a time */
        unsigned int touched;     /* has the cache been used recently? */
        spinlock_t lock;
        void *entry[0];           /* Defined this way, entry is the start address of the
                                   * object pointers: it has zero size, and the pointer
                                   * array follows struct array_cache directly in memory.
                                   * Must have this definition in here for the proper
                                   * alignment of array_cache. Also simplifies accessing
                                   * the entries.
                                   * [0] is for gcc 2.95. It should really be [].
                                   */
};
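The entry[0] trick is plain (GNU) C: the pointer array is carved out of the same allocation, immediately after the header fields, so no separate pointer to it is needed. A small user-space sketch of the idea (my own illustration, not kernel code):

#include <stdio.h>
#include <stdlib.h>

/* Mimics array_cache: a header followed directly by its pointer array. */
struct toy_array_cache {
        unsigned int avail;
        unsigned int limit;
        void *entry[0];         /* [0] for old gcc; C99 would use entry[] */
};

static struct toy_array_cache *toy_alloc_ac(unsigned int limit)
{
        /* One allocation covers header + limit pointers; entry[] starts
         * right after the header fields, which is why no extra pointer
         * field is needed. */
        struct toy_array_cache *ac =
                malloc(sizeof(*ac) + limit * sizeof(void *));
        if (ac) {
                ac->avail = 0;
                ac->limit = limit;
        }
        return ac;
}

int main(void)
{
        struct toy_array_cache *ac = toy_alloc_ac(4);

        if (!ac)
                return 1;
        printf("header ends at %p, entry[] starts at %p\n",
               (void *)(ac + 1), (void *)ac->entry);
        free(ac);
        return 0;
}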

static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
        void *objp;
        struct array_cache *ac;

#ifdef CONFIG_NUMA
        if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
                objp = alternate_node_alloc(cachep, flags);
                if (objp != NULL)
                        return objp;
        }
#endif
        check_irq_off();
        ac = cpu_cache_get(cachep);     /* cachep->array[smp_processor_id()] */
        if (likely(ac->avail)) {
                STATS_INC_ALLOCHIT(cachep);
                ac->touched = 1;
                objp = ac->entry[--ac->avail];  /* take from the back */
        } else {
                STATS_INC_ALLOCMISS(cachep);
                objp = cache_alloc_refill(cachep, flags);  /* see that function below */
        }
        return objp;
}
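So the fast path is simply a per-CPU LIFO stack of object pointers: allocation pops from the back, freeing pushes onto the back, and the most recently freed (still cache-warm) object is handed out first. The following user-space model of that behaviour is my own simplification; refill, flushing, and locking are deliberately left out:

#include <stddef.h>

/* Simplified model of the ____cache_alloc() fast path: a per-CPU LIFO
 * stack of object pointers. */
struct toy_cpu_cache {
        unsigned int avail;     /* number of cached pointers = stack top */
        unsigned int limit;
        void *entry[64];        /* fixed size here just for the model */
};

/* Pop the most recently freed object, or NULL to signal "go refill". */
static void *toy_alloc(struct toy_cpu_cache *ac)
{
        if (ac->avail)
                return ac->entry[--ac->avail];  /* take from the back */
        return NULL;                            /* miss: would call refill */
}

/* Push a freed object back; a full cache would normally be flushed
 * back to the slab lists in batches of batchcount. */
static int toy_free(struct toy_cpu_cache *ac, void *obj)
{
        if (ac->avail < ac->limit) {
                ac->entry[ac->avail++] = obj;
                return 0;
        }
        return -1;
}

int main(void)
{
        struct toy_cpu_cache ac = { .avail = 0, .limit = 64 };
        int x;

        toy_free(&ac, &x);              /* "free" pushes the pointer */
        return toy_alloc(&ac) != &x;    /* "alloc" pops the same one back */
}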

 

 

 

static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
        int batchcount;
        struct kmem_list3 *l3;
        struct array_cache *ac;

        check_irq_off();
        ac = cpu_cache_get(cachep);     /* cachep->array[smp_processor_id()] */
retry:
        batchcount = ac->batchcount;
        if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
                /*
                 * If there was little recent activity on this cache, then
                 * perform only a partial refill.  Otherwise we could generate
                 * refill bouncing.
                 */
                batchcount = BATCHREFILL_LIMIT;
        }

        /* Refill from this object type's slabs or from the shared array;
         * first get the per-node slab list structure. */
        l3 = cachep->nodelists[numa_node_id()];

        BUG_ON(ac->avail > 0 || !l3);
        spin_lock(&l3->list_lock);

        /* See if we can refill from the shared array */
        /* l3->shared points to the per-node shared array_cache */
        if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
                goto alloc_done;

        while (batchcount > 0) {
                struct list_head *entry;
                struct slab *slabp;
                /* Get slab alloc is to come from. */
                entry = l3->slabs_partial.next;
                if (entry == &l3->slabs_partial) {
                        l3->free_touched = 1;
                        entry = l3->slabs_free.next;
                        if (entry == &l3->slabs_free)
                                goto must_grow;
                }

                slabp = list_entry(entry, struct slab, list);
                check_slabp(cachep, slabp);
                check_spinlock_acquired(cachep);
                while (slabp->inuse < cachep->num && batchcount--) {
                        STATS_INC_ALLOCED(cachep);
                        STATS_INC_ACTIVE(cachep);
                        STATS_SET_HIGH(cachep);

                        ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
                                                              numa_node_id());
                }
                check_slabp(cachep, slabp);

                /* move slabp to correct slabp list: */
                list_del(&slabp->list);
                if (slabp->free == BUFCTL_END)
                        list_add(&slabp->list, &l3->slabs_full);
                else
                        list_add(&slabp->list, &l3->slabs_partial);
        }

must_grow:
        l3->free_objects -= ac->avail;
alloc_done:
        spin_unlock(&l3->list_lock);

        if (unlikely(!ac->avail)) {
                int x;
                x = cache_grow(cachep, flags, numa_node_id());

                /* cache_grow can reenable interrupts, then ac could change. */
                ac = cpu_cache_get(cachep);
                if (!x && ac->avail == 0)       /* no objects in sight? abort */
                        return NULL;

                if (!ac->avail)         /* objects refilled by interrupt? */
                        goto retry;
        }
        ac->touched = 1;
        return ac->entry[--ac->avail];
}
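In outline, the refill path moves up to batchcount objects from this node's partial (then free) slabs into the per-CPU array, and re-files each slab according to whether it still has free objects. The toy model below is my own rough sketch of just that bookkeeping, with no lists, locking, or colouring:

#include <stdio.h>

#define NUM_PER_SLAB 8

/* Toy slab: only what the refill walk cares about. */
struct toy_slab {
        unsigned int inuse;     /* objects handed out from this slab */
        unsigned int num;       /* objects per slab (cachep->num) */
};

/* Take up to batchcount objects from one partial/free slab.  Returns how
 * many were taken; the caller re-files the slab as "full" when
 * inuse == num, "partial" otherwise. */
static unsigned int toy_refill_from_slab(struct toy_slab *s,
                                         unsigned int batchcount)
{
        unsigned int taken = 0;

        while (s->inuse < s->num && taken < batchcount) {
                s->inuse++;     /* stands in for slab_get_obj() */
                taken++;
        }
        return taken;
}

int main(void)
{
        /* e.g. batchcount = 16 and a partial slab with 3 objects already
         * in use: the walk takes the remaining 5, files the slab on
         * slabs_full, then continues with the next slab for the rest. */
        struct toy_slab partial = { .inuse = 3, .num = NUM_PER_SLAB };
        unsigned int got = toy_refill_from_slab(&partial, 16);

        printf("took %u, slab is now %s\n", got,
               partial.inuse == partial.num ? "full" : "partial");
        return 0;
}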

 

kmalloc is for general-purpose objects.

 

static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, void *caller)
{
        struct kmem_cache *cachep;

        /* If you want to save a few bytes .text space: replace
         * __ with kmem_.
         * Then kmalloc uses the uninlined functions instead of the inline
         * functions.
         */
        cachep = __find_general_cachep(size, flags);
        if (unlikely(cachep == NULL))
                return NULL;
        return __cache_alloc(cachep, flags, caller);
}
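So kmalloc() is only a size-indexed front end over the same __cache_alloc() path. For comparison, a trivial hedged usage sketch (dup_buffer is a made-up helper):

#include <linux/slab.h>
#include <linux/string.h>

/* Hypothetical helper: copies a buffer into a fresh general-purpose
 * allocation picked from the matching size-NNN cache. */
static void *dup_buffer(const void *src, size_t len)
{
        void *buf = kmalloc(len, GFP_KERNEL);  /* routed via __find_general_cachep() */

        if (buf)
                memcpy(buf, src, len);
        return buf;                            /* caller frees with kfree() */
}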

 

static inline struct kmem_cache *__find_general_cachep(size_t size,
                                                        gfp_t gfpflags)
{
        struct cache_sizes *csizep = malloc_sizes;

#if DEBUG
        /* This happens if someone tries to call
         * kmem_cache_create(), or __kmalloc(), before
         * the generic caches are initialized.
         */
        BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
#endif
        while (size > csizep->cs_size)
                csizep++;

        /*
         * Really subtle: The last entry with cs->cs_size==ULONG_MAX
         * has cs_{dma,}cachep==NULL. Thus no special case
         * for large kmalloc calls required.
         */
        if (unlikely(gfpflags & GFP_DMA))
                return csizep->cs_dmacachep;
        return csizep->cs_cachep;
}

 

struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
        CACHE(ULONG_MAX)
#undef CACHE
};
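The lookup is a linear walk over an ascending size table until the first class that fits; the ULONG_MAX sentinel guarantees the walk terminates even for oversized requests. A user-space model of that walk, with made-up size classes:

#include <stdio.h>
#include <limits.h>

/* Toy version of malloc_sizes[]: ascending size classes ending in a
 * ULONG_MAX sentinel, which (like the real table) matches any size,
 * so the walk always terminates. */
static const unsigned long toy_sizes[] = {
        32, 64, 128, 256, 512, 1024, 2048, 4096, ULONG_MAX
};

static unsigned long toy_find_class(unsigned long size)
{
        const unsigned long *csizep = toy_sizes;

        while (size > *csizep)
                csizep++;
        return *csizep;  /* real code returns cs_cachep / cs_dmacachep here */
}

int main(void)
{
        printf("kmalloc(100)  -> size-%lu cache\n", toy_find_class(100));
        printf("kmalloc(1024) -> size-%lu cache\n", toy_find_class(1024));
        return 0;
}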

 