The four main interface functions of SLUB memory management are:
//create a slab cache
struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, void (*ctor)(void *));
//allocate a slab object
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags);
//free a slab object
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
//destroy a slab cache
void kmem_cache_destroy(struct kmem_cache *);
This article focuses on kmem_cache_create.
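Before going into the implementation, here is a minimal usage sketch of these four interfaces. The struct my_item, the cache name "my_item_cache" and the module functions are made-up illustration, not anything from the source discussed below.

#include <linux/module.h>
#include <linux/slab.h>

struct my_item {
	int id;
	char payload[48];
};

static struct kmem_cache *my_cache;

static int __init my_item_init(void)
{
	struct my_item *item;

	/* create a cache of fixed-size my_item objects */
	my_cache = kmem_cache_create("my_item_cache", sizeof(struct my_item),
				     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!my_cache)
		return -ENOMEM;

	/* allocate one object from the cache */
	item = kmem_cache_alloc(my_cache, GFP_KERNEL);
	if (item) {
		item->id = 1;
		/* ... use the object, then return it to the cache */
		kmem_cache_free(my_cache, item);
	}
	return 0;
}

static void __exit my_item_exit(void)
{
	/* destroy the cache itself */
	kmem_cache_destroy(my_cache);
}

module_init(my_item_init);
module_exit(my_item_exit);
MODULE_LICENSE("GPL");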
I. Overall call graph of the functions involved
II. The kmem_cache_create function in detail
kmem_cache_create simply calls kmem_cache_create_usercopy to create the slab cache.
mm/slab_common.c
struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *))
{
return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
ctor);
}
EXPORT_SYMBOL(kmem_cache_create);
/*
* kmem_cache_create_usercopy - Create a cache.
* @name: A string which is used in /proc/slabinfo to identify this cache.
* @size: The size of objects to be created in this cache.
* @align: The required alignment for the objects.
* @flags: SLAB flags
* @useroffset: Usercopy region offset
* @usersize: Usercopy region size
* @ctor: A constructor for the objects.
*
* Returns a ptr to the cache on success, NULL on failure.
* Cannot be called within a interrupt, but can be interrupted.
* The @ctor is run when new pages are allocated by the cache.
*
* The flags are
*
* %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
* to catch references to uninitialised memory.
*
* %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
* for buffer overruns.
*
* %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
* cacheline. This can be beneficial if you're counting cycles as closely
* as davem.
*/
struct kmem_cache *
kmem_cache_create_usercopy(const char *name,
unsigned int size, unsigned int align,
slab_flags_t flags,
unsigned int useroffset, unsigned int usersize,
void (*ctor)(void *))
{
struct kmem_cache *s = NULL;
const char *cache_name;
int err;
get_online_cpus();//lock cpu_online_map; paired with put_online_cpus() at the end of the function
get_online_mems();//same for the memory hotplug state; paired with put_online_mems()
memcg_get_cache_ids();
mutex_lock(&slab_mutex);
//the four calls above take locks on the relevant state; the matching unlocks are at the end of the function
err = kmem_cache_sanity_check(name, size); //validate name and size; returns -EINVAL on bad input, 0 otherwise
if (err) {
goto out_unlock;
}
/* Refuse requests with allocator specific flags */
if (flags & ~SLAB_FLAGS_PERMITTED) { //reject flags that this allocator does not permit
err = -EINVAL;
goto out_unlock;
}
/*
* Some allocators will constraint the set of valid flags to a subset
* of all flags. We expect them to define CACHE_CREATE_MASK in this
* case, and we'll just provide them with a sanitized version of the
* passed flags.
*/
flags &= CACHE_CREATE_MASK;//keep only the flags accepted for cache creation
/* Fail closed on bad usersize of useroffset values. */
//sanity-check the usercopy parameters; in this call path usersize and useroffset are both 0
if (WARN_ON(!usersize && useroffset) ||
WARN_ON(size < usersize || size - usersize < useroffset))
usersize = useroffset = 0;
if (!usersize)
//check whether the requested size and flags can be served by a slab cache that already exists in the system;
//if so there is no need to create a new one - an alias is used instead (effectively a link to the existing cache) and the if below jumps to the end; otherwise NULL is returned and we continue
s = __kmem_cache_alias(name, size, align, flags, ctor);
if (s)
goto out_unlock;
//duplicate the cache name; it is what shows up in /proc/slabinfo
cache_name = kstrdup_const(name, GFP_KERNEL);
if (!cache_name) {
err = -ENOMEM;
goto out_unlock;
}
//the key step of kmem_cache_create_usercopy: no reusable cache was found, so create a new one
//calculate_alignment computes the alignment for the objects
s = create_cache(cache_name, size,
calculate_alignment(flags, align, size),
flags, useroffset, usersize, ctor, NULL, NULL);
if (IS_ERR(s)) {
err = PTR_ERR(s);
kfree_const(cache_name);
}
out_unlock:
//unlock, matching the locking at the top of the function
mutex_unlock(&slab_mutex);
memcg_put_cache_ids();
put_online_mems();
put_online_cpus();
if (err) {
if (flags & SLAB_PANIC)
panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
name, err);
else {
pr_warn("kmem_cache_create(%s) failed with error %d\n",
name, err);
dump_stack();
}
return NULL;
}
return s;
}
EXPORT_SYMBOL(kmem_cache_create_usercopy);
III. The __kmem_cache_alias function
Checks whether the requested size and flags match a slab cache that already exists in the system. If they do, no new cache is created; the existing cache is reused through an alias (effectively a link to it). The heavy lifting is done by find_mergeable. If no match is found, NULL is returned.
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *))
{
struct kmem_cache *s, *c;
s = find_mergeable(size, align, flags, name, ctor); //core helper: look for an existing slab cache that can be reused; returns NULL if none is found
if (s) {
s->refcount++;//the cache can be reused, so bump its reference count
/*
* Adjust the object sizes so that we clear
* the complete object on kzalloc.
*/
s->object_size = max(s->object_size, size);
s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
for_each_memcg_cache(c, s) {
c->object_size = s->object_size;
c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
}
//during boot, aliases are buffered until sysfs becomes available, so that the merge information is not lost
if (sysfs_slab_alias(s, name)) {
s->refcount--;
s = NULL;
}
}
return s;
}
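As an aside on what merging looks like in practice (assuming sysfs is mounted): once sysfs_slab_alias has run, a merged cache does not get its own entry; instead /sys/kernel/slab typically contains a symlink with the requested name pointing at the cache it was merged into, and the slabinfo tool from the kernel tree can list these aliases.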
1. The find_mergeable function
mm/slab.h
/* If !memcg, all caches are root. */
#define slab_root_caches slab_caches //the list of all root slab caches
#define root_caches_node list //name of the list_head member used for that list
mm/slab_common.c
struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
slab_flags_t flags, const char *name, void (*ctor)(void *))
{
struct kmem_cache *s;
if (slab_nomerge)//cache merging is disabled globally (slab_nomerge boot option)
return NULL;
if (ctor)
return NULL;
size = ALIGN(size, sizeof(void *));
align = calculate_alignment(flags, align, size);
size = ALIGN(size, align);
flags = kmem_cache_flags(size, flags, name, NULL);
if (flags & SLAB_NEVER_MERGE)//the flags forbid merging this cache with an existing one
return NULL;
//walk slab_root_caches, the list of all root slab caches in the system; return the first cache s that passes the checks, otherwise NULL
list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
if (slab_unmergeable(s))
continue;
if (size > s->size)
continue;
if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
continue;
/*
* Check if alignment is compatible.
* Courtesy of Adrian Drzewiecki
*/
if ((s->size & ~(align - 1)) != s->size)
continue;
if (s->size - size >= sizeof(void *))
continue;
if (IS_ENABLED(CONFIG_SLAB) && align &&
(align > s->align || s->align % align))
continue;
return s;
}
return NULL;
}
IV. create_cache
If no suitable cache can be reused, create_cache is called to build a new slab cache: create_cache(cache_name, size, calculate_alignment(flags, align, size), flags, useroffset, usersize, ctor, NULL, NULL);
Let's first look at the calculate_alignment function.
/*
* Figure out what the alignment of the objects will be given a set of
* flags, a user specified alignment and the size of the objects.
*/
//returns the alignment to use for the objects
static unsigned int calculate_alignment(slab_flags_t flags,
unsigned int align, unsigned int size)
{
/*
* If the user wants hardware cache aligned objects then follow that
* suggestion if the object is sufficiently large.
*
* The hardware cache alignment cannot override the specified
* alignment though. If that is greater then use it.
*/
//if SLAB_HWCACHE_ALIGN is set, objects should be aligned to the hardware cache line
if (flags & SLAB_HWCACHE_ALIGN) {
unsigned int ralign;
ralign = cache_line_size();//the L1 cache line size, typically 64 or 32 bytes
while (size <= ralign / 2)
ralign /= 2;
align = max(align, ralign);
}
if (align < ARCH_SLAB_MINALIGN)//enforce the architecture's minimum slab alignment (commonly 8 bytes)
align = ARCH_SLAB_MINALIGN;
return ALIGN(align, sizeof(void *));//round align up to a multiple of sizeof(void *), i.e. ALIGN(align, 8) on 64-bit
}
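A quick worked example, assuming a 64-byte L1 cache line and ARCH_SLAB_MINALIGN of 8: with SLAB_HWCACHE_ALIGN, align = 0 and size = 100, the while loop does not shrink ralign (100 > 64/2), so the result is 64; with size = 24 the loop halves ralign once (24 <= 32), giving 32. Without SLAB_HWCACHE_ALIGN and align = 0, the result is simply ARCH_SLAB_MINALIGN rounded up to a multiple of sizeof(void *), i.e. 8.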
mm/slab_common.c
Now for the main function itself...
static struct kmem_cache *create_cache(const char *name,
unsigned int object_size, unsigned int align,
slab_flags_t flags, unsigned int useroffset,
unsigned int usersize, void (*ctor)(void *),
struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
struct kmem_cache *s;
int err;
//useroffset and usersize are both 0 here, so this check is skipped
if (WARN_ON(useroffset + usersize > object_size))
useroffset = usersize = 0;
err = -ENOMEM;
//allocate a kmem_cache structure: looking at the implementation, this is really an object allocated
//from the cache named "kmem_cache", with its memory zeroed because __GFP_ZERO is set; for our purposes it is simply a zeroed struct kmem_cache
s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
if (!s)
goto out;
//initialize some of the fields of the newly allocated kmem_cache
s->name = name;
s->size = s->object_size = object_size;
s->align = align;
s->ctor = ctor;
s->useroffset = useroffset;
s->usersize = usersize;
//initialize the memcg_cache_params in the kmem_cache; memory cgroups provide memory isolation between groups of processes; in the default case this returns 0 on success
err = init_memcg_params(s, memcg, root_cache);
if (err)
goto out_free_cache;
//the core of create_cache; slab, slub and slob each provide their own implementation; returns 0 when the cache is created successfully
err = __kmem_cache_create(s, flags);
if (err)
goto out_free_cache;
//initialize the reference count to 1
s->refcount = 1;
//add the new cache to the system-wide slab_caches list
list_add(&s->list, &slab_caches);
memcg_link_cache(s);
out:
if (err)
return ERR_PTR(err);
//return the newly created slab cache
return s;
out_free_cache:
destroy_memcg_params(s);
kmem_cache_free(kmem_cache, s);
goto out;
}
1. The __kmem_cache_create and kmem_cache_open functions
We now reach the core of create_cache, __kmem_cache_create(s, flags), whose own core is kmem_cache_open.
mm/slab.h
/*
* State of the slab allocator.
*
* This is used to describe the states of the allocator during bootup.
* Allocators use this to gradually bootstrap themselves. Most allocators
* have the problem that the structures used for managing slab caches are
* allocated from slab caches themselves.
*/
enum slab_state {
DOWN, /* No slab functionality yet */
PARTIAL, /* SLUB: kmem_cache_node available */
PARTIAL_NODE, /* SLAB: kmalloc size for node struct available */
UP, /* Slab caches usable but not all extras yet */
FULL /* Everything is working */
};
int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
{
int err;
//core call; returns 0 on success
err = kmem_cache_open(s, flags);
if (err)
return err;
/* Mutex is not taken during early boot */
//during early boot, kmem_cache_init()->create_kmalloc_caches() creates the kmem_caches that manage objects of different sizes
//if slab_state is still <= UP we return 0 here and skip the sysfs registration; otherwise we continue; create_kmalloc_caches() is what raises the state to UP
if (slab_state <= UP)
return 0;
//register the cache in sysfs so it appears under /sys/kernel/slab
memcg_propagate_slab_attrs(s);
err = sysfs_slab_add(s);
if (err)
__kmem_cache_release(s);
return err;
}
Next, the kmem_cache_open function:
static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
{
//update flags according to the slub_debug settings, if any
s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
#ifdef CONFIG_SLAB_FREELIST_HARDENED
s->random = get_random_long();
#endif
//normally returns nonzero on success; based on the object size it computes the best page order and the number of objects per slab, then initializes s->oo, s->min, s->size and s->max in the kmem_cache
if (!calculate_sizes(s, -1))
goto error;
//disable_higher_order_debug is 1 when the corresponding slub_debug option is enabled, otherwise 0
if (disable_higher_order_debug) {
/*
* Disable debugging flags that store metadata if the min slab
* order increased.
*/
//if enabling debug raised the minimum slab order (s->min), drop the metadata debug flags and run calculate_sizes again
if (get_order(s->size) > get_order(s->object_size)) {
s->flags &= ~DEBUG_METADATA_FLAGS;
s->offset = 0;
if (!calculate_sizes(s, -1))
goto error;
}
}
//enable the cmpxchg-double fast path, used later when allocating and freeing objects from this cache
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
/* Enable fast mode */
s->flags |= __CMPXCHG_DOUBLE;
#endif
/*
* The larger the object size is, the more pages we want on the partial
* list to avoid pounding the page allocator excessively.
*/
set_min_partial(s, ilog2(s->size) / 2);//initialize s->min_partial
set_cpu_partial(s);//initialize s->cpu_partial
//with CONFIG_NUMA, also set the remote-node defragmentation ratio
#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
#endif
/* Initialize the pre-computed randomized freelist if slab is up */
//slab_state starts out as DOWN during early boot
if (slab_state >= UP) {
if (init_cache_random_seq(s))
goto error;
}
//initialize s->node; returns nonzero on success
if (!init_kmem_cache_nodes(s))
goto error;
//initialize s->cpu_slab; returns 1 on success, in which case kmem_cache_open returns 0
if (alloc_kmem_cache_cpus(s))
return 0;
//if alloc_kmem_cache_cpus returned 0, free the per-node structures and report the error
free_kmem_cache_nodes(s);
error:
if (flags & SLAB_PANIC)
panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n",
s->name, s->size, s->size,
oo_order(s->oo), s->offset, (unsigned long)flags);
return -EINVAL;
}
1.1 calculate_sizes
Based on the object size, compute the best page order and the number of objects per slab, then initialize s->oo, s->min, s->size and s->max in the kmem_cache structure.
/*
* calculate_sizes() determines the order and the distribution of data within
* a slab object.
*/
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
slab_flags_t flags = s->flags;
unsigned int size = s->object_size;
unsigned int order;
/*
* Round up object size to the next word boundary. We can only
* place the free pointer at word boundaries and this determines
* the possible location of the free pointer.
*/
//round size up to a multiple of sizeof(void *) (8 bytes on 64-bit)
size = ALIGN(size, sizeof(void *));
#ifdef CONFIG_SLUB_DEBUG
/*
* Determine if we can poison the object itself. If the user of
* the slab may touch the object after free or before allocation
* then we should never poison the object itself.
*/
//with SLUB debug enabled, decide whether the object itself may be poisoned
if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
!s->ctor)
s->flags |= __OBJECT_POISON;
else
s->flags &= ~__OBJECT_POISON;
/*
* If we are Redzoning then check if there is some space between the
* end of the object and the free pointer. If not then add an
* additional word to have some bytes to store Redzone information.
*/
//with redzoning, if there is no spare space between the end of the object and the free pointer, add one word (8 bytes) to hold the redzone information
if ((flags & SLAB_RED_ZONE) && size == s->object_size)
size += sizeof(void *);
#endif
/*
* With that we have determined the number of bytes in actual use
* by the object. This is the potential offset to the free pointer.
*/
//s->inuse is the object size rounded up to a word boundary
s->inuse = size;
//when the cache is RCU-typesafe, poisons objects, or has a constructor, the first word of the object must not be reused
//to store the free pointer (no inline free pointer); instead the pointer is placed after the object, adding sizeof(void *) bytes
if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
s->ctor)) {
/*
* Relocate free pointer after the object if it is not
* permitted to overwrite the first word of the object on
* kmem_cache_free.
*
* This is the case if we do RCU, have a constructor or
* destructor or are poisoning the objects.
*/
s->offset = size;
size += sizeof(void *);
}
#ifdef CONFIG_SLUB_DEBUG
//with SLAB_STORE_USER, room for the alloc/free tracking records is appended after the object, growing its size
if (flags & SLAB_STORE_USER)
/*
* Need to store information about allocs and frees after
* the object.
*/
size += 2 * sizeof(struct track);
#endif
//let KASAN reserve the space it needs (its redzone/metadata) in the object layout
kasan_cache_create(s, &size, &s->flags);
#ifdef CONFIG_SLUB_DEBUG
//with redzoning, a padding word and a left redzone (red_left_pad) are added to catch writes before the start of the object
if (flags & SLAB_RED_ZONE) {
/*
* Add some empty padding so that we can catch
* overwrites from earlier objects rather than let
* tracking information or the free pointer be
* corrupted if a user writes before the start
* of the object.
*/
size += sizeof(void *);
s->red_left_pad = sizeof(void *);
s->red_left_pad = ALIGN(s->red_left_pad, s->align);
size += s->red_left_pad;
}
#endif
/*
* SLUB stores one object immediately after another beginning from
* offset 0. In order to align the objects we have to simply size
* each object to conform to the alignment.
*/
//whether or not SLUB debug is enabled, finally round the size up to s->align and store the result in s->size
size = ALIGN(size, s->align);
s->size = size;
//with the parameters passed in earlier, forced_order == -1, so the else branch is taken and calculate_order runs
/*
calculate_order:
a. derive max_objects and min_objects from the size
b. look for a reasonable order in the loop and return it
c. if none is found, set min_objects to 1, compute an order and return it
d. if that still fails, raise the maximum order searched to MAX_ORDER (11), compute an order and return it
*/
if (forced_order >= 0)
order = forced_order;
else
order = calculate_order(size);
if ((int)order < 0)
return 0;
//start with an empty GFP mask for allocating pages from the buddy allocator
s->allocflags = 0;
if (order)
s->allocflags |= __GFP_COMP;
if (s->flags & SLAB_CACHE_DMA)
s->allocflags |= GFP_DMA;
if (s->flags & SLAB_CACHE_DMA32)
s->allocflags |= GFP_DMA32;
if (s->flags & SLAB_RECLAIM_ACCOUNT)
s->allocflags |= __GFP_RECLAIMABLE;
/*
* Determine the number of objects per slab
*/
//pack the chosen order and the object size into s->oo with oo_make
s->oo = oo_make(order, size);
//s->min uses get_order(size), the smallest order able to hold a single object; it serves as the fallback when an allocation of order oo_order(s->oo) fails
s->min = oo_make(get_order(size), size);
//oo_objects extracts the low 16 bits of s->oo, i.e. the number of objects per slab; the same is done for s->max, and if s->oo holds more objects, s->max is set to s->oo
if (oo_objects(s->oo) > oo_objects(s->max))
s->max = s->oo;
//see the note on the double negation below: this returns 1 as long as the slab holds at least one object
return !!oo_objects(s->oo);
}
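As a worked example (assuming a 64-bit kernel, no SLUB debug flags, no constructor and s->align = 8): object_size = 100 is rounded up to size = 104 and s->inuse = 104; since neither RCU, poisoning nor a constructor is involved, the free pointer stays inside the object and no extra word is added; the final ALIGN(104, 8) leaves s->size = 104. calculate_order(104) then picks order 0 (see the next section), so one 4 KiB page holds 4096 / 104 = 39 objects.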
(1) The calculate_order function
Given the object size, return the page order allocated to slabs of this cache; a slab then consists of 2^order pages.
mm/slub.c
/*
* Mininum / Maximum order of slab pages. This influences locking overhead
* and slab fragmentation. A higher order reduces the number of partial slabs
* and increases the number of allocations possible without having to
* take the list_lock.
*/
//three static global variables
static unsigned int slub_min_order;//defaults to 0
static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; //i.e. 3
static unsigned int slub_min_objects;//defaults to 0
/*
* Mininum number of partial slabs. These will be left on the partial
* lists even if they are empty. kmem_cache_shrink may reclaim them.
*/
#define MIN_PARTIAL 5
/*
* Maximum number of desirable partial slabs.
* The existence of more partial slabs makes kmem_cache_shrink
* sort the partial list by the number of objects in use.
*/
#define MAX_PARTIAL 10
#define OO_SHIFT 16
#define OO_MASK ((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 *///at most 32767 objects per slab page
//how many objects of the given size fit in a slab of the given order
static inline unsigned int order_objects(unsigned int order, unsigned int size)
{
return ((unsigned int)PAGE_SIZE << order) / size;//PAGE_SIZE is typically 4K
}
include/asm-generic/getorder.h
static inline __attribute_const__ int get_order(unsigned long size)
{
if (__builtin_constant_p(size)) {//__builtin_constant_p tells whether size is a compile-time constant; if so, take this branch
if (!size)
return BITS_PER_LONG - PAGE_SHIFT;
if (size < (1UL << PAGE_SHIFT))//PAGE_SHIFT = 12, i.e. size < 4K
return 0;
//for size >= 4K, the result is ilog2(size - 1) - 12 + 1
return ilog2((size) - 1) - PAGE_SHIFT + 1;
}
size--;
size >>= PAGE_SHIFT;
#if BITS_PER_LONG == 32
return fls(size);
#else
return fls64(size);
#endif
}
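A couple of quick values, assuming 4 KiB pages: get_order(4096) = ilog2(4095) - 12 + 1 = 11 - 12 + 1 = 0, get_order(4097) = 12 - 12 + 1 = 1, and get_order(8192) = 1; in other words, the result is the smallest order whose 2^order pages can hold size bytes.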
mm/slub.c
//compute the slab order for a given object size
static inline unsigned int slab_order(unsigned int size,
unsigned int min_objects, unsigned int max_order,
unsigned int fract_leftover)
{
unsigned int min_order = slub_min_order;
unsigned int order;
//this branch is normally not taken
if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
return get_order(size * MAX_OBJS_PER_PAGE) - 1;
//loop from the minimum order to the maximum order, break at the first order that wastes little enough space, and return it
for (order = max(min_order, (unsigned int)get_order(min_objects * size));
order <= max_order; order++) {
unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
unsigned int rem;
//the unused space left in the slab after packing whole objects
rem = slab_size % size;
//if the unused space is more than 1/fract_leftover of the slab, consider it wasteful and try the next order
if (rem <= slab_size / fract_leftover)
break;
}
return order;
}
//with the helpers out of the way, here is the main function...
static inline int calculate_order(unsigned int size)
{
unsigned int order;
unsigned int min_objects;
unsigned int max_objects;
/*
* Attempt to find best configuration for a slab. This
* works by first attempting to generate a layout with
* the best configuration and backing off gradually.
*
* First we increase the acceptable waste in a slab. Then
* we reduce the minimum objects required in a slab.
*/
min_objects = slub_min_objects;
if (!min_objects)
//nr_cpu_ids is the number of CPUs in the machine; fls returns the position of the highest set bit, e.g. fls(4) = 3
min_objects = 4 * (fls(nr_cpu_ids) + 1);
//the maximum number of objects that fit in a slab of the maximum allowed order
max_objects = order_objects(slub_max_order, size);
min_objects = min(min_objects, max_objects);
while (min_objects > 1) {
unsigned int fraction;
fraction = 16;
while (fraction >= 4) {
//core helper: slab_order computes the order for this object size
order = slab_order(size, min_objects,
slub_max_order, fraction);
if (order <= slub_max_order)
return order;
fraction /= 2;
}
min_objects--;
}
/*
* We were unable to place multiple objects in a slab. Now
* lets see if we can place a single object there.
*/
//if the loop above found no suitable order, try to fit a single object: slab_order(size, 1, slub_max_order, 1)
order = slab_order(size, 1, slub_max_order, 1);
if (order <= slub_max_order)
return order;
/*
* Doh this slab cannot be placed using slub_max_order.
*/
//still nothing: raise the maximum order searched to MAX_ORDER (11) and try again
order = slab_order(size, 1, MAX_ORDER, 1);
if (order < MAX_ORDER)
return order;
return -ENOSYS;
}
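Continuing the size = 104 example on a 4-CPU machine: slub_min_objects is 0, so min_objects = 4 * (fls(4) + 1) = 16; max_objects = order_objects(3, 104) = 32768 / 104 = 315, so min_objects stays 16. The first call slab_order(104, 16, 3, 16) starts at order = get_order(16 * 104) = get_order(1664) = 0; at order 0 the leftover is 4096 % 104 = 40 bytes, which is below 4096 / 16 = 256, so order 0 is accepted and calculate_order returns 0.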
(2) The oo_make and oo_objects functions
/*
* Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the
* given order would contain.
*/
//a 32-bit value: the high 16 bits hold the order, the low 16 bits the number of objects in one slab; it can be read and updated atomically
struct kmem_cache_order_objects {
unsigned int x;
};
#define OO_SHIFT 16
#define OO_MASK ((1 << OO_SHIFT) - 1) //65535, i.e. sixteen 1 bits
static inline struct kmem_cache_order_objects oo_make(unsigned int order,
unsigned int size)
{
struct kmem_cache_order_objects x = {
(order << OO_SHIFT) + order_objects(order, size) //OO_SHIFT = 16
};
return x;
}
static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
{
return x.x & OO_MASK; //extract the low 16 bits: the number of objects in one slab
}
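For the running example (order 0, size 104): oo_make(0, 104) stores (0 << 16) + 39 = 39, so oo_order(s->oo) = 0 and oo_objects(s->oo) = 39; for a hypothetical order-1 cache with 512-byte objects the stored value would be (1 << 16) + 16 = 65552.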
1.2 The set_min_partial and set_cpu_partial functions
These initialize s->min_partial and s->cpu_partial.
The actual arguments passed from kmem_cache_open are:
/*
* The larger the object size is, the more pages we want on the partial
* list to avoid pounding the page allocator excessively.
*/
set_min_partial(s, ilog2(s->size) / 2);//ilog2 is the integer base-2 logarithm
set_cpu_partial(s);
/*
* Mininum number of partial slabs. These will be left on the partial
* lists even if they are empty. kmem_cache_shrink may reclaim them.
*/
#define MIN_PARTIAL 5
/*
* Maximum number of desirable partial slabs.
*/
#define MAX_PARTIAL 10
//returns 0 by default; even with SLUB debug compiled in it normally returns 0 unless debug flags are actually set for this cache
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
return 0;
#endif
}
static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
//with CONFIG_SLUB_CPU_PARTIAL this is the negation of kmem_cache_debug, which as described above normally returns 0, so this returns true
#ifdef CONFIG_SLUB_CPU_PARTIAL
return !kmem_cache_debug(s);
#else
return false;
#endif
}
mm/slub.c
static void set_min_partial(struct kmem_cache *s, unsigned long min)
{
if (min < MIN_PARTIAL)
min = MIN_PARTIAL;
else if (min > MAX_PARTIAL)
min = MAX_PARTIAL;
s->min_partial = min;
}
static void set_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
/*
* cpu_partial determined the maximum number of objects kept in the
* per cpu partial lists of a processor.
*
* Per cpu partial lists mainly contain slabs that just have one
* object freed. If they are used for allocation then they can be
* filled up again with minimal effort. The slab will never hit the
* per node partial lists and therefore no locking will be required.
*
* This setting also determines
*
* A) The number of objects from per cpu partial slabs dumped to the
* per node list when we reach the limit.
* B) The number of objects in cpu partial slabs to extract from the
* per node list when we run out of per cpu objects. We only fetch
* 50% to keep some capacity around for frees.
*/
//true when CONFIG_SLUB_CPU_PARTIAL is configured (and the cache is not being debugged), false otherwise
if (!kmem_cache_has_cpu_partial(s))
s->cpu_partial = 0;
else if (s->size >= PAGE_SIZE)//PAGE_SIZE == 4K
s->cpu_partial = 2;
else if (s->size >= 1024)
s->cpu_partial = 6;
else if (s->size >= 256)
s->cpu_partial = 13;
else
s->cpu_partial = 30;
#endif
}
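For the size = 104 example: ilog2(104) = 6, so set_min_partial is called with 3, which is clamped up to MIN_PARTIAL, giving s->min_partial = 5; and since 104 < 256, set_cpu_partial leaves s->cpu_partial = 30 (assuming CONFIG_SLUB_CPU_PARTIAL is enabled).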
1.3 The init_kmem_cache_nodes function
Initializes the s->node array of the slab cache.
include/linux/nodemask.h
#if MAX_NUMNODES > 1
#define for_each_node_mask(node, mask) \
for ((node) = first_node(mask); \
(node) < MAX_NUMNODES; \
(node) = next_node((node), (mask)))
#else /* MAX_NUMNODES == 1 */
#define for_each_node_mask(node, mask) \
if (!nodes_empty(mask)) \
for ((node) = 0; (node) < 1; (node)++)
#endif /* MAX_NUMNODES */
//iterate over every node in the given state
#define for_each_node_state(__node, __state) \
for_each_node_mask((__node), node_states[__state])
/*
* Bitmasks that are kept for all the nodes.
*/
enum node_states {
N_POSSIBLE, /* The node could become online at some point */
N_ONLINE, /* The node is online */
N_NORMAL_MEMORY, /* The node has regular memory */
#ifdef CONFIG_HIGHMEM
N_HIGH_MEMORY, /* The node has regular or high memory */
#else
N_HIGH_MEMORY = N_NORMAL_MEMORY,
#endif
N_MEMORY, /* The node has memory(regular, high, movable) */
N_CPU, /* The node has one or more cpus */
NR_NODE_STATES
};
static void init_kmem_cache_node(struct kmem_cache_node *n)
{
n->nr_partial = 0;
spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
atomic_long_set(&n->nr_slabs, 0);
atomic_long_set(&n->total_objects, 0);
INIT_LIST_HEAD(&n->full);
#endif
}
mm/slub.c
//here is the main function...
static int init_kmem_cache_nodes(struct kmem_cache *s)
{
int node;
/* iterate over every node (with normal memory) and, for each one:
* 1. allocate a kmem_cache_node structure for that node
* 2. store the pointer in the node array of the kmem_cache s
*/
for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n;
//normally slab_state is already UP here; as mentioned earlier, it is raised to UP early during boot
/*
If the state is still DOWN, there is no slab for kmem_cache_node on this node yet: early_kmem_cache_node_alloc allocates one and initializes the page structure that backs it.
*/
if (slab_state == DOWN) {
early_kmem_cache_node_alloc(node);
continue;
}
//with slab_state == UP, the call below allocates a kmem_cache_node; it is ultimately implemented via slab_alloc_node, which is covered in the article on how the SLUB allocator works
/*
Just like an ordinary slab allocation, it works by returning the first free object of that cache on this node.
*/
n = kmem_cache_alloc_node(kmem_cache_node,
GFP_KERNEL, node);
//allocation failed: free the per-node structures already set up for this cache and bail out
if (!n) {
free_kmem_cache_nodes(s);
return 0;
}
//initialize n: n->nr_partial = 0, n->list_lock, n->partial, and so on
init_kmem_cache_node(n);
//store it at index node of the s->node array
s->node[node] = n;
}
return 1;
}
1.4 The alloc_kmem_cache_cpus function
This function allocates per-CPU space with __alloc_percpu() and then initializes that space for each CPU via init_kmem_cache_cpus().
#define PERCPU_DYNAMIC_EARLY_SIZE (12 << 10)
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) //PAGE_SHIFT = 12
include/linux/build_bug.h
/**
* BUILD_BUG_ON - break compile if a condition is true.
* @condition: the condition which the compiler should know is false.
*
* If you have some code which relies on certain constants being equal, or
* some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
* detect if someone changes it.
*
* The implementation uses gcc's reluctance to create a negative array, but gcc
* (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
* inline functions). Luckily, in 4.3 they added the "error" function
* attribute just for this type of case. Thus, we use a negative sized array
* (should always create an error on gcc versions older than 4.4) and then call
* an undefined function with the error attribute (should always create an
* error on gcc 4.3 and later). If for some reason, neither creates a
* compile-time error, we'll still have a link-time error, which is harder to
* track down.
*/
#ifndef __OPTIMIZE__
//if condition is true, !!(condition) = 1 and the array type is char[-1], a compile-time error; if it is false, the type is char[1] and nothing happens
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
#else
#define BUILD_BUG_ON(condition) \
BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
#endif
static void init_kmem_cache_cpus(struct kmem_cache *s)
{
int cpu;
//for every possible CPU, per_cpu_ptr resolves the per-CPU variable allocated earlier for that CPU, and its tid field is initialized
for_each_possible_cpu(cpu)
per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
}
mm/slub.c
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
//if the condition below holds, the build fails at compile time
BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
/*
* Must align to double word boundary for the double cmpxchg
* instructions to work; see __pcpu_double_call_return_bool().
*/
//allocate space for each CPU; the details of __alloc_percpu are beyond the scope of this article - it is enough to know that a per-CPU variable is allocated
s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2 * sizeof(void *));
if (!s->cpu_slab)
return 0;
//bind each per-CPU variable to its CPU (by initializing its tid)
init_kmem_cache_cpus(s);
return 1;
}
2. The list_add call
Adds the newly created kmem_cache to the global slab_caches list.
Glossary
cpu_online_map: the set of CPUs that are currently usable (online).
1. If the machine has 8 CPUs and all are online, cpu_online_map covers 8 CPUs;
2. Take one of them offline and it covers 7.
The double negation !!: turns any non-zero value into 1 and leaves 0 as 0.
Additional note
create_kmalloc_caches does not go through kmem_cache_create to create the slab caches for the different kmalloc object sizes; instead it loops over the sizes and ends up in kmem_cache_zalloc, the same helper that kmem_cache_create relies on:
mm/slab_common.c
create_kmalloc_caches
->loops over new_kmalloc_cache
->create_kmalloc_cache
->kmem_cache_zalloc
->create_boot_cache (initializes some fields of the slab cache structure)
References (many thanks to the authors of the following articles):
【原创】(十一)Linux内存管理slub分配器 - LoyenWang - 博客园
SLUB结构体创建及创建slab分析 - 云+社区 - 腾讯云
ARM64内存管理八:slub创建 | Black-Jack
【Linux内存源码分析】SLUB分配算法(3) – JeanLeo 博客