The Four Main Interface Functions of SLUB Memory Management (1)

 The four main interface functions of the SLUB allocator are:

//create a slab cache
struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align, unsigned long flags, void (*ctor)(void *));
//allocate an object from a slab cache
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags);
//free an object back to its slab cache
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
//destroy a slab cache
void kmem_cache_destroy(struct kmem_cache *);
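
Before diving into the internals, here is a minimal usage sketch showing how the four interfaces fit together (a hypothetical module-side cache for a made-up struct foo; error handling trimmed to the essentials):

#include <linux/module.h>
#include <linux/slab.h>

struct foo {
	int a;
	int b;
};

static struct kmem_cache *foo_cachep;

static int __init foo_init(void)
{
	/* create a cache of struct foo objects, aligned to the hardware cache line */
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
				       0, SLAB_HWCACHE_ALIGN, NULL);
	if (!foo_cachep)
		return -ENOMEM;

	/* allocate one object from the cache and free it again */
	{
		struct foo *f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
		if (!f) {
			kmem_cache_destroy(foo_cachep);
			return -ENOMEM;
		}
		kmem_cache_free(foo_cachep, f);
	}
	return 0;
}

static void __exit foo_exit(void)
{
	/* destroy the cache; every object must have been freed by now */
	kmem_cache_destroy(foo_cachep);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");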

This article focuses on kmem_cache_create.

I. Overall call graph

The original post presents this as a diagram; in text form, the call chain covered below is:

kmem_cache_create
  -> kmem_cache_create_usercopy
       -> __kmem_cache_alias -> find_mergeable          (reuse an existing cache)
       -> create_cache                                  (otherwise create a new one)
            -> __kmem_cache_create -> kmem_cache_open
                 -> calculate_sizes -> calculate_order / oo_make
                 -> set_min_partial / set_cpu_partial
                 -> init_kmem_cache_nodes
                 -> alloc_kmem_cache_cpus

II. The kmem_cache_create code in detail

        kmem_cache_create directly calls kmem_cache_create_usercopy to carry out the slab-cache creation:

mm/slab_common.c
struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
		slab_flags_t flags, void (*ctor)(void *))
{
	return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
					  ctor);
}
EXPORT_SYMBOL(kmem_cache_create);


/*
 * kmem_cache_create_usercopy - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @useroffset: Usercopy region offset
 * @usersize: Usercopy region size
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within a interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create_usercopy(const char *name,
		  unsigned int size, unsigned int align,
		  slab_flags_t flags,
		  unsigned int useroffset, unsigned int usersize,
		  void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;
	const char *cache_name;
	int err;

	get_online_cpus();//takes a reference on cpu_online_map (CPU hotplug); paired with put_online_cpus() at the end
	get_online_mems();//same idea for memory hotplug, paired with put_online_mems()
	memcg_get_cache_ids();

	mutex_lock(&slab_mutex);
//the four calls above take locks/references on the relevant state; each has a matching release at the end

	err = kmem_cache_sanity_check(name, size); //validate name and size; returns -EINVAL if they fail the checks, 0 otherwise
	if (err) {
		goto out_unlock;
	}

	/* Refuse requests with allocator specific flags */
	if (flags & ~SLAB_FLAGS_PERMITTED) { //reject flags that are not permitted
		err = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Some allocators will constraint the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;//keep only the flags the allocator accepts

	/* Fail closed on bad usersize of useroffset values. */
//sanity-check the usercopy parameters; on this path usersize == useroffset == 0
	if (WARN_ON(!usersize && useroffset) ||
	    WARN_ON(size < usersize || size - usersize < useroffset))
		usersize = useroffset = 0;

	if (!usersize)
//check whether the requested size and flags can be matched against a slab cache that already exists in the system;
//on a match nothing new is allocated: the existing cache is aliased (effectively linked to) and the if below jumps to the end; otherwise NULL is returned and we carry on
		s = __kmem_cache_alias(name, size, align, flags, ctor);
	if (s)
		goto out_unlock;
//duplicate the cache name, used later for display in /proc/slabinfo
	cache_name = kstrdup_const(name, GFP_KERNEL);
	if (!cache_name) {
		err = -ENOMEM;
		goto out_unlock;
	}
//the key step of kmem_cache_create_usercopy: no reusable slab cache was found, so create a new one
//calculate_alignment computes the alignment for the objects
	s = create_cache(cache_name, size,
			 calculate_alignment(flags, align, size),
			 flags, useroffset, usersize, ctor, NULL, NULL);
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
		kfree_const(cache_name);
	}

out_unlock:
//release everything taken at the top
	mutex_unlock(&slab_mutex);

	memcg_put_cache_ids();
	put_online_mems();
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			pr_warn("kmem_cache_create(%s) failed with error %d\n",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;
}
EXPORT_SYMBOL(kmem_cache_create_usercopy);
 

III. The __kmem_cache_alias function

        Checks whether the requested size and flags match a slab cache that already exists in the system. On a match there is no need to allocate anything new: the existing cache is reused through an alias, essentially a link to it. The real work is done by find_mergeable; if nothing matches, NULL is returned.

struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *))
{
	struct kmem_cache *s, *c;

	s = find_mergeable(size, align, flags, name, ctor); //core helper: look for an already-created slab cache that can be reused; returns NULL when none is found
	if (s) {
		s->refcount++;//reusable: bump the reference count

		/*
		 * Adjust the object sizes so that we clear
		 * the complete object on kzalloc.
		 */
		s->object_size = max(s->object_size, size);
		s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));

		for_each_memcg_cache(c, s) {
			c->object_size = s->object_size;
			c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
		}
//during bootup, buffer the alias until sysfs is available, so the reuse information is not lost
		if (sysfs_slab_alias(s, name)) {
			s->refcount--;
			s = NULL;
		}
	}

	return s;
}
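
A side note: when the alias path succeeds, the merge is visible in sysfs. Under /sys/kernel/slab the requested name typically shows up as a symlink pointing at the merged cache (SLUB gives the real, merged caches generated names such as :t-0000096) rather than as an independent cache.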

1. The find_mergeable function

mm/slab.h
/* If !memcg, all caches are root. */
#define slab_root_caches	slab_caches //the global list of slab caches
#define root_caches_node	list //name of the list member used for that list

mm/slab_common.c
struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
		slab_flags_t flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *s;

	if (slab_nomerge)//merging disabled globally (slab_nomerge boot option)
		return NULL;

	if (ctor)//a cache with a constructor is never merged
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	if (flags & SLAB_NEVER_MERGE)//the flags mark this cache as never mergeable
		return NULL;
//walk slab_root_caches, the list of all slab caches in the system; return the first cache s that qualifies, otherwise NULL
	list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
		if (slab_unmergeable(s))
			continue;

		if (size > s->size)
			continue;

		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
			continue;
		/*
		 * Check if alignment is compatible.
		 * Courtesy of Adrian Drzewiecki
		 */
		if ((s->size & ~(align - 1)) != s->size)
			continue;

		if (s->size - size >= sizeof(void *))
			continue;

		if (IS_ENABLED(CONFIG_SLAB) && align &&
			(align > s->align || s->align % align))
			continue;

		return s;
	}
	return NULL;
}
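
A worked example of the merge criteria (assumed setup: no slub_debug, no constructor, 64-bit): a request for a 90-byte cache is first rounded to size = 96; the walk will then usually land on the existing kmalloc-96 cache, since size <= s->size, s->size - size = 0 < sizeof(void *), the merge-relevant flags match, and s->size is a multiple of the 8-byte alignment. The new "cache" thus becomes an alias of kmalloc-96 instead of being created from scratch.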

IV. create_cache

        If nothing suitable can be reused, create_cache is called to create a brand-new slab cache: create_cache(cache_name, size, calculate_alignment(flags, align, size), flags, useroffset, usersize, ctor, NULL, NULL);

First, the calculate_alignment function:

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
//returns the alignment to use for the objects
static unsigned int calculate_alignment(slab_flags_t flags,
		unsigned int align, unsigned int size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater then use it.
	 */
//SLAB_HWCACHE_ALIGN means the objects should be aligned to the hardware cache line
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned int ralign;

		ralign = cache_line_size();//the L1 cache line size, typically 64 or 32 bytes
		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)//raise align to the architecture minimum (often 8 bytes) if it is below it
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));//round align up to the next multiple of sizeof(void *) (8 bytes on 64-bit) and return it
}
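
A quick worked example (assuming a 64-byte L1 cache line and ARCH_SLAB_MINALIGN = 8): with SLAB_HWCACHE_ALIGN, align = 0 and size = 100, ralign starts at 64; since 100 > 64/2 the while loop never halves it, and the function returns 64. With size = 24 the loop halves 64 to 32 (because 24 <= 32) and then stops (24 > 16), returning 32: small objects get packed two per cache line instead of wasting half of it.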

mm/slab_common.c
Now for the main event......
static struct kmem_cache *create_cache(const char *name,
		unsigned int object_size, unsigned int align,
		slab_flags_t flags, unsigned int useroffset,
		unsigned int usersize, void (*ctor)(void *),
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;
//useroffset and usersize start out as 0, so this if is skipped
	if (WARN_ON(useroffset + usersize > object_size))
		useroffset = usersize = 0;

	err = -ENOMEM;
//allocate a kmem_cache object. Following the implementation, this actually allocates one object
//from the cache named "kmem_cache", and the memory is zeroed because __GFP_ZERO is set; for our purposes, we simply obtain one zeroed struct kmem_cache
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;
//initialize some of the fields of the freshly allocated kmem_cache
	s->name = name;
	s->size = s->object_size = object_size;
	s->align = align;
	s->ctor = ctor;
	s->useroffset = useroffset;
	s->usersize = usersize;
//initialize the memcg_cache_params inside the kmem_cache; memory cgroups provide memory isolation between processes; returns 0 on success
	err = init_memcg_params(s, memcg, root_cache);
	if (err)
		goto out_free_cache;
//the core of create_cache; slab/slub/slob each implement this function; returns 0 once the slab cache has been created
	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;
//initialize the reference count to 1
	s->refcount = 1;
//add the newly created slab cache to the system-wide slab list
	list_add(&s->list, &slab_caches);
	memcg_link_cache(s);
out:
	if (err)
		return ERR_PTR(err);
//return the newly created slab cache
	return s;

out_free_cache:
	destroy_memcg_params(s);
	kmem_cache_free(kmem_cache, s);
	goto out;
}

1. The __kmem_cache_create and kmem_cache_open functions

        We now enter create_cache's core function __kmem_cache_create(s, flags), whose own core is kmem_cache_open.

mm/slab.h
/*
 * State of the slab allocator.
 *
 * This is used to describe the states of the allocator during bootup.
 * Allocators use this to gradually bootstrap themselves. Most allocators
 * have the problem that the structures used for managing slab caches are
 * allocated from slab caches themselves.
 */
enum slab_state {
	DOWN,			/* No slab functionality yet */
	PARTIAL,		/* SLUB: kmem_cache_node available */
	PARTIAL_NODE,		/* SLAB: kmalloc size for node struct available */
	UP,			/* Slab caches usable but not all extras yet */
	FULL			/* Everything is working */
};
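
For orientation, a summary of how the state normally advances on SLUB (pieced together from the boot path, not from the code above): kmem_cache_init() moves it from DOWN to PARTIAL once the kmem_cache_node cache works, create_kmalloc_caches() raises it to UP, and a late initcall (slab_sysfs_init) sets FULL once sysfs registration becomes possible. That is why __kmem_cache_create below skips sysfs_slab_add while slab_state <= UP.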


int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
{
	int err;
//core call; returns 0 on success
	err = kmem_cache_open(s, flags);
	if (err)
		return err;

	/* Mutex is not taken during early boot */
//during early boot, kmem_cache_init()->create_kmalloc_caches() creates the kmem_caches that manage the different kmalloc object sizes
//if slab_state is UP or below we return 0 here; otherwise we continue. As far as this code is concerned, create_kmalloc_caches is what raises the state to UP
	if (slab_state <= UP)
		return 0;
//register the slab in sysfs, so the cache shows up under /sys/kernel/slab
	memcg_propagate_slab_attrs(s);
	err = sysfs_slab_add(s);
	if (err)
		__kmem_cache_release(s);

	return err;
}

Next, the kmem_cache_open function:

static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
{
//update flags according to the slub_debug setting, if any
	s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	s->random = get_random_long();
#endif
//returns 1 on success. Based on the object size, compute the best order and the number of objects per slab, then initialize s->oo, s->min, s->size and s->max in the kmem_cache
	if (!calculate_sizes(s, -1))
		goto error;
//disable_higher_order_debug is 1 when slub_debug was booted with the 'O' option (turn debugging off for caches whose minimum order would grow), 0 otherwise
	if (disable_higher_order_debug) {
		/*
		 * Disable debugging flags that store metadata if the min slab
		 * order increased.
		 */
//if enabling debug raised the slab's minimum order (s->min), drop the metadata debug flags and run calculate_sizes once more
		if (get_order(s->size) > get_order(s->object_size)) {
			s->flags &= ~DEBUG_METADATA_FLAGS;
			s->offset = 0;
			if (!calculate_sizes(s, -1))
				goto error;
		}
	}
//enable the cmpxchg-double fast path used later when allocating objects from the cache
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
		/* Enable fast mode */
		s->flags |= __CMPXCHG_DOUBLE;
#endif

	/*
	 * The larger the object size is, the more pages we want on the partial
	 * list to avoid pounding the page allocator excessively.
	 */
	set_min_partial(s, ilog2(s->size) / 2);//initialize s->min_partial

	set_cpu_partial(s);//initialize s->cpu_partial

//with CONFIG_NUMA defined, also set the remote-node defrag ratio
#ifdef CONFIG_NUMA
	s->remote_node_defrag_ratio = 1000;
#endif

	/* Initialize the pre-computed randomized freelist if slab is up */
//slab_state starts out as DOWN during early boot
	if (slab_state >= UP) {
		if (init_cache_random_seq(s))
			goto error;
	}
//initialize s->node; returns 1 on success
	if (!init_kmem_cache_nodes(s))
		goto error;
//initialize s->cpu_slab; on success it returns 1 and kmem_cache_open returns 0
	if (alloc_kmem_cache_cpus(s))
		return 0;
//if alloc_kmem_cache_cpus returned 0, free the cache's node structures and report the failure
	free_kmem_cache_nodes(s);
error:
	if (flags & SLAB_PANIC)
		panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n",
		      s->name, s->size, s->size,
		      oo_order(s->oo), s->offset, (unsigned long)flags);
	return -EINVAL;
}

1.1 calculate_sizes

        Based on the object size, compute the best order, derive the number of objects in this slab cache, and initialize s->oo, s->min, s->size and s->max in the kmem_cache.

/*
 * calculate_sizes() determines the order and the distribution of data within
 * a slab object.
 */
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
	slab_flags_t flags = s->flags;
	unsigned int size = s->object_size;
	unsigned int order;

	/*
	 * Round up object size to the next word boundary. We can only
	 * place the free pointer at word boundaries and this determines
	 * the possible location of the free pointer.
	 */
//round size up to the word size; size must be a multiple of sizeof(void *) (8 bytes on 64-bit)
	size = ALIGN(size, sizeof(void *));

#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Determine if we can poison the object itself. If the user of
	 * the slab may touch the object after free or before allocation
	 * then we should never poison the object itself.
	 */
//with slub debug enabled, decide whether the object itself may be poisoned
	if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
			!s->ctor)
		s->flags |= __OBJECT_POISON;
	else
		s->flags &= ~__OBJECT_POISON;


	/*
	 * If we are Redzoning then check if there is some space between the
	 * end of the object and the free pointer. If not then add an
	 * additional word to have some bytes to store Redzone information.
	 */
//with redzoning, if there is no space between the end of the object and the free pointer, add one word (8B) to hold the redzone information
	if ((flags & SLAB_RED_ZONE) && size == s->object_size)
		size += sizeof(void *);
#endif

	/*
	 * With that we have determined the number of bytes in actual use
	 * by the object. This is the potential offset to the free pointer.
	 */
//s->inuse is object_size rounded up to word alignment, i.e. the potential offset of the free pointer
	s->inuse = size;
//when doing RCU, having a constructor, or poisoning objects, the first word of an object must not be used to hold the FP (free pointer), i.e. the inline-pointer layout is not allowed;
//in that case the pointer is placed outside the object: an extra sizeof(void *) bytes after the object are reserved to hold it
	if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
		s->ctor)) {
		/*
		 * Relocate free pointer after the object if it is not
		 * permitted to overwrite the first word of the object on
		 * kmem_cache_free.
		 *
		 * This is the case if we do RCU, have a constructor or
		 * destructor or are poisoning the objects.
		 */
		s->offset = size;
		size += sizeof(void *);
	}

#ifdef CONFIG_SLUB_DEBUG
//with slub debug and SLAB_STORE_USER, the object layout reserves room after the object to record alloc/free tracking information, so the size grows
	if (flags & SLAB_STORE_USER)
		/*
		 * Need to store information about allocs and frees after
		 * the object.
		 */
		size += 2 * sizeof(struct track);
#endif
//give KASAN a chance to adjust the size and flags for its own redzone needs
	kasan_cache_create(s, &size, &s->flags);
#ifdef CONFIG_SLUB_DEBUG
//with slub debug redzoning, add one word of trailing padding plus the left redzone (red_left_pad) to catch writes before the start of the object
	if (flags & SLAB_RED_ZONE) {
		/*
		 * Add some empty padding so that we can catch
		 * overwrites from earlier objects rather than let
		 * tracking information or the free pointer be
		 * corrupted if a user writes before the start
		 * of the object.
		 */
		size += sizeof(void *);

		s->red_left_pad = sizeof(void *);
		s->red_left_pad = ALIGN(s->red_left_pad, s->align);
		size += s->red_left_pad;
	}
#endif

	/*
	 * SLUB stores one object immediately after another beginning from
	 * offset 0. In order to align the objects we have to simply size
	 * each object to conform to the alignment.
	 */
//debug or not, finally round size up to s->align and store the result in s->size
	size = ALIGN(size, s->align);
	s->size = size;
//with forced_order == -1 passed in by the caller, we take the else branch into calculate_order
/*
calculate_order:
a. derive max_objects and min_objects from size
b. search for a reasonable order inside the while loop, then return it
c. if none fits, retry with min_objects set to 1
d. if that still fails, raise the loop's maximum order to MAX_ORDER (11) and try again
*/
	if (forced_order >= 0)
		order = forced_order;
	else
		order = calculate_order(size);

	if ((int)order < 0)
		return 0;
//start with an empty gfp mask for allocating pages from the buddy system
	s->allocflags = 0;
	if (order)
		s->allocflags |= __GFP_COMP;

	if (s->flags & SLAB_CACHE_DMA)
		s->allocflags |= GFP_DMA;

	if (s->flags & SLAB_CACHE_DMA32)
		s->allocflags |= GFP_DMA32;

	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		s->allocflags |= __GFP_RECLAIMABLE;

	/*
	 * Determine the number of objects per slab
	 */
//combine this slab cache's order and the object size into s->oo via oo_make
	s->oo = oo_make(order, size);
//s->min uses get_order(size), the smallest order that can still hold a single object; it serves as the
//fallback configuration when an allocation of oo_order(s->oo) pages fails under memory pressure
	s->min = oo_make(get_order(size), size);
//oo_objects extracts the low 16 bits of s->oo, i.e. the number of objects per slab; if it exceeds oo_objects(s->max), update s->max = s->oo
	if (oo_objects(s->oo) > oo_objects(s->max))
		s->max = s->oo;
//per the '!!' note in the glossary at the end, this returns 1 on success
	return !!oo_objects(s->oo);
}
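
To make the layout concrete, a worked example (assumed values: object_size = 100, no debug flags, no ctor, s->align = 8): size = ALIGN(100, 8) = 104, s->inuse = 104, the free pointer stays inline at offset 0 since there is no RCU/ctor/poisoning, and the final s->size = ALIGN(104, 8) = 104. Add SLAB_TYPESAFE_BY_RCU or a ctor, and s->offset becomes 104 while s->size grows to 112 to host the out-of-line free pointer.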

(1) The calculate_order function

        Given size, return the order assigned to the slab cache; the slab then consists of 2^order pages.

mm/slub.c
/*
 * Mininum / Maximum order of slab pages. This influences locking overhead
 * and slab fragmentation. A higher order reduces the number of partial slabs
 * and increases the number of allocations possible without having to
 * take the list_lock.
 */
//three static globals
static unsigned int slub_min_order;//defaults to 0
static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; //i.e. 3
static unsigned int slub_min_objects;//defaults to 0
/*
 * Mininum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10
#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 *///at most 32767 objects per slab page

//for a given size and order, how many objects fit into the slab
static inline unsigned int order_objects(unsigned int order, unsigned int size)
{
	return ((unsigned int)PAGE_SIZE << order) / size;//PAGE_SIZE = 4K (typically)
}

include/asm-generic/getorder.h
static inline __attribute_const__ int get_order(unsigned long size)
{
	if (__builtin_constant_p(size)) {//__builtin_constant_p tells whether size is a compile-time constant; if so, take this branch
		if (!size)
			return BITS_PER_LONG - PAGE_SHIFT;

		if (size < (1UL << PAGE_SHIFT))//PAGE_SHIFT = 12, i.e. size < 4K
			return 0;
//for size >= 4K, the result is ilog2(size - 1) - 12 + 1
		return ilog2((size) - 1) - PAGE_SHIFT + 1;
	}

	size--;
	size >>= PAGE_SHIFT;
#if BITS_PER_LONG == 32
	return fls(size);
#else
	return fls64(size);
#endif
}
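
For example, get_order(14000) with 4 KiB pages: 14000 >= 4K, so the result is ilog2(13999) - 12 + 1 = 13 - 12 + 1 = 2, i.e. 2^2 = 4 pages (16 KiB) are needed to hold 14000 bytes.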

mm/slub.c
//compute the slab cache order corresponding to an object size
static inline unsigned int slab_order(unsigned int size,
		unsigned int min_objects, unsigned int max_order,
		unsigned int fract_leftover)
{
	unsigned int min_order = slub_min_order;
	unsigned int order;
//this if is normally not taken
	if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
		return get_order(size * MAX_OBJS_PER_PAGE) - 1;
//loop from the minimum order up to the maximum, break out as soon as an order qualifies, and return it
	for (order = max(min_order, (unsigned int)get_order(min_objects * size));
			order <= max_order; order++) {

		unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
		unsigned int rem;
//rem is the unused space left over after the slab has been carved into objects
		rem = slab_size % size;
//accept this order if the waste is at most 1/fract_leftover of the slab; otherwise it is considered wasteful and the search continues
		if (rem <= slab_size / fract_leftover)
			break;
	}

	return order;
}

//with the helpers covered, on to the main function.......
static inline int calculate_order(unsigned int size)
{
	unsigned int order;
	unsigned int min_objects;
	unsigned int max_objects;

	/*
	 * Attempt to find best configuration for a slab. This
	 * works by first attempting to generate a layout with
	 * the best configuration and backing off gradually.
	 *
	 * First we increase the acceptable waste in a slab. Then
	 * we reduce the minimum objects required in a slab.
	 */
	min_objects = slub_min_objects;
	if (!min_objects)
//nr_cpu_ids is the number of CPUs in the machine; fls returns the position of the highest set bit, e.g. with 4 CPUs, fls(4) = 3
		min_objects = 4 * (fls(nr_cpu_ids) + 1);
//the largest number of objects that fit at the maximum allowed order for the current page size
	max_objects = order_objects(slub_max_order, size);
	min_objects = min(min_objects, max_objects);

	while (min_objects > 1) {
		unsigned int fraction;

		fraction = 16;
		while (fraction >= 4) {
//core call: slab_order computes the order for this object size
			order = slab_order(size, min_objects,
					slub_max_order, fraction);
			if (order <= slub_max_order)
				return order;
			fraction /= 2;
		}
		min_objects--;
	}

	/*
	 * We were unable to place multiple objects in a slab. Now
	 * lets see if we can place a single object there.
	 */
//if the while loop found no suitable order, retry with min_objects = 1: slab_order(size, 1, slub_max_order, 1)
	order = slab_order(size, 1, slub_max_order, 1);
	if (order <= slub_max_order)
		return order;

	/*
	 * Doh this slab cannot be placed using slub_max_order.
	 */
//still nothing: raise the order cap to the maximum MAX_ORDER (11) and keep looking
	order = slab_order(size, 1, MAX_ORDER, 1);
	if (order < MAX_ORDER)
		return order;
	return -ENOSYS;
}
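
A worked example (assuming 4 KiB pages, slub_max_order = 3 and an 8-CPU machine): for size = 700, min_objects = 4 * (fls(8) + 1) = 20 and max_objects = order_objects(3, 700) = 32768 / 700 = 46, so min_objects stays 20. Inside slab_order(700, 20, 3, 16) the loop starts at get_order(20 * 700) = get_order(14000) = 2; at order 2 the slab is 16384 bytes, rem = 16384 % 700 = 284, and 284 <= 16384 / 16 = 1024, so order 2 is accepted: this cache uses 4-page slabs holding 23 objects each.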

        (2) The oo_make and oo_objects functions

/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 */
//a 32-bit word holding the order in the high 16 bits and the objects-per-slab count in the low 16; it can be read and updated atomically, without interruption
struct kmem_cache_order_objects {
	unsigned int x;
};

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1) //65535, sixteen 1-bits

static inline struct kmem_cache_order_objects oo_make(unsigned int order,
		unsigned int size)
{
	struct kmem_cache_order_objects x = {
		(order << OO_SHIFT) + order_objects(order, size) //OO_SHIFT = 16
	};

	return x;
}

static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
{
	return x.x & OO_MASK; //extract the low 16 bits: the number of objects in one slab
}
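
Continuing the example above (order 2, size 700): oo_make(2, 700) stores (2 << 16) + 23 = 131095 in x; the order can be read back from the high 16 bits (the kernel's oo_order helper) and oo_objects(x) = x.x & OO_MASK recovers the 23 objects per slab.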

1.2 The set_min_partial and set_cpu_partial functions

        Initialize the s->min_partial and s->cpu_partial parameters.


The actual arguments passed in kmem_cache_open:
/*
 * The larger the object size is, the more pages we want on the partial
 * list to avoid pounding the page allocator excessively.
 */
set_min_partial(s, ilog2(s->size) / 2);//ilog2 is the base-2 logarithm
set_cpu_partial(s);

/*
 * Mininum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5
/*
 * Maximum number of desirable partial slabs.
 */
#define MAX_PARTIAL 10

//returns 0 by default; with slub debug compiled in it still normally returns 0, unless debug flags were actually configured
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
	return 0;
#endif
}

static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
//with CONFIG_SLUB_CPU_PARTIAL, this depends on kmem_cache_debug; as noted above that normally returns 0, so the negation returns 1
#ifdef CONFIG_SLUB_CPU_PARTIAL
	return !kmem_cache_debug(s);
#else
	return false;
#endif
}

mm/slub.c
static void set_min_partial(struct kmem_cache *s, unsigned long min)
{
	if (min < MIN_PARTIAL)
		min = MIN_PARTIAL;
	else if (min > MAX_PARTIAL)
		min = MAX_PARTIAL;
	s->min_partial = min;
}

static void set_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	/*
	 * cpu_partial determined the maximum number of objects kept in the
	 * per cpu partial lists of a processor.
	 *
	 * Per cpu partial lists mainly contain slabs that just have one
	 * object freed. If they are used for allocation then they can be
	 * filled up again with minimal effort. The slab will never hit the
	 * per node partial lists and therefore no locking will be required.
	 *
	 * This setting also determines
	 *
	 * A) The number of objects from per cpu partial slabs dumped to the
	 *    per node list when we reach the limit.
	 * B) The number of objects in cpu partial slabs to extract from the
	 *    per node list when we run out of per cpu objects. We only fetch
	 *    50% to keep some capacity around for frees.
	 */
//kmem_cache_has_cpu_partial returns 1 when CONFIG_SLUB_CPU_PARTIAL is configured (and no debug flags), 0 otherwise
	if (!kmem_cache_has_cpu_partial(s))
		s->cpu_partial = 0;
	else if (s->size >= PAGE_SIZE)//PAGE_SIZE == 4K
		s->cpu_partial = 2;
	else if (s->size >= 1024)
		s->cpu_partial = 6;
	else if (s->size >= 256)
		s->cpu_partial = 13;
	else
		s->cpu_partial = 30;
#endif
}
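
Plugging in the running example (s->size = 104): ilog2(104) / 2 = 6 / 2 = 3, which is below MIN_PARTIAL and gets clamped up, so s->min_partial = 5; and since 104 < 256, set_cpu_partial leaves s->cpu_partial = 30.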

1.3 The init_kmem_cache_nodes function

        Initializes the slab cache's s->node array.

include/linux/nodemask.h
#if MAX_NUMNODES > 1
#define for_each_node_mask(node, mask)			\
	for ((node) = first_node(mask);			\
		(node) < MAX_NUMNODES;			\
		(node) = next_node((node), (mask)))
#else /* MAX_NUMNODES == 1 */
#define for_each_node_mask(node, mask)			\
	if (!nodes_empty(mask))				\
		for ((node) = 0; (node) < 1; (node)++)
#endif /* MAX_NUMNODES */
//iterate over every node in the given state
#define for_each_node_state(__node, __state) \
	for_each_node_mask((__node), node_states[__state])

/*
 * Bitmasks that are kept for all the nodes.
 */
enum node_states {
	N_POSSIBLE,		/* The node could become online at some point */
	N_ONLINE,		/* The node is online */
	N_NORMAL_MEMORY,	/* The node has regular memory */
#ifdef CONFIG_HIGHMEM
	N_HIGH_MEMORY,		/* The node has regular or high memory */
#else
	N_HIGH_MEMORY = N_NORMAL_MEMORY,
#endif
	N_MEMORY,		/* The node has memory(regular, high, movable) */
	N_CPU,		/* The node has one or more cpus */
	NR_NODE_STATES
};


static void init_kmem_cache_node(struct kmem_cache_node *n)
{
	n->nr_partial = 0;
	spin_lock_init(&n->list_lock);
	INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
	atomic_long_set(&n->nr_slabs, 0);
	atomic_long_set(&n->total_objects, 0);
	INIT_LIST_HEAD(&n->full);
#endif
}

mm/slub.c
//the main event again.....
static int init_kmem_cache_nodes(struct kmem_cache *s)
{
	int node;
/* Visit every node and, for each one:
 * 1. allocate a kmem_cache_node structure for the node
 * 2. initialize entry node of the node array in the kmem_cache s to point at that kmem_cache_node *n
 */
	for_each_node_state(node, N_NORMAL_MEMORY) {
		struct kmem_cache_node *n;
//normally slab_state is already UP at this point; as noted earlier, early boot raises it to UP
/*
in the DOWN state this node has no slab yet: early_kmem_cache_node_alloc allocates one and also initializes the page structure backing that slab
*/
		if (slab_state == DOWN) {
			early_kmem_cache_node_alloc(node);
			continue;
		}
//with slab_state == UP, the call below allocates a kmem_cache_node; it too is ultimately implemented via slab_alloc_node, described in the article on how the slub allocator works
/*
just like the slab-cache allocations seen earlier, it works by returning the first free object on that node
*/
		n = kmem_cache_alloc_node(kmem_cache_node,
						GFP_KERNEL, node);
//on failure, free the node structures of slab cache s and return 0
		if (!n) {
			free_kmem_cache_nodes(s);
			return 0;
		}
//initialize n: n->nr_partial = 0, n->list_lock, n->partial, and so on
		init_kmem_cache_node(n);
//store n at index node of the s->node array
		s->node[node] = n;
	}
	return 1;
}

1.4 The alloc_kmem_cache_cpus function

        This function allocates space for every CPU via __alloc_percpu(), then initializes that space onto each CPU via init_kmem_cache_cpus().

#define PERCPU_DYNAMIC_EARLY_SIZE	(12 << 10)
#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1) //PAGE_SHIFT=12
include/linux/build_bug.h
/**
 * BUILD_BUG_ON - break compile if a condition is true.
 * @condition: the condition which the compiler should know is false.
 *
 * If you have some code which relies on certain constants being equal, or
 * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
 * detect if someone changes it.
 *
 * The implementation uses gcc's reluctance to create a negative array, but gcc
 * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
 * inline functions).  Luckily, in 4.3 they added the "error" function
 * attribute just for this type of case.  Thus, we use a negative sized array
 * (should always create an error on gcc versions older than 4.4) and then call
 * an undefined function with the error attribute (should always create an
 * error on gcc 4.3 and later).  If for some reason, neither creates a
 * compile-time error, we'll still have a link-time error, which is harder to
 * track down.
 */
#ifndef __OPTIMIZE__
//when condition is true, !!(condition) == 1 and the type is char[-1], which fails to compile; when it is false, !!(condition) == 0 and the type is the harmless char[1]
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
#else
#define BUILD_BUG_ON(condition) \
	BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
#endif


static void init_kmem_cache_cpus(struct kmem_cache *s)
{
	int cpu;
//iterate over all CPUs of the machine; per_cpu_ptr binds the percpu variable allocated earlier to each CPU and initializes the tid field inside it
	for_each_possible_cpu(cpu)
		per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
}

mm/slub.c
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
//if the condition below holds, the build breaks at compile time
	BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
			KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));

	/*
	 * Must align to double word boundary for the double cmpxchg
	 * instructions to work; see __pcpu_double_call_return_bool().
	 */
//allocate the per-CPU space; __alloc_percpu belongs to the percpu allocator, which we will not expand on here; it is enough to know that one percpu area gets allocated
	s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
				     2 * sizeof(void *));

	if (!s->cpu_slab)
		return 0;
//bind each percpu variable to its CPU
	init_kmem_cache_cpus(s);

	return 1;
}
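
For orientation (field names as in mm/slub.c's struct kmem_cache_cpu for kernels of this vintage): each per-CPU structure holds the lockless freelist pointer, the tid transaction id consumed by the cmpxchg-double fast path, the currently active slab page, and, with CONFIG_SLUB_CPU_PARTIAL, the per-CPU partial list.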
 

2. The list_add call

Adds the newly created kmem_cache slab cache to the global slab_caches list.

Glossary

cpu_online_map: the map of the CPUs currently usable (online).

        1. With 8 CPUs in the machine, all online, cpu_online_map covers 8 CPUs;

        2. Take one of them offline and it drops to 7.

The double negation !!: any non-zero value is mapped to 1, while 0 stays 0 (e.g. !!5 == 1, !!0 == 0).

Bonus note

        create_kmalloc_caches does not call kmem_cache_create to create the slab caches for the different object sizes; instead it loops and builds them out of kmem_cache_zalloc, the same helper kmem_cache_create relies on:

mm/slab_common.c
create_kmalloc_caches
	->loops over new_kmalloc_cache
		->create_kmalloc_cache
			->kmem_cache_zalloc
			->create_boot_cache (initializes some fields of the slab cache structure)

References (many thanks to the authors of the following articles):

【原创】(十一)Linux内存管理slub分配器 - LoyenWang - 博客园

Slub分配器原理 - 知乎

SLUB结构体创建及创建slab分析 - 云+社区 - 腾讯云

ARM64内存管理八:slub创建 | Black-Jack

【Linux内存源码分析】SLUB分配算法(3) – JeanLeo 博客

图解slub

Linux-3.14.12内存管理笔记【SLUB分配算法(3)】-Jean_Leo-ChinaUnix博客

Linux-3.14.12内存管理笔记【SLUB分配算法(6)】-Jean_Leo-ChinaUnix博客
