水平有限,描述不当之处还请之处,转载请注明出处http://blog.csdn.net/vanbreaker/article/details/7666959
在前文中介绍了slab所涉及到的数据结构, slab分配器的初始化工作都是围绕这些数据结构来展开的,主要是针对以下两个问题:
1.创建kmem_cache高速缓存用来存储所有的cache描述符
2.创建array_cache和kmem_list3高速缓存用来存储slab数据结构中的这两个关键结构
这里明显有点自相矛盾,那就是slab管理器尚未建立起来,又如何靠slab分配高速缓存来给这些结构分配空间呢?
解决第一个问题的方法是直接静态定义一个名为cache_cache的kmem_cache结构,来管理所有的kmem_cache描述符,对于array_cache和kmem_list3,内核也是先静态定义,然后建立起普通高速缓存(general cache),再使用kmalloc分配普通高速缓存空间来替代之前静态定义的部分。
普通高速缓存是一组大小按几何倍数增长的高速缓存的合集,一个普通高速缓存用如下结构描述
/* Size description struct for general caches. */
struct cache_sizes {
size_t cs_size; /*general cache的大小*/
struct kmem_cache *cs_cachep; /*general cache的cache描述符指针*/
#ifdef CONFIG_ZONE_DMA
struct kmem_cache *cs_dmacachep;
#endif
};
普通高速缓存的大小由malloc_sizes表来确定
/*
* These are the default caches for kmalloc. Custom caches can have other sizes.
*/
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
CACHE(ULONG_MAX)
#undef CACHE
};
其中<linux/kmalloc_sizes.h>中的内容为
#if (PAGE_SIZE == 4096)
CACHE(32)
#endif
CACHE(64)
#if L1_CACHE_BYTES < 64
CACHE(96)
#endif
CACHE(128)
#if L1_CACHE_BYTES < 128
CACHE(192)
#endif
CACHE(256)
CACHE(512)
CACHE(1024)
CACHE(2048)
CACHE(4096)
CACHE(8192)
CACHE(16384)
CACHE(32768)
CACHE(65536)
CACHE(131072)
#if KMALLOC_MAX_SIZE >= 262144
CACHE(262144)
#endif
#if KMALLOC_MAX_SIZE >= 524288
CACHE(524288)
#endif
#if KMALLOC_MAX_SIZE >= 1048576
CACHE(1048576)
#endif
#if KMALLOC_MAX_SIZE >= 2097152
CACHE(2097152)
#endif
#if KMALLOC_MAX_SIZE >= 4194304
CACHE(4194304)
#endif
#if KMALLOC_MAX_SIZE >= 8388608
CACHE(8388608)
#endif
#if KMALLOC_MAX_SIZE >= 16777216
CACHE(16777216)
#endif
#if KMALLOC_MAX_SIZE >= 33554432
CACHE(33554432)
#endif
cache_cache的初始化和普通高速缓存的建立由start_kernel()-->mm_init()-->kmem_cache_init()函数来完成,下面就来看具体的初始化代码
void __init kmem_cache_init(void)
{
size_t left_over;
struct cache_sizes *sizes;
struct cache_names *names;
int i;
int order;
int node;
if (num_possible_nodes() == 1)
use_alien_caches = 0;
/*初始化静态L3变量initkmem_list3*/
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
if (i < MAX_NUMNODES)
cache_cache.nodelists[i] = NULL;
}
/*将cache_cache和initkmem_list3相关联*/
set_up_list3s(&cache_cache, CACHE_CACHE);
/*
* Fragmentation resistance on low memory - only use bigger
* page orders on machines with more than 32MB of memory.
*/
if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
slab_break_gfp_order = BREAK_GFP_ORDER_HI;
/* Bootstrap is tricky, because several objects are allocated
* from caches that do not exist yet:
* 1) initialize the cache_cache cache: it contains the struct
* kmem_cache structures of all caches, except cache_cache itself:
* cache_cache is statically allocated.
* Initially an __init data area is used for the head array and the
* kmem_list3 structures, it's replaced with a kmalloc allocated
* array at the end of the bootstrap.
* 2) Create the first kmalloc cache.
* The struct kmem_cache for the new cache is allocated normally.
* An __init data area is used for the head array.
* 3) Create the remaining kmalloc caches, with minimally sized
* head arrays.
* 4) Replace the __init data head arrays for cache_cache and the first
* kmalloc cache with kmalloc allocated arrays.
* 5) Replace the __init data for kmem_list3 for cache_cache and
* the other cache's with kmalloc allocated memory.
* 6) Resize the head arrays of the kmalloc caches to their final sizes.
*/
node = numa_node_id();
/*初始化cache_cache的其余部分*/
/* 1) create the cache_cache */
INIT_LIST_HEAD(&cache_chain);
list_add(&cache_cache.next, &cache_chain);
cache_cache.colour_off = cache_line_size();
cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
/*
* struct kmem_cache size depends on nr_node_ids, which
* can be less than MAX_NUMNODES.
*/
cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
nr_node_ids * sizeof(struct kmem_list3 *);
#if DEBUG
cache_cache.obj_size = cache_cache.buffer_size;
#endif
cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
cache_line_size());
cache_cache.reciprocal_buffer_size =
reciprocal_value(cache_cache.buffer_size);
/*计算cache_cache的剩余空间以及slab中对象的数目,order决定了slab的大小(PAGE_SIZE<<order)*/
for (order = 0; order < MAX_ORDER; order++) {
cache_estimate(order, cache_cache.buffer_size,
cache_line_size(), 0, &left_over, &cache_cache.num);
/*当该order计算出来的num,即slab中对象的数目不为0时,则跳出循环*/
if (cache_cache.num)
break;
}
BUG_ON(!cache_cache.num);
cache_cache.gfporder = order;/*确定分配给每个slab的页数的对数*/
cache_cache.colour = left_over / cache_cache.colour_off;/*确定可用颜色的数目*/
/*确定slab管理区的大小,即slab描述符以及kmem_bufctl_t数组*/
cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
sizeof(struct slab), cache_line_size());
/* 2+3) create the kmalloc caches */
sizes = malloc_sizes;
names = cache_names;
/*
* Initialize the caches that provide memory for the array cache and the
* kmem_list3 structures first. Without this, further allocations will
* bug.
*/
/*为了后面能够调用kmalloc()创建per-CPU高速缓存和kmem_list3高速缓存,
这里必须先创建大小相应的general cache*/
sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
sizes[INDEX_AC].cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL);
/*如果AC和L3在malloc_sizes中的偏移不一样,也就是说它们的大小不属于同一级别,
则创建L3的gerneral cache,否则两者共用一个gerneral cache*/
if (INDEX_AC != INDEX_L3) {
sizes[INDEX_L3].cs_cachep =
kmem_cache_create(names[INDEX_L3].name,
sizes[INDEX_L3].cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL);
}
slab_early_init = 0;
/*创建各级的gerneral cache*/
while (sizes->cs_size != ULONG_MAX) {
/*
* For performance, all the general caches are L1 aligned.
* This should be particularly beneficial on SMP boxes, as it
* eliminates "false sharing".
* Note for systems short on memory removing the alignment will
* allow tighter packing of the smaller caches.
*/
if (!sizes->cs_cachep) {
sizes->cs_cachep = kmem_cache_create(names->name,
sizes->cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL);
}
#ifdef CONFIG_ZONE_DMA
sizes->cs_dmacachep = kmem_cache_create(
names->name_dma,
sizes->cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
SLAB_PANIC,
NULL);
#endif
sizes++;
names++;
}
/* 4) Replace the bootstrap head arrays */
{
struct array_cache *ptr;
/*这里调用kmalloc()为cache_cache创建per-CPU高速缓存*/
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
/*将静态定义的initarray_cache中的array_cache拷贝到malloc申请到的空间中*/
memcpy(ptr, cpu_cache_get(&cache_cache),
sizeof(struct arraycache_init));
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
spin_lock_init(&ptr->lock);
/*将cache_cache与保存per-CPU高速缓存的空间关联*/
cache_cache.array[smp_processor_id()] = ptr;
/*为之前创建的AC gerneral cache创建per-CPU高速缓存,替换静态定义的initarray_generic.cache*/
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
!= &initarray_generic.cache);
memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
sizeof(struct arraycache_init));
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
spin_lock_init(&ptr->lock);
malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
ptr;
}
/* 5) Replace the bootstrap kmem_list3's */
{
int nid;
for_each_online_node(nid) {
/*为cache_cache的kmem_list3申请高速缓存空间,并替换静态定义的initkmem_list3*/
init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
/*为AC的kmem_list3申请高速缓存空间,并替换静态定义的initkmem_list3*/
init_list(malloc_sizes[INDEX_AC].cs_cachep,
&initkmem_list3[SIZE_AC + nid], nid);
if (INDEX_AC != INDEX_L3) {
/*为L3的kmem_list3申请高速缓存空间,并替换静态定义的initkmem_list3*/
init_list(malloc_sizes[INDEX_L3].cs_cachep,
&initkmem_list3[SIZE_L3 + nid], nid);
}
}
}
g_cpucache_up = EARLY;
}
- 前面大部分的代码都是围绕cache_cache展开的,主要是将cache_cache同静态kmem_list3进行关联,将cache_cache添加到cache_chain链表中,并且计算初始化内部的一些数据项
- 现在还没有高速缓存来存储cache_cache中的kmem_list3描述符和array_cache描述符,因此下面就要调用kmem_cache_create()建立高速缓存来存储这两种描述符
- 内核使用g_cpucache_up这个枚举量来表示slab分配器的初始化进度
static enum {
NONE,
PARTIAL_AC,
PARTIAL_L3,
EARLY,
FULL
} g_cpucache_up;
这个值的更新是在kmem_cache_create()-->setup_cpu_cache()函数中进行更新的,每调用一次kmem_cache_create(),g_cpucache_up的值就加1,直到它等于EARLY,比如说第一次调用kmem_cache_create()创建了AC(array_cache)的高速缓存,那么g_cpucache_up由NONE变为PARTIAL_AC,那么下次调用kmem_cache_create()创建L3高速缓存时,内核就知道AC高速缓存已经准备好了,也就是说可以在array_cache高速缓存中为L3高速缓存描述符的array_cache描述符分配高速缓存空间了。
- 创建了AC和L3高速缓存后就循环创建各级普通高速缓存,此时创建的高速缓存都是完整的了!也就是说里面的结构变量都已经是存储在相应的高速缓存中
- 由于AC高速缓存已经创建,因此kmalloc()动态创建一个array_cache对象替换cache_cache的静态array_cache
- 由于AC高速缓存描述符本身的array_cache描述符还未动态创建,因此同样kmalloc()动态创建一个array_cache替换AC高速缓存的静态array_cache
- 为cache_cache,AC,L3高速缓存分别动态创建kmem_list描述符对象,替换静态的initkmem_list3
- 将g_cpucache_up置为EARLY,表示slab分配器的初始化已初步完成
slab分配器初始化工作的最后一步由kmem_cache_init_late()函数完成,这个函数就不做详细分析了,它的工作就是设置cache_cache和各级普通高速缓存中的array_cache本地高速缓存的相关属性
void __init kmem_cache_init_late(void)
{
struct kmem_cache *cachep;
/* 6) resize the head arrays to their final sizes */
mutex_lock(&cache_chain_mutex);
list_for_each_entry(cachep, &cache_chain, next)
if (enable_cpucache(cachep, GFP_NOWAIT))
BUG();
mutex_unlock(&cache_chain_mutex);
/* Done! */
g_cpucache_up = FULL; /*slab初始化完成*/
/* Annotate slab for lockdep -- annotate the malloc caches */
init_lock_keys();
/*
* Register a cpu startup notifier callback that initializes
* cpu_cache_get for all new cpus
*/
register_cpu_notifier(&cpucache_notifier);
/*
* The reap timers are started later, with a module init call: That part
* of the kernel is not yet operational.
*/
}