释放slab缓存对象的API函数是kmem_cache_free()
[mm/slab.c]
/**
 * kmem_cache_free - Deallocate an object
 * @cachep: The cache the allocation was from.
 * @objp: The previously allocated object.
 *
 * Free an object which was previously allocated from this
 * cache.
 */
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
unsigned long flags;
/* Resolve the kmem_cache that really owns @objp from the object's
 * virtual address: virt_to_pfn() turns the address into a pfn and
 * pfn_to_page() maps that pfn to its struct page.  Within a slab, the
 * head page's page->slab_cache points back at the owning
 * struct kmem_cache; see the cache_from_obj() implementation below. */
cachep = cache_from_obj(cachep, objp);
if (!cachep)
return;
/* Disable interrupts on the local CPU while the per-CPU array cache
 * is manipulated. */
local_irq_save(flags);
debug_check_no_locks_freed(objp, cachep->object_size);
if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(objp, cachep->object_size);
/* Do the actual free; see the __cache_free() implementation below. */
__cache_free(cachep, objp, _RET_IP_);
local_irq_restore(flags);
trace_kmem_cache_free(_RET_IP_, objp);
}
cache_from_obj()函数的实现:确定地址x对应page->slab_cache是否等于s
[kmem_cache_free()->cache_from_obj()]
/* Sanity-check that object @x really belongs to cache @s: look up the
 * head page of @x and compare page->slab_cache against @s (allowing a
 * memcg child/root relationship via slab_equal_or_root()).  Returns the
 * cache recorded in the page on success, or @s after warning on a
 * mismatch. */
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
struct kmem_cache *cachep;
struct page *page;
/*
 * When kmemcg is not being used, both assignments should return the
 * same value. but we don't want to pay the assignment price in that
 * case. If it is not compiled in, the compiler should be smart enough
 * to not do even the assignment. In that case, slab_equal_or_root
 * will also be a constant.
 */
if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE))
return s;
page = virt_to_head_page(x);
cachep = page->slab_cache;
if (slab_equal_or_root(cachep, s))
return cachep;
pr_err("%s: Wrong slab cache. %s but object is from %s\n",
__func__, cachep->name, s->name);
WARN_ON_ONCE(1);
return s;
}
/* Map a kernel virtual address to the head page of the (possibly
 * compound) page that contains it. */
static inline struct page *virt_to_head_page(const void *x)
{
struct page *page = virt_to_page(x);
/*
 * We don't need to worry about synchronization of tail flag
 * when we call virt_to_head_page() since it is only called for
 * already allocated page and this page won't be freed until
 * this virt_to_head_page() is finished. So use _fast variant.
 */
return compound_head_fast(page);
}
/* Virtual address -> pfn -> struct page. */
#define virt_to_page(addr) pfn_to_page(virt_to_pfn(addr))
回到kmem_cache_free函数
__cache_free()函数实现:
[kmem_cache_free()->__cache_free()]
/*
 * Release an obj back to its cache. If the obj has a constructed state, it must
 * be in this state _before_ it is released. Called with disabled ints.
 */
static inline void __cache_free(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
struct array_cache *ac = cpu_cache_get(cachep);
check_irq_off();
kmemleak_free_recursive(objp, cachep->flags);
objp = cache_free_debugcheck(cachep, objp, caller);
kmemcheck_slab_free(cachep, objp, cachep->object_size);
/*
 * Skip calling cache_free_alien() when the platform is not numa.
 * This will avoid cache misses that happen while accessing slabp (which
 * is per page memory reference) to get nodeid. Instead use a global
 * variable to skip the call, which is mostly likely to be present in
 * the cache.
 */
if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
return;
if (ac->avail < ac->limit) {
STATS_INC_FREEHIT(cachep);
} else {
STATS_INC_FREEMISS(cachep);
/* The local array cache has reached its ac->limit threshold of
 * free objects, so call cache_flusharray() to flush some of them
 * out and reclaim space.  ac->limit is computed in
 * enable_cpucache(); in the example discussed in the surrounding
 * text ac->limit is 120 and ac->batchcount is 60.  See the
 * cache_flusharray() implementation below. */
cache_flusharray(cachep, ac);
}
ac_put_obj(cachep, ac, objp);/* Stash the object in the local array cache:
 * ac->entry[ac->avail++] = objp.  The free path ends here. */
}
回到kmem_cache_free函数
cache_flusharray()函数实现:本地对象缓冲池的空闲对象大于阈值时,尝试回收空闲对象到共享缓冲池。
[kmem_cache_free()->__cache_free()->cache_flusharray()]
/* Drain ac->batchcount free objects out of the per-CPU array cache @ac:
 * first into the node's shared array cache if it has room, otherwise back
 * to the slab lists via free_block().  Called with interrupts disabled. */
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{
int batchcount;
struct kmem_cache_node *n;
int node = numa_mem_id();
LIST_HEAD(list);
batchcount = ac->batchcount;
#if DEBUG
BUG_ON(!batchcount || batchcount > ac->avail);
#endif
check_irq_off();
n = get_node(cachep, node);
spin_lock(&n->list_lock);
/* Is there a shared (per-node) array cache? */
if (n->shared) {
struct array_cache *shared_array = n->shared;
int max = shared_array->limit - shared_array->avail;
if (max) {
if (batchcount > max)
batchcount = max;
/* Copy up to batchcount free object pointers from the local
 * array cache into the shared array cache. */
memcpy(&(shared_array->entry[shared_array->avail]),
ac->entry, sizeof(void *) * batchcount);
shared_array->avail += batchcount;
goto free_done;
}
}
/* No room in the shared array cache (or none exists): free_block()
 * returns batchcount objects to their slabs.  If a slab ends up with
 * no active objects (page->active == 0) and the node's total free
 * object count n->free_objects exceeds the n->free_limit threshold,
 * the slab is detached onto @list and destroyed by slabs_destroy()
 * below.  page->active counts in-use objects of a slab: slab_get_obj()
 * increments it on allocation and slab_put_obj() decrements it on
 * free.  See the free_block() implementation below.
 */
free_block(cachep, ac->entry, batchcount, node, &list);
free_done:
spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
ac->avail -= batchcount;
/* Slide the remaining free object pointers to the front of the
 * local array cache's buffer. */
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}
free_block()函数实现:
[kmem_cache_free()->__cache_free()->cache_flusharray()->free_block()]
/*
 * Caller needs to acquire correct kmem_cache_node's list_lock
 * @list: List of detached free slabs should be freed by caller
 */
static void free_block(struct kmem_cache *cachep, void **objpp,
int nr_objects, int node, struct list_head *list)
{
int i;
struct kmem_cache_node *n = get_node(cachep, node);
for (i = 0; i < nr_objects; i++) {
void *objp;
struct page *page;
clear_obj_pfmemalloc(&objpp[i]);
objp = objpp[i];
/* Find the slab's head page and unlink it from whichever of the
 * node's slab lists it currently sits on; it is re-listed below
 * according to its new state. */
page = virt_to_head_page(objp);
list_del(&page->lru);
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp, node);
STATS_DEC_ACTIVE(cachep);
n->free_objects++;
/* fixup slab chains */
if (page->active == 0) {
/* Slab is now fully free.  If the node holds more free
 * objects than free_limit allows, detach this slab onto
 * @list for the caller to destroy; otherwise keep it on
 * the node's slabs_free list. */
if (n->free_objects > n->free_limit) {
n->free_objects -= cachep->num;
list_add_tail(&page->lru, list);
} else {
list_add(&page->lru, &n->slabs_free);
}
} else {
/* Unconditionally move a slab to the end of the
 * partial list on free - maximum time for the
 * other objects to be freed, too.
 */
list_add_tail(&page->lru, &n->slabs_partial);
}
}
}
回到cache_flusharray()函数