FreeBSD zone allocator

[size=medium]The FreeBSD zone allocator is implemented as a slab allocator, although it differs from the original slab allocator in a number of details.[/size]
[size=medium]In the zone allocator the keg plays the role of the back end, while the zone acts as the front end.[/size]
[size=medium]The main data structures are struct uma_keg, struct uma_zone, struct uma_slab, struct uma_cache, and struct uma_bucket.[/size]
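[size=medium]Before looking at the internals, a minimal sketch of the consumer-facing API may help put the front end in context. The zone(9) interface below (uma_zcreate / uma_zalloc / uma_zfree) is the public face of a zone; the "foo" structure, the zone name and the helper functions are made up for illustration.[/size]

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <vm/uma.h>

struct foo {
	int	f_refs;
	char	f_name[16];
};

static uma_zone_t foo_zone;

static void
foo_init_zone(void)
{
	/* Create a zone (front end) backed by its own keg (back end). */
	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    NULL, NULL, NULL, NULL,	/* ctor, dtor, init, fini */
	    UMA_ALIGN_PTR, 0);
}

static struct foo *
foo_alloc(void)
{
	/* Served from the per-CPU bucket cache whenever possible. */
	return (uma_zalloc(foo_zone, M_WAITOK | M_ZERO));
}

static void
foo_free(struct foo *fp)
{
	uma_zfree(foo_zone, fp);
}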

/*
 * Keg management structure
 *
 * TODO: Optimize for cache line size
 *
 */
struct uma_keg {
	LIST_ENTRY(uma_keg)	uk_link;	/* List of all kegs */
	struct mtx		uk_lock;	/* Lock for the keg */
	struct uma_hash		uk_hash;
	LIST_HEAD(,uma_zone)	uk_zones;	/* Keg's zones */
	LIST_HEAD(,uma_slab)	uk_part_slab;	/* partially allocated slabs */
	LIST_HEAD(,uma_slab)	uk_free_slab;	/* empty slab list */
	LIST_HEAD(,uma_slab)	uk_full_slab;	/* full slabs */
	u_int32_t	uk_recurse;	/* Allocation recursion count */
	u_int32_t	uk_align;	/* Alignment mask */
	u_int32_t	uk_pages;	/* Total page count */
	u_int32_t	uk_free;	/* Count of items free in slabs */
	u_int32_t	uk_size;	/* Requested size of each item */
	u_int32_t	uk_rsize;	/* Real size of each item */
	u_int32_t	uk_maxpages;	/* Maximum number of pages to alloc */
	uma_init	uk_init;	/* Keg's init routine */
	uma_fini	uk_fini;	/* Keg's fini routine */
	uma_alloc	uk_allocf;	/* Allocation function */
	uma_free	uk_freef;	/* Free routine */
	struct vm_object	*uk_obj;	/* Zone specific object */
	vm_offset_t	uk_kva;		/* Base kva for zones with objs */
	uma_zone_t	uk_slabzone;	/* Slab zone backing us, if OFFPAGE */
	u_int16_t	uk_pgoff;	/* Offset to uma_slab struct */
	u_int16_t	uk_ppera;	/* pages per allocation from backend */
	u_int16_t	uk_ipers;	/* Items per slab */
};

[size=medium]A keg keeps three lists of slabs: uk_free_slab (slabs with no items in use), uk_part_slab (partially used slabs), and uk_full_slab (fully used slabs). The uk_link entries chain all kegs in the system together; the head of that list is stored in uma_kegs, so that zone_foreach can walk every keg. A short illustrative fragment follows.[/size]
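[size=medium]All three lists are ordinary queue(3) LIST heads, so they can be walked with the usual macros. The fragment below is illustrative only (it is not taken from uma_core.c) and assumes keg points to a struct uma_keg whose lock is taken around the walk:[/size]

/* Illustrative only: count the free items sitting on partially used slabs. */
uma_slab_t slab;
u_int count = 0;

mtx_lock(&keg->uk_lock);
LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
	count += slab->us_freecount;
mtx_unlock(&keg->uk_lock);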

/* Page management structure */

/* Sorry for the union, but space efficiency is important */
struct uma_slab_head {
	uma_keg_t	us_keg;			/* Keg we live in */
	union {
		LIST_ENTRY(uma_slab)	_us_link;	/* slabs in zone */
		unsigned long		_us_size;	/* Size of allocation */
	} us_type;
	SLIST_ENTRY(uma_slab)	us_hlink;	/* Link for hash table */
	u_int8_t	*us_data;		/* First item */
	u_int8_t	us_flags;		/* Page flags see uma.h */
	u_int8_t	us_freecount;		/* How many are free? */
	u_int8_t	us_firstfree;		/* First free item index */
};

/* The standard slab structure */
struct uma_slab {
	struct uma_slab_head	us_head;	/* slab header data */
	struct {
		u_int8_t	us_item;
	} us_freelist[1];			/* actual number bigger */
};

/*
 * The slab structure for UMA_ZONE_REFCNT zones for whose items we
 * maintain reference counters in the slab for.
 */
struct uma_slab_refcnt {
	struct uma_slab_head	us_head;	/* slab header data */
	struct {
		u_int8_t	us_item;
		u_int32_t	us_refcnt;
	} us_freelist[1];			/* actual number bigger */
};

#define	us_keg		us_head.us_keg
#define	us_link		us_head.us_type._us_link
#define	us_size		us_head.us_type._us_size
#define	us_hlink	us_head.us_hlink
#define	us_data		us_head.us_data
#define	us_flags	us_head.us_flags
#define	us_freecount	us_head.us_freecount
#define	us_firstfree	us_head.us_firstfree

[size=medium]us_link links together all slabs of the same kind (the slabs on one keg list). When memory is allocated with uma_large_malloc, the slab is not managed by any keg, and us_size records the size of the allocation instead.[/size]
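[size=medium]For reference, uma_large_malloc (the path malloc(9) takes for requests too large for the malloc zones) works roughly as sketched below: it takes a stand-alone slab header from slabzone, records the request size in us_size, and never puts the slab on a keg list. This is reconstructed from memory of the same-era uma_core.c rather than copied verbatim:[/size]

void *
uma_large_malloc(int size, int wait)
{
	void *mem;
	uma_slab_t slab;
	u_int8_t flags;

	/* Grab a bare slab header; no keg will ever manage it. */
	slab = uma_zalloc_internal(slabzone, NULL, wait);
	if (slab == NULL)
		return (NULL);
	mem = page_alloc(NULL, size, &flags, wait);
	if (mem) {
		vsetslab((vm_offset_t)mem, slab);
		slab->us_data = mem;
		slab->us_flags = flags | UMA_SLAB_MALLOC;
		slab->us_size = size;	/* remembered for uma_large_free() */
	} else {
		uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE,
		    ZFREE_STATFAIL | ZFREE_STATFREE);
	}
	return (mem);
}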

/*
 * Structures for per cpu queues.
 */
struct uma_bucket {
	LIST_ENTRY(uma_bucket)	ub_link;	/* Link into the zone */
	int16_t	ub_cnt;				/* Count of free items. */
	int16_t	ub_entries;			/* Max items. */
	void	*ub_bucket[];			/* actual allocation storage */
};

typedef struct uma_bucket * uma_bucket_t;

struct uma_cache {
	uma_bucket_t	uc_freebucket;	/* Bucket we're freeing to */
	uma_bucket_t	uc_allocbucket;	/* Bucket to allocate from */
	u_int64_t	uc_allocs;	/* Count of allocations */
	u_int64_t	uc_frees;	/* Count of frees */
};

[size=medium]On allocation, the allocator first looks in the current CPU's uc_allocbucket for a cached item (swapping it with uc_freebucket if the alloc bucket is empty but the free bucket is not); on free, it first looks in uc_freebucket for an empty slot in which to cache the released item (swapping with uc_allocbucket if that has more room). uc_allocs and uc_frees count the allocations and frees satisfied so far entirely within this per-CPU cache and not yet folded back into the zone totals.[/size]

/*
 * Zone management structure
 *
 * TODO: Optimize for cache line size
 *
 */
struct uma_zone {
	char		*uz_name;	/* Text name of the zone */
	struct mtx	*uz_lock;	/* Lock for the zone (keg's lock) */
	uma_keg_t	uz_keg;		/* Our underlying Keg */
	LIST_ENTRY(uma_zone)	uz_link;	/* List of all zones in keg */
	LIST_HEAD(,uma_bucket)	uz_full_bucket;	/* full buckets */
	LIST_HEAD(,uma_bucket)	uz_free_bucket;	/* Buckets for frees */
	uma_ctor	uz_ctor;	/* Constructor for each allocation */
	uma_dtor	uz_dtor;	/* Destructor */
	uma_init	uz_init;	/* Initializer for each item */
	uma_fini	uz_fini;	/* Discards memory */
	u_int64_t	uz_allocs;	/* Total number of allocations */
	u_int64_t	uz_frees;	/* Total number of frees */
	u_int64_t	uz_fails;	/* Total number of alloc failures */
	uint16_t	uz_fills;	/* Outstanding bucket fills */
	uint16_t	uz_count;	/* Highest value ub_ptr can have */
	/*
	 * This HAS to be the last item because we adjust the zone size
	 * based on NCPU and then allocate the space for the zones.
	 */
	struct uma_cache	uz_cpu[1];	/* Per cpu caches */
};

[size=medium]The uz_full_bucket list links the buckets that still hold cached items, while uz_free_bucket links the buckets that hold none. On allocation, if the CPU cache has no cached item, a bucket from uz_full_bucket can be moved into the CPU cache to allocate from; on free, if the CPU cache has no empty slot for the released item, a bucket from uz_free_bucket can be moved into the CPU cache to receive it.[/size]

[size=medium]The main functions that grow a keg are uma_zone_slab and slab_zalloc.[/size]

static uma_slab_t
uma_zone_slab(uma_zone_t zone, int flags)
{
uma_slab_t slab;
uma_keg_t keg;
keg = zone->uz_keg;
/*
* This is to prevent us from recursively trying to allocate
* buckets. The problem is that if an allocation forces us to
* grab a new bucket we will call page_alloc, which will go off
* and cause the vm to allocate vm_map_entries. If we need new
* buckets there too we will recurse in kmem_alloc and bad
* things happen. So instead we return a NULL bucket, and make
* the code that allocates buckets smart enough to deal with it
*
* XXX: While we want this protection for the bucket zones so that
* recursion from the VM is handled (and the calling code that
* allocates buckets knows how to deal with it), we do not want
* to prevent allocation from the slab header zones (slabzone
* and slabrefzone) if uk_recurse is not zero for them. The
* reason is that it could lead to NULL being returned for
* slab header allocations even in the M_WAITOK case, and the
* caller can't handle that.
*/
if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
if (zone != slabzone && zone != slabrefzone && zone != zones)
return (NULL);
slab = NULL;
for (;;) {
/*
* Find a slab with some space. Prefer slabs that are partially
* used over those that are totally full. This helps to reduce
* fragmentation.
*/
if (keg->uk_free != 0) {
if (!LIST_EMPTY(&keg->uk_part_slab)) {
slab = LIST_FIRST(&keg->uk_part_slab);
} else {
slab = LIST_FIRST(&keg->uk_free_slab);
LIST_REMOVE(slab, us_link);
LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
us_link);
}
return (slab);
}
/*
* M_NOVM means don't ask at all!
*/
if (flags & M_NOVM)
break;
if (keg->uk_maxpages &&
keg->uk_pages >= keg->uk_maxpages) {
keg->uk_flags |= UMA_ZFLAG_FULL;
if (flags & M_NOWAIT)
break;
else
msleep(keg, &keg->uk_lock, PVM,
"zonelimit", 0);
continue;
}
keg->uk_recurse++;
slab = slab_zalloc(zone, flags);
keg->uk_recurse--;
/*
* If we got a slab here it's safe to mark it partially used
* and return. We assume that the caller is going to remove
* at least one item.
*/
if (slab) {
LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
return (slab);
}
/*
* We might not have been able to get a slab but another cpu
* could have while we were unlocked. Check again before we
* fail.
*/
if (flags & M_NOWAIT)
flags |= M_NOVM;
}
return (slab);
}


/*
* Allocate a new slab for a zone. This does not insert the slab onto a list.
*
* Arguments:
* zone The zone to allocate slabs for
* wait Shall we wait?
*
* Returns:
* The slab that was allocated or NULL if there is no memory and the
* caller specified M_NOWAIT.
*/
static uma_slab_t
slab_zalloc(uma_zone_t zone, int wait)
{
uma_slabrefcnt_t slabref;
uma_slab_t slab;
uma_keg_t keg;
u_int8_t *mem;
u_int8_t flags;
int i;
slab = NULL;
keg = zone->uz_keg;
#ifdef UMA_DEBUG
printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
#endif
ZONE_UNLOCK(zone);
if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
if (slab == NULL) {
ZONE_LOCK(zone);
return NULL;
}
}
/*
* This reproduces the old vm_zone behavior of zero filling pages the
* first time they are added to a zone.
*
* Malloced items are zeroed in uma_zalloc.
*/
if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
wait |= M_ZERO;
else
wait &= ~M_ZERO;
mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
&flags, wait);
if (mem == NULL) {
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
uma_zfree_internal(keg->uk_slabzone, slab, NULL,
SKIP_NONE, ZFREE_STATFREE);
ZONE_LOCK(zone);
return (NULL);
}
/* Point the slab into the allocated memory */
if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
slab = (uma_slab_t )(mem + keg->uk_pgoff);
if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
(keg->uk_flags & UMA_ZONE_REFCNT))
for (i = 0; i < keg->uk_ppera; i++)
vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
slab->us_keg = keg;
slab->us_data = mem;
slab->us_freecount = keg->uk_ipers;
slab->us_firstfree = 0;
slab->us_flags = flags;
if (keg->uk_flags & UMA_ZONE_REFCNT) {
slabref = (uma_slabrefcnt_t)slab;
for (i = 0; i < keg->uk_ipers; i++) {
slabref->us_freelist[i].us_refcnt = 0;
slabref->us_freelist[i].us_item = i+1;
}
} else {
for (i = 0; i < keg->uk_ipers; i++)
slab->us_freelist[i].us_item = i+1;
}
if (keg->uk_init != NULL) {
for (i = 0; i < keg->uk_ipers; i++)
if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
keg->uk_size, wait) != 0)
break;
if (i != keg->uk_ipers) {
if (keg->uk_fini != NULL) {
for (i--; i > -1; i--)
keg->uk_fini(slab->us_data +
(keg->uk_rsize * i),
keg->uk_size);
}
if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
(keg->uk_flags & UMA_ZONE_REFCNT)) {
vm_object_t obj;
if (flags & UMA_SLAB_KMEM)
obj = kmem_object;
else if (flags & UMA_SLAB_KERNEL)
obj = kernel_object;
else
obj = NULL;
for (i = 0; i < keg->uk_ppera; i++)
vsetobj((vm_offset_t)mem +
(i * PAGE_SIZE), obj);
}
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
uma_zfree_internal(keg->uk_slabzone, slab,
NULL, SKIP_NONE, ZFREE_STATFREE);
keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
flags);
ZONE_LOCK(zone);
return (NULL);
}
}
ZONE_LOCK(zone);
if (keg->uk_flags & UMA_ZONE_HASH)
UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
keg->uk_pages += keg->uk_ppera;
keg->uk_free += keg->uk_ipers;
return (slab);
}

[size=medium]uma_zone_slab returns a slab that is not fully used. If uk_free is 0 when it is called (in which case both the uk_free_slab and uk_part_slab lists are empty), it calls slab_zalloc to allocate a new slab. slab_zalloc obtains the block of item memory through keg->uk_allocf; the flags in keg->uk_flags decide whether the struct uma_slab is allocated from keg->uk_slabzone (UMA_ZONE_OFFPAGE) or placed at the tail of the item block itself (at offset uk_pgoff). When slab_zalloc succeeds, keg->uk_pages grows by keg->uk_ppera. If uma_zone_slab is called again for the same keg while uk_free is 0, and uk_maxpages is set and uk_pages has reached uk_maxpages, the function sleeps on the keg with uk_lock as the interlock (see the uk_maxpages check and the "zonelimit" msleep in uma_zone_slab above).[/size]

[size=medium]uma_zalloc_arg allocates an item from a zone.[/size]

void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
void *item;
uma_cache_t cache;
uma_bucket_t bucket;
int cpu;
/* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
zone->uz_name, flags);
if (flags & M_WAITOK) {
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
"uma_zalloc_arg: zone \"%s\"", zone->uz_name);
}
/*
* If possible, allocate from the per-CPU cache. There are two
* requirements for safe access to the per-CPU cache: (1) the thread
* accessing the cache must not be preempted or yield during access,
* and (2) the thread must not migrate CPUs without switching which
* cache it accesses. We rely on a critical section to prevent
* preemption and migration. We release the critical section in
* order to acquire the zone mutex if we are unable to allocate from
* the current cache; when we re-acquire the critical section, we
* must detect and handle migration if it has occurred.
*/
zalloc_restart:
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
zalloc_start:
bucket = cache->uc_allocbucket;
if (bucket) {
if (bucket->ub_cnt > 0) {
bucket->ub_cnt--;
item = bucket->ub_bucket[bucket->ub_cnt];
#ifdef INVARIANTS
bucket->ub_bucket[bucket->ub_cnt] = NULL;
#endif
KASSERT(item != NULL,
("uma_zalloc: Bucket pointer mangled."));
cache->uc_allocs++;
critical_exit();
#ifdef INVARIANTS
ZONE_LOCK(zone);
uma_dbg_alloc(zone, NULL, item);
ZONE_UNLOCK(zone);
#endif
if (zone->uz_ctor != NULL) {
if (zone->uz_ctor(item, zone->uz_keg->uk_size,
udata, flags) != 0) {
uma_zfree_internal(zone, item, udata,
SKIP_DTOR, ZFREE_STATFAIL |
ZFREE_STATFREE);
return (NULL);
}
}
if (flags & M_ZERO)
bzero(item, zone->uz_keg->uk_size);
return (item);
} else if (cache->uc_freebucket) {
/*
* We have run out of items in our allocbucket.
* See if we can switch with our free bucket.
*/
if (cache->uc_freebucket->ub_cnt > 0) {
#ifdef UMA_DEBUG_ALLOC
printf("uma_zalloc: Swapping empty with"
" alloc.\n");
#endif
bucket = cache->uc_freebucket;
cache->uc_freebucket = cache->uc_allocbucket;
cache->uc_allocbucket = bucket;
goto zalloc_start;
}
}
}
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
* we must go back to the zone. This requires the zone lock, so we
* must drop the critical section, then re-acquire it when we go back
* to the cache. Since the critical section is released, we may be
* preempted or migrate. As such, make sure not to maintain any
* thread-local state specific to the cache from prior to releasing
* the critical section.
*/
critical_exit();
ZONE_LOCK(zone);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
bucket = cache->uc_allocbucket;
if (bucket != NULL) {
if (bucket->ub_cnt > 0) {
ZONE_UNLOCK(zone);
goto zalloc_start;
}
bucket = cache->uc_freebucket;
if (bucket != NULL && bucket->ub_cnt > 0) {
ZONE_UNLOCK(zone);
goto zalloc_start;
}
}
/* Since we have locked the zone we may as well send back our stats */
zone->uz_allocs += cache->uc_allocs;
cache->uc_allocs = 0;
zone->uz_frees += cache->uc_frees;
cache->uc_frees = 0;
/* Our old one is now a free bucket */
if (cache->uc_allocbucket) {
KASSERT(cache->uc_allocbucket->ub_cnt == 0,
("uma_zalloc_arg: Freeing a non free bucket."));
LIST_INSERT_HEAD(&zone->uz_free_bucket,
cache->uc_allocbucket, ub_link);
cache->uc_allocbucket = NULL;
}
/* Check the free list for a new alloc bucket */
if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
KASSERT(bucket->ub_cnt != 0,
("uma_zalloc_arg: Returning an empty bucket."));
LIST_REMOVE(bucket, ub_link);
cache->uc_allocbucket = bucket;
ZONE_UNLOCK(zone);
goto zalloc_start;
}
/* We are no longer associated with this CPU. */
critical_exit();
/* Bump up our uz_count so we get here less */
if (zone->uz_count < BUCKET_MAX)
zone->uz_count++;
/*
* Now lets just fill a bucket and put it on the free list. If that
* works we'll restart the allocation from the begining.
*/
if (uma_zalloc_bucket(zone, flags)) {
ZONE_UNLOCK(zone);
goto zalloc_restart;
}
ZONE_UNLOCK(zone);
/*
* We may not be able to get a bucket so return an actual item.
*/
#ifdef UMA_DEBUG
printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif
return (uma_zalloc_internal(zone, udata, flags));
}

[size=medium]uma_zalloc_arg first tries to allocate an item from the current CPU cache. If that fails: if the zone's uz_full_bucket list is not empty, its first bucket is moved into the CPU cache and the allocation is retried; otherwise uma_zalloc_bucket is called to fill a new bucket and insert it into uz_full_bucket. If uma_zalloc_bucket also fails, uma_zalloc_internal is called to allocate the item directly.[/size]

static int
uma_zalloc_bucket(uma_zone_t zone, int flags)
{
uma_bucket_t bucket;
uma_slab_t slab;
int16_t saved;
int max, origflags = flags;
/*
* Try this zone's free list first so we don't allocate extra buckets.
*/
if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
KASSERT(bucket->ub_cnt == 0,
("uma_zalloc_bucket: Bucket on free list is not empty."));
LIST_REMOVE(bucket, ub_link);
} else {
int bflags;
bflags = (flags & ~M_ZERO);
if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
bflags |= M_NOVM;
ZONE_UNLOCK(zone);
bucket = bucket_alloc(zone->uz_count, bflags);
ZONE_LOCK(zone);
}
if (bucket == NULL)
return (0);
#ifdef SMP
/*
* This code is here to limit the number of simultaneous bucket fills
* for any given zone to the number of per cpu caches in this zone. This
* is done so that we don't allocate more memory than we really need.
*/
if (zone->uz_fills >= mp_ncpus)
goto done;
#endif
zone->uz_fills++;
max = MIN(bucket->ub_entries, zone->uz_count);
/* Try to keep the buckets totally full */
saved = bucket->ub_cnt;
while (bucket->ub_cnt < max &&
(slab = uma_zone_slab(zone, flags)) != NULL) {
while (slab->us_freecount && bucket->ub_cnt < max) {
bucket->ub_bucket[bucket->ub_cnt++] =
uma_slab_alloc(zone, slab);
}
/* Don't block on the next fill */
flags |= M_NOWAIT;
}
/*
* We unlock here because we need to call the zone's init.
* It should be safe to unlock because the slab dealt with
* above is already on the appropriate list within the keg
* and the bucket we filled is not yet on any list, so we
* own it.
*/
if (zone->uz_init != NULL) {
int i;
ZONE_UNLOCK(zone);
for (i = saved; i < bucket->ub_cnt; i++)
if (zone->uz_init(bucket->ub_bucket[i],
zone->uz_keg->uk_size, origflags) != 0)
break;
/*
* If we couldn't initialize the whole bucket, put the
* rest back onto the freelist.
*/
if (i != bucket->ub_cnt) {
int j;
for (j = i; j < bucket->ub_cnt; j++) {
uma_zfree_internal(zone, bucket->ub_bucket[j],
NULL, SKIP_FINI, 0);
#ifdef INVARIANTS
bucket->ub_bucket[j] = NULL;
#endif
}
bucket->ub_cnt = i;
}
ZONE_LOCK(zone);
}
zone->uz_fills--;
if (bucket->ub_cnt != 0) {
LIST_INSERT_HEAD(&zone->uz_full_bucket,
bucket, ub_link);
return (1);
}
#ifdef SMP
done:
#endif
bucket_free(bucket);
return (0);
}
/*
* Allocates an item for an internal zone
*
* Arguments
* zone The zone to alloc for.
* udata The data to be passed to the constructor.
* flags M_WAITOK, M_NOWAIT, M_ZERO.
*
* Returns
* NULL if there is no memory and M_NOWAIT is set
* An item if successful
*/
static void *
uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
{
uma_keg_t keg;
uma_slab_t slab;
void *item;
item = NULL;
keg = zone->uz_keg;
#ifdef UMA_DEBUG_ALLOC
printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
ZONE_LOCK(zone);
slab = uma_zone_slab(zone, flags);
if (slab == NULL) {
zone->uz_fails++;
ZONE_UNLOCK(zone);
return (NULL);
}
item = uma_slab_alloc(zone, slab);
zone->uz_allocs++;
ZONE_UNLOCK(zone);
/*
* We have to call both the zone's init (not the keg's init)
* and the zone's ctor. This is because the item is going from
* a keg slab directly to the user, and the user is expecting it
* to be both zone-init'd as well as zone-ctor'd.
*/
if (zone->uz_init != NULL) {
if (zone->uz_init(item, keg->uk_size, flags) != 0) {
uma_zfree_internal(zone, item, udata, SKIP_FINI,
ZFREE_STATFAIL | ZFREE_STATFREE);
return (NULL);
}
}
if (zone->uz_ctor != NULL) {
if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
uma_zfree_internal(zone, item, udata, SKIP_DTOR,
ZFREE_STATFAIL | ZFREE_STATFREE);
return (NULL);
}
}
if (flags & M_ZERO)
bzero(item, keg->uk_size);
return (item);
}

[size=medium]uma_zalloc_internal first calls uma_zone_slab to obtain a slab that still has free items, then calls uma_slab_alloc to take one item from that slab. uma_zone_slab assumes the caller will allocate at least one item from the returned slab, so if the returned slab was completely unused it is moved from uk_free_slab to uk_part_slab before being returned. uma_slab_alloc in turn assumes the slab it is handed is a partially used one, so the only list transition it has to handle is moving the slab onto uk_full_slab when slab->us_freecount drops to 0.[/size]
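[size=medium]uma_slab_alloc itself is not reproduced here; from memory it looks roughly like the sketch below (the UMA_ZONE_REFCNT variant of the freelist is omitted). It pops the item indexed by us_firstfree off the slab's embedded free list, adjusts the keg counters, and moves the slab to uk_full_slab if that was the last free item:[/size]

static void *
uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
{
	uma_keg_t keg;
	void *item;
	u_int8_t freei;

	keg = zone->uz_keg;

	/* Pop the first free item off the slab's free list. */
	freei = slab->us_firstfree;
	slab->us_firstfree = slab->us_freelist[freei].us_item;
	item = slab->us_data + (keg->uk_rsize * freei);

	slab->us_freecount--;
	keg->uk_free--;

	/* That was the last free item: move the slab to the full list. */
	if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
	}
	return (item);
}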

[size=medium]uma_zfree_arg returns an item previously allocated from a zone back to that zone.[/size]

void  
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
uma_keg_t keg;
uma_cache_t cache;
uma_bucket_t bucket;
int bflags;
int cpu;
keg = zone->uz_keg;
#ifdef UMA_DEBUG_ALLOC_1
printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
zone->uz_name);
if (zone->uz_dtor)
zone->uz_dtor(item, keg->uk_size, udata);
#ifdef INVARIANTS
ZONE_LOCK(zone);
if (keg->uk_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
ZONE_UNLOCK(zone);
#endif
/*
* The race here is acceptable. If we miss it we'll just have to wait
* a little longer for the limits to be reset.
*/
if (keg->uk_flags & UMA_ZFLAG_FULL)
goto zfree_internal;
/*
* If possible, free to the per-CPU cache. There are two
* requirements for safe access to the per-CPU cache: (1) the thread
* accessing the cache must not be preempted or yield during access,
* and (2) the thread must not migrate CPUs without switching which
* cache it accesses. We rely on a critical section to prevent
* preemption and migration. We release the critical section in
* order to acquire the zone mutex if we are unable to free to the
* current cache; when we re-acquire the critical section, we must
* detect and handle migration if it has occurred.
*/
zfree_restart:
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
zfree_start:
bucket = cache->uc_freebucket;
if (bucket) {
/*
* Do we have room in our bucket? It is OK for this uz count
* check to be slightly out of sync.
*/
if (bucket->ub_cnt < bucket->ub_entries) {
KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
("uma_zfree: Freeing to non free bucket index."));
bucket->ub_bucket[bucket->ub_cnt] = item;
bucket->ub_cnt++;
cache->uc_frees++;
critical_exit();
return;
} else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
printf("uma_zfree: Swapping buckets.\n");
#endif
/*
* We have run out of space in our freebucket.
* See if we can switch with our alloc bucket.
*/
if (cache->uc_allocbucket->ub_cnt <
cache->uc_freebucket->ub_cnt) {
bucket = cache->uc_freebucket;
cache->uc_freebucket = cache->uc_allocbucket;
cache->uc_allocbucket = bucket;
goto zfree_start;
}
}
}
/*
* We can get here for two reasons:
*
* 1) The buckets are NULL
* 2) The alloc and free buckets are both somewhat full.
*
* We must go back the zone, which requires acquiring the zone lock,
* which in turn means we must release and re-acquire the critical
* section. Since the critical section is released, we may be
* preempted or migrate. As such, make sure not to maintain any
* thread-local state specific to the cache from prior to releasing
* the critical section.
*/
critical_exit();
ZONE_LOCK(zone);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
if (cache->uc_freebucket != NULL) {
if (cache->uc_freebucket->ub_cnt <
cache->uc_freebucket->ub_entries) {
ZONE_UNLOCK(zone);
goto zfree_start;
}
if (cache->uc_allocbucket != NULL &&
(cache->uc_allocbucket->ub_cnt <
cache->uc_freebucket->ub_cnt)) {
ZONE_UNLOCK(zone);
goto zfree_start;
}
}
/* Since we have locked the zone we may as well send back our stats */
zone->uz_allocs += cache->uc_allocs;
cache->uc_allocs = 0;
zone->uz_frees += cache->uc_frees;
cache->uc_frees = 0;
bucket = cache->uc_freebucket;
cache->uc_freebucket = NULL;
/* Can we throw this on the zone full list? */
if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
/* ub_cnt is pointing to the last free item */
KASSERT(bucket->ub_cnt != 0,
("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
LIST_INSERT_HEAD(&zone->uz_full_bucket,
bucket, ub_link);
}
if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
LIST_REMOVE(bucket, ub_link);
ZONE_UNLOCK(zone);
cache->uc_freebucket = bucket;
goto zfree_start;
}
/* We are no longer associated with this CPU. */
critical_exit();
/* And the zone.. */
ZONE_UNLOCK(zone);
#ifdef UMA_DEBUG_ALLOC
printf("uma_zfree: Allocating new free bucket.\n");
#endif
bflags = M_NOWAIT;
if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
bflags |= M_NOVM;
bucket = bucket_alloc(zone->uz_count, bflags);
if (bucket) {
ZONE_LOCK(zone);
LIST_INSERT_HEAD(&zone->uz_free_bucket,
bucket, ub_link);
ZONE_UNLOCK(zone);
goto zfree_restart;
}
/*
* If nothing else caught this, we'll just do an internal free.
*/
zfree_internal:
uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
return;
}

/* 
* Frees an item to an INTERNAL zone or allocates a free bucket
*
* Arguments:
* zone The zone to free to
* item The item we're freeing
* udata User supplied data for the dtor
* skip Skip dtors and finis
*/
static void
uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
enum zfreeskip skip, int flags)
{
uma_slab_t slab;
uma_slabrefcnt_t slabref;
uma_keg_t keg;
u_int8_t *mem;
u_int8_t freei;
keg = zone->uz_keg;
if (skip < SKIP_DTOR && zone->uz_dtor)
zone->uz_dtor(item, keg->uk_size, udata);
if (skip < SKIP_FINI && zone->uz_fini)
zone->uz_fini(item, keg->uk_size);
ZONE_LOCK(zone);
if (flags & ZFREE_STATFAIL)
zone->uz_fails++;
if (flags & ZFREE_STATFREE)
zone->uz_frees++;
if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
if (keg->uk_flags & UMA_ZONE_HASH)
slab = hash_sfind(&keg->uk_hash, mem);
else {
mem += keg->uk_pgoff;
slab = (uma_slab_t)mem;
}
} else {
slab = (uma_slab_t)udata;
}
/* Do we need to remove from any lists? */
if (slab->us_freecount+1 == keg->uk_ipers) {
LIST_REMOVE(slab, us_link);
LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
} else if (slab->us_freecount == 0) {
LIST_REMOVE(slab, us_link);
LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
}
/* Slab management stuff */
freei = ((unsigned long)item - (unsigned long)slab->us_data)
/ keg->uk_rsize;
#ifdef INVARIANTS
if (!skip)
uma_dbg_free(zone, slab, item);
#endif
if (keg->uk_flags & UMA_ZONE_REFCNT) {
slabref = (uma_slabrefcnt_t)slab;
slabref->us_freelist[freei].us_item = slab->us_firstfree;
} else {
slab->us_freelist[freei].us_item = slab->us_firstfree;
}
slab->us_firstfree = freei;
slab->us_freecount++;
/* Zone statistics */
keg->uk_free++;
if (keg->uk_flags & UMA_ZFLAG_FULL) {
if (keg->uk_pages < keg->uk_maxpages)
keg->uk_flags &= ~UMA_ZFLAG_FULL;
/*
* We can handle one more allocation. Since we're clearing ZFLAG_FULL,
* wake up all procs blocked on pages. This should be uncommon, so
* keeping this simple for now (rather than adding count of blocked
* threads etc).
*/
wakeup(keg);
}
ZONE_UNLOCK(zone);
}

[size=medium]Like uma_zalloc_arg, uma_zfree_arg first tries to cache the item in the current CPU cache. If that fails: if the uz_free_bucket list is not empty, its first bucket is moved into the CPU cache to hold the item; otherwise bucket_alloc is called to allocate an empty bucket, which is inserted into uz_free_bucket before the whole sequence is retried. If bucket_alloc also fails, uma_zfree_internal is called to free the item. Before doing any of this, however, uma_zfree_arg checks whether the keg has hit its page limit (UMA_ZFLAG_FULL); if so, it frees the item straight through uma_zfree_internal, because uma_zfree_internal checks whether the keg's page count has dropped back below the limit and can wake up the threads sleeping on the keg.[/size]
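[size=medium]bucket_alloc and bucket_free, used above, are thin wrappers around the internal bucket zones. The sketch below is from memory rather than verbatim: bucket_alloc picks the bucket zone whose capacity matches the requested entry count and allocates the bucket through uma_zalloc_internal, so bucket allocation never recurses into the per-CPU fast path it is meant to feed; bucketdisable shuts the whole mechanism off during early boot or a severe page shortage.[/size]

static uma_bucket_t
bucket_alloc(int entries, int bflags)
{
	struct uma_bucket_zone *ubz;
	uma_bucket_t bucket;

	/* Buckets are disabled during early boot / severe page shortage. */
	if (bucketdisable)
		return (NULL);

	/* Pick the smallest bucket zone that can hold 'entries' items. */
	ubz = bucket_zone_lookup(entries);
	bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
	if (bucket != NULL) {
		bucket->ub_cnt = 0;
		bucket->ub_entries = ubz->ubz_entries;
	}
	return (bucket);
}
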
/* 
* Drain the cached buckets from a zone. Expects a locked zone on entry.
*/
static void
bucket_cache_drain(uma_zone_t zone)
{
uma_bucket_t bucket;
/*
* Drain the bucket queues and free the buckets, we just keep two per
* cpu (alloc/free).
*/
while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
LIST_REMOVE(bucket, ub_link);
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
bucket_free(bucket);
ZONE_LOCK(zone);
}
/* Now we do the free queue.. */
while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
LIST_REMOVE(bucket, ub_link);
bucket_free(bucket);
}
}
/*
* Frees pages from a zone back to the system. This is done on demand from
* the pageout daemon.
*
* Arguments:
* zone The zone to free pages from
* all Should we drain all items?
*
* Returns:
* Nothing.
*/
void
zone_drain(uma_zone_t zone)
{
struct slabhead freeslabs = { 0 };
uma_keg_t keg;
uma_slab_t slab;
uma_slab_t n;
u_int8_t flags;
u_int8_t *mem;
int i;
keg = zone->uz_keg;
/*
* We don't want to take pages from statically allocated zones at this
* time
*/
if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
return;
ZONE_LOCK(zone);
#ifdef UMA_DEBUG
printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
#endif
bucket_cache_drain(zone);
if (keg->uk_free == 0)
goto finished;
slab = LIST_FIRST(&keg->uk_free_slab);
while (slab) {
n = LIST_NEXT(slab, us_link);
/* We have no where to free these to */
if (slab->us_flags & UMA_SLAB_BOOT) {
slab = n;
continue;
}
LIST_REMOVE(slab, us_link);
keg->uk_pages -= keg->uk_ppera;
keg->uk_free -= keg->uk_ipers;
if (keg->uk_flags & UMA_ZONE_HASH)
UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
slab = n;
}
finished:
ZONE_UNLOCK(zone);
while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
if (keg->uk_fini)
for (i = 0; i < keg->uk_ipers; i++)
keg->uk_fini(
slab->us_data + (keg->uk_rsize * i),
keg->uk_size);
flags = slab->us_flags;
mem = slab->us_data;
if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
(keg->uk_flags & UMA_ZONE_REFCNT)) {
vm_object_t obj;
if (flags & UMA_SLAB_KMEM)
obj = kmem_object;
else if (flags & UMA_SLAB_KERNEL)
obj = kernel_object;
else
obj = NULL;
for (i = 0; i < keg->uk_ppera; i++)
vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
obj);
}
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
uma_zfree_internal(keg->uk_slabzone, slab, NULL,
SKIP_NONE, ZFREE_STATFREE);
#ifdef UMA_DEBUG
printf("%s: Returning %d bytes.\n",
zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
#endif
keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
}
}

[size=medium]zone_drain calls bucket_cache_drain to return the items cached on the zone's uz_full_bucket list to the keg (items in the per-CPU caches are not returned) and to free every bucket on the uz_full_bucket and uz_free_bucket lists; it then releases the pages of the slabs linked on the keg's uk_free_slab list back to the system.[/size]
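[size=medium]bucket_cache_drain relies on bucket_drain, which is not shown above. As a rough sketch (the real function also handles UMA_ZONE_MALLOC zones, which need the item's slab looked up and passed as udata), it simply hands every item still cached in a bucket back to uma_zfree_internal, which returns it to its slab in the keg:[/size]

static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
	void *item;

	if (bucket == NULL)
		return;

	while (bucket->ub_cnt > 0) {
		bucket->ub_cnt--;
		item = bucket->ub_bucket[bucket->ub_cnt];
		/* The zone dtor already ran when the item entered the cache. */
		uma_zfree_internal(zone, item, NULL, SKIP_DTOR, 0);
	}
}
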
static void  
zone_foreach(void (*zfunc)(uma_zone_t))
{
uma_keg_t keg;
uma_zone_t zone;
mtx_lock(&uma_mtx);
LIST_FOREACH(keg, &uma_kegs, uk_link) {
LIST_FOREACH(zone, &keg->uk_zones, uz_link)
zfunc(zone);
}
mtx_unlock(&uma_mtx);
}

[size=medium]zone_foreach calls zfunc on every zone in the system reachable through the uma_kegs list.[/size]

void  
uma_reclaim(void)
{
#ifdef UMA_DEBUG
printf("UMA: vm asked us to release pages!\n");
#endif
bucket_enable();
zone_foreach(zone_drain);
/*
* Some slabs may have been freed but this zone will be visited early
* we visit again so that we can free pages that are empty once other
* zones are drained. We have to do the same for buckets.
*/
zone_drain(slabzone);
zone_drain(slabrefzone);
bucket_zone_drain();
}

[size=medium]uma_reclaim returns all pages that are not currently needed back to the system; it is intended to be called only by the pageout daemon.[/size]
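[size=medium]Of the two helpers called at the top and bottom of uma_reclaim, bucket_enable simply re-enables per-CPU bucket allocation (it clears bucketdisable once the free-page count is back above the minimum), and bucket_zone_drain drains the zones that back the buckets themselves. A rough sketch of the latter, from memory:[/size]

static void
bucket_zone_drain(void)
{
	struct uma_bucket_zone *ubz;

	/* Drain each of the fixed-size zones that buckets are carved from. */
	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
		zone_drain(ubz->ubz_zone);
}
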
[size=medium]
References:
* Jeff Bonwick, The Slab Allocator: An Object-Caching Kernel Memory Allocator (1994)
* FreeBSD zone(9) manual page[/size]