nginx通过自己实现的slab机制来减少内存的碎片化。
nginx的slab机制比Linux内核的slab机制简单得多,因此通过阅读nginx的实现可以更快地理解slab机制。
ngx_slab.h:
/*
 * Page descriptor. The pool keeps an array of these, one per managed page,
 * and the same layout is reused for the slot list heads that sit right
 * after the pool header.
 */
typedef struct ngx_slab_page_s ngx_slab_page_t;
struct ngx_slab_page_s {
// Multi-purpose word, depending on how the page is used:
//  - free run or whole-page allocation: number of pages in the run
//  - small-chunk page: the chunk size shift
//  - exact-size page: the allocation bitmap itself
//  - big-chunk page: bitmap in the high bits, chunk size shift in the low bits
uintptr_t slab;
// Next element of the free list or slot list; NULL once the page is unlinked.
ngx_slab_page_t *next;
// Previous element stored as an integer; the low bits carry the page type
// (NGX_SLAB_PAGE / SMALL / EXACT / BIG) and are masked with NGX_SLAB_PAGE_MASK.
uintptr_t prev;
};
/*
 * Pool header. ngx_slab_init() treats the bytes immediately following this
 * structure as the slot array, then the page descriptor array, then the data.
 */
typedef struct {
ngx_shmtx_sh_t lock; // mutex lock
size_t min_size; // smallest chunk size; set to 1 << min_shift by ngx_slab_init()
size_t min_shift; // log2 of the smallest chunk size
ngx_slab_page_t *pages; // array of page descriptors
ngx_slab_page_t *last; // one past the last usable page descriptor
ngx_slab_page_t free; // sentinel head of the list of free page runs
u_char *start; // page-aligned start of the data area
u_char *end; // end of the managed memory region
ngx_shmtx_t mutex; // taken by the ngx_slab_alloc()/ngx_slab_calloc()/ngx_slab_free() wrappers
u_char *log_ctx; // set to &zero by ngx_slab_init()
u_char zero; // '\0' byte that log_ctx points at by default
unsigned log_nomem:1; // when set, a failed page allocation logs "no memory"
void *data; // not referenced by the code shown here
void *addr; // not referenced by the code shown here
} ngx_slab_pool_t;
头文件很简单,定义了两个结构体:一个是描述单个页(以及slot链表头)的ngx_slab_page_t,另一个是描述整个内存池的ngx_slab_pool_t。
ngx_slab.c:
/*
 * Lay out a pool: slot heads first, then the page descriptor array, then the
 * page-aligned data area.  pool->min_shift and pool->end are read here, so
 * the caller must have set them beforehand.
 */
void
ngx_slab_init(ngx_slab_pool_t *pool)
{
...
pool->min_size = 1 << pool->min_shift;
// Everything after the pool header is managed memory.
p = (u_char *) pool + sizeof(ngx_slab_pool_t);
size = pool->end - p;
ngx_slab_junk(p, size);
// The slot heads sit immediately after the header.
slots = (ngx_slab_page_t *) p;
/**
 * n is the number of slot size classes: one for every power of two from
 * 2^min_shift up to 2^(ngx_pagesize_shift - 1).
 * Because all sizes are powers of two, subtracting the shifts is
 * equivalent to dividing the sizes.
 **/
n = ngx_pagesize_shift - pool->min_shift;
// Initialise the n slot heads; an empty slot points at itself.
for (i = 0; i < n; i++) {
slots[i].slab = 0;
slots[i].next = &slots[i];
slots[i].prev = 0;
}
p += n * sizeof(ngx_slab_page_t);
// Each page costs ngx_pagesize bytes of data plus one descriptor.
pages = (ngx_uint_t) (size / (ngx_pagesize + sizeof(ngx_slab_page_t)));
ngx_memzero(p, pages * sizeof(ngx_slab_page_t));
// Attach the descriptor array to the pool and put all pages on the
// free list as a single run.
pool->pages = (ngx_slab_page_t *) p;
pool->free.prev = 0;
pool->free.next = (ngx_slab_page_t *) p;
pool->pages->slab = pages;
pool->pages->next = &pool->free;
pool->pages->prev = (uintptr_t) &pool->free;
// First address actually usable for data, aligned to the page size.
pool->start = (u_char *)
ngx_align_ptr((uintptr_t) p + pages * sizeof(ngx_slab_page_t),
ngx_pagesize);
// Alignment may leave room for fewer pages than computed above;
// shrink the page count by the difference.
m = pages - (pool->end - pool->start) / ngx_pagesize;
if (m > 0) {
pages -= m;
pool->pages->slab = pages;
}
pool->last = pool->pages + pages;
// Out-of-memory logging is on by default.
pool->log_nomem = 1;
pool->log_ctx = &pool->zero;
pool->zero = '\0';
}
可以看到,nginx初始化共享内存内存池的过程比较简单,而且为了提高程序的运行速度,nginx用了很多小技巧。
/*
 * Thread/process-safe allocation entry point.
 *
 * Takes the pool mutex, delegates to ngx_slab_alloc_locked() and drops the
 * mutex again before returning whatever the locked variant produced.
 */
void *
ngx_slab_alloc(ngx_slab_pool_t *pool, size_t size)
{
    void  *chunk;

    /* the pool lives in shared memory, so serialise access to it */
    ngx_shmtx_lock(&pool->mutex);

    chunk = ngx_slab_alloc_locked(pool, size);

    /* release the mutex as soon as the allocation is done */
    ngx_shmtx_unlock(&pool->mutex);

    return chunk;
}
nginx对申请的内存大小进行了划分。在之前的ngx_slab_init中可以看到slot个数的计算:n = ngx_pagesize_shift - pool->min_shift(一般ngx_pagesize_shift为12,即一页大小为4096;min_shift为3,即最小块为8,单位均为Byte),因此nginx划分的slot有8、16、32、64、128、256、512、1024、2048这9个等级。若申请的大小超过2048Byte,nginx直接按整页分配,页数为size向上取整到4096Byte整数倍后所需的页数。
/*
 * Allocate `size` bytes from the pool.  The caller must already hold the
 * pool mutex (ngx_slab_alloc() is the locking wrapper).
 *
 * Requests larger than ngx_slab_max_size get whole pages.  Smaller requests
 * are rounded up to a power-of-two size class ("slot") and served from a
 * partially used page of that slot, or from a freshly allocated page.
 *
 * Returns the chunk address, or 0 (NULL) when no page could be obtained.
 */
void *
ngx_slab_alloc_locked(ngx_slab_pool_t *pool, size_t size)
{
...
if (size > ngx_slab_max_size) {
// Per the accompanying article ngx_slab_max_size is 2048 here
// (its definition is not part of this excerpt).
...
// Oversized request: allocate whole pages, rounding the size up to a
// multiple of the page size.
page = ngx_slab_alloc_pages(pool, (size >> ngx_pagesize_shift)
+ ((size % ngx_pagesize) ? 1 : 0));
if (page) {
// Descriptor index -> byte offset -> address inside the data area.
p = (page - pool->pages) << ngx_pagesize_shift;
p += (uintptr_t) pool->start;
} else {
p = 0;
}
goto done;
}
// Above the minimum chunk size (8 in the article's configuration):
// work out which slot serves this request.
if (size > pool->min_size) {
shift = 1;
/**
 * Example: a 33-byte request is rounded up to the 64-byte class,
 * so shift = 6 and, with min_shift = 3, slot = 6 - 3 = 3.
 **/
for (s = size - 1; s >>= 1; shift++) { /* void */ }
slot = shift - pool->min_shift;
} else {
// Anything up to the minimum size is served from the first slot.
size = pool->min_size;
shift = pool->min_shift;
slot = 0;
}
...
slots = (ngx_slab_page_t *) ((u_char *)pool + \
sizeof(ngx_slab_pool_t));
page = slots[slot].next;
// Does this slot currently have a page with free chunks?
if (page->next != page) {
if (shift < ngx_slab_exact_shift) {
// Small chunks: the bitmap lives at the start of the page itself.
// NOTE(review): the original note states ngx_slab_exact_shift is 6 on
// 64-bit and 5 on 32-bit systems; the definition is not visible here — confirm.
do {
// Compute the address of the page's data from the descriptor index.
/**
 * (page - pool->pages) relies on pointer subtraction being scaled by
 * the element type.  With 24-byte descriptors (three 8-byte fields):
 *   &page[1] - &page[0] == 1
 * whereas
 *   (char *) &page[1] - (char *) &page[0] == 24
 **/
p = (page - pool->pages) << ngx_pagesize_shift;
bitmap = (uintptr_t *) (pool->start + p);
// Number of uintptr_t words in the bitmap (one bit per chunk).
map = (1 << (ngx_pagesize_shift - shift))
/ (sizeof(uintptr_t) * 8);
for (n = 0; n < map; n++) {
if (bitmap[n] != NGX_SLAB_BUSY) {
for (m = 1, i = 0; m; m <<= 1, i++) {
if ((bitmap[n] & m)) {
continue;
}
bitmap[n] |= m;
// Byte offset within the page of the chunk just claimed.
i = ((n * sizeof(uintptr_t) * 8) << \
shift) + (i << shift);
/*
 * If this bitmap word just became NGX_SLAB_BUSY, look for a later
 * word that still has room.  If there is none, the page is
 * completely used: unlink it from the slot so that it is no
 * longer offered for allocation.
 */
if (bitmap[n] == NGX_SLAB_BUSY) {
for (n = n + 1; n < map; n++) {
if (bitmap[n] != NGX_SLAB_BUSY) {
p = (uintptr_t) bitmap + i;
goto done;
}
}
prev = (ngx_slab_page_t *)
(page->prev & ~NGX_SLAB_PAGE_MASK);
prev->next = page->next;
page->next->prev = page->prev;
page->next = NULL;
page->prev = NGX_SLAB_SMALL;
}
p = (uintptr_t) bitmap + i;
goto done;
}
}
}
page = page->next;
} while (page);
} else if (shift == ngx_slab_exact_shift) {
// Exact-size chunks: no in-page bitmap is needed,
// page->slab (a single uintptr_t) is the bitmap.
do {
if (page->slab != NGX_SLAB_BUSY) {
for (m = 1, i = 0; m; m <<= 1, i++) {
if ((page->slab & m)) {
continue;
}
page->slab |= m;
// Page became full: unlink it from the slot list.
if (page->slab == NGX_SLAB_BUSY) {
prev = (ngx_slab_page_t *)
(page->prev & ~NGX_SLAB_PAGE_MASK);
prev->next = page->next;
page->next->prev = page->prev;
page->next = NULL;
page->prev = NGX_SLAB_EXACT;
}
p = (page - pool->pages) << ngx_pagesize_shift;
p += i << shift;
p += (uintptr_t) pool->start;
goto done;
}
}
page = page->next;
} while (page);
} else { /* shift > ngx_slab_exact_shift */
// Chunks are larger than the exact size, so at most half the bits of a
// uintptr_t are needed for the bitmap.  The low bits of page->slab hold
// the chunk size shift, the high bits hold the bitmap.
// Build `mask`: one bit per chunk in the page, moved into the bitmap position.
n = ngx_pagesize_shift - (page->slab & NGX_SLAB_SHIFT_MASK);
n = 1 << n;
n = ((uintptr_t) 1 << n) - 1;
mask = n << NGX_SLAB_MAP_SHIFT;
do {
if ((page->slab & NGX_SLAB_MAP_MASK) != mask) {
for (m = (uintptr_t) 1 << NGX_SLAB_MAP_SHIFT, i = 0;
m & mask;
m <<= 1, i++)
{
if ((page->slab & m)) {
continue;
}
page->slab |= m;
// Page became full: unlink it from the slot list.
if ((page->slab & NGX_SLAB_MAP_MASK) == mask) {
prev = (ngx_slab_page_t *)
(page->prev & ~NGX_SLAB_PAGE_MASK);
prev->next = page->next;
page->next->prev = page->prev;
page->next = NULL;
page->prev = NGX_SLAB_BIG;
}
p = (page - pool->pages) << ngx_pagesize_shift;
p += i << shift;
p += (uintptr_t) pool->start;
goto done;
}
}
page = page->next;
} while (page);
}
}
// No usable page in the slot: allocate a fresh page for it.
page = ngx_slab_alloc_pages(pool, 1);
if (page) {
if (shift < ngx_slab_exact_shift) {
p = (page - pool->pages) << ngx_pagesize_shift;
bitmap = (uintptr_t *) (pool->start + p);
// Chunk size in bytes.
s = 1 << shift;
// Number of chunks needed to hold the bitmap (at least one).
n = (1 << (ngx_pagesize_shift - shift)) / 8 / s;
if (n == 0) {
n = 1;
}
// Set the low n + 1 bits: n chunks occupied by the bitmap itself
// plus the one chunk returned to the caller below.
bitmap[0] = (2 << n) - 1;
map = (1 << (ngx_pagesize_shift - shift)) / (sizeof(uintptr_t) * 8);
// Clear the remaining bitmap words (all those chunks are free).
for (i = 1; i < map; i++) {
bitmap[i] = 0;
}
// Link the page into its slot.
page->slab = shift;
page->next = &slots[slot];
page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_SMALL;
slots[slot].next = page;
// Return the first chunk after the bitmap chunks.
p = ((page - pool->pages) << ngx_pagesize_shift) + s * n;
p += (uintptr_t) pool->start;
goto done;
} else if (shift == ngx_slab_exact_shift) {
// page->slab is the bitmap; bit 0 marks the chunk returned now.
page->slab = 1;
page->next = &slots[slot];
page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_EXACT;
slots[slot].next = page;
p = (page - pool->pages) << ngx_pagesize_shift;
p += (uintptr_t) pool->start;
goto done;
} else { /* shift > ngx_slab_exact_shift */
// Low bits record the chunk size shift; the first bitmap bit marks
// the chunk returned now.
page->slab = ((uintptr_t) 1 << NGX_SLAB_MAP_SHIFT) | shift;
page->next = &slots[slot];
page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_BIG;
// Descriptor addresses are aligned, so at least their low 2 bits are
// zero (multiples of 4); those bits are used to record the page type.
slots[slot].next = page;
p = (page - pool->pages) << ngx_pagesize_shift;
p += (uintptr_t) pool->start;
goto done;
}
}
// Page allocation failed.
p = 0;
done:
ngx_log_debug1(NGX_LOG_DEBUG_ALLOC, ngx_cycle->log, 0, "slab alloc: %p", p);
return (void *) p;
}
举例:块大小为8Byte,需要用8个块来记录bitmap,分配bitmap如下:
(bitmap使用的块应该被标为已使用)
/*
 * Locking wrapper around ngx_slab_calloc_locked(): same as ngx_slab_alloc()
 * except that the returned memory has been zero-filled.
 */
void *
ngx_slab_calloc(ngx_slab_pool_t *pool, size_t size)
{
    void  *chunk;

    ngx_shmtx_lock(&pool->mutex);

    chunk = ngx_slab_calloc_locked(pool, size);

    ngx_shmtx_unlock(&pool->mutex);

    return chunk;
}
/*
 * Allocate `size` bytes with ngx_slab_alloc_locked() and zero them.
 * The caller must hold the pool mutex.  Returns NULL when the underlying
 * allocation fails; nothing is cleared in that case.
 */
void *
ngx_slab_calloc_locked(ngx_slab_pool_t *pool, size_t size)
{
    void  *chunk;

    chunk = ngx_slab_alloc_locked(pool, size);

    if (chunk == NULL) {
        return NULL;
    }

    /* only the requested size is cleared */
    ngx_memzero(chunk, size);

    return chunk;
}
内存释放逻辑跟申请内存逻辑相反,也需要上锁,由于内存对齐,可以根据指针p来计算相应的页的位置,slot的位置以及块的位置等等。
/*
 * Locking wrapper around ngx_slab_free_locked(): the pool mutex is held
 * for the duration of the release and dropped afterwards.
 */
void
ngx_slab_free(ngx_slab_pool_t *pool, void *p)
{
    /* freeing modifies shared pool state, so take the mutex first */
    ngx_shmtx_lock(&pool->mutex);

    ngx_slab_free_locked(pool, p);

    ngx_shmtx_unlock(&pool->mutex);
}
/*
 * Return the chunk at `p` to the pool.  The caller must hold the pool mutex
 * (ngx_slab_free() is the locking wrapper).
 *
 * The page is located from the pointer, its type is read from the low bits
 * of page->prev, and the matching bitmap bit is cleared.  A page that was
 * full is re-linked into its slot; a page that becomes empty is handed back
 * through ngx_slab_free_pages().  Invalid pointers and double frees are
 * reported with ngx_slab_error() and otherwise ignored.
 */
void
ngx_slab_free_locked(ngx_slab_pool_t *pool, void *p)
{
...
// Reject pointers that do not lie within [pool->start, pool->end].
if ((u_char *) p < pool->start || (u_char *) p > pool->end) {
ngx_slab_error(pool, NGX_LOG_ALERT, "ngx_slab_free(): outside of pool");
goto fail;
}
// Index of the page that contains p.
n = ((u_char *) p - pool->start) >> ngx_pagesize_shift;
page = &pool->pages[n];
// slab carries the chunk size information (and/or the bitmap).
slab = page->slab;
// Page type, kept in the low bits of prev.
type = page->prev & NGX_SLAB_PAGE_MASK;
switch (type) {
case NGX_SLAB_SMALL:
// Chunk size shift.
shift = slab & NGX_SLAB_SHIFT_MASK;
size = 1 << shift;
// Chunks are size-aligned, so p must be a multiple of size.
if ((uintptr_t) p & (size - 1)) {
goto wrong_chunk;
}
// n: index of the chunk within the page; m: its bit within a bitmap word.
n = ((uintptr_t) p & (ngx_pagesize - 1)) >> shift;
m = (uintptr_t) 1 << (n & (sizeof(uintptr_t) * 8 - 1));
// Which bitmap word holds that bit.
n /= (sizeof(uintptr_t) * 8);
// The bitmap sits at the start of the page; since pages are aligned,
// masking off the in-page offset of p yields its address.
bitmap = (uintptr_t *)
((uintptr_t) p & ~((uintptr_t) ngx_pagesize - 1));
if (bitmap[n] & m) {
// The page was full (unlinked, next == NULL): put it back on its
// slot list, because it is about to have a free chunk again.
if (page->next == NULL) {
slots = (ngx_slab_page_t *)
((u_char *) pool + sizeof(ngx_slab_pool_t));
slot = shift - pool->min_shift;
page->next = slots[slot].next;
slots[slot].next = page;
page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_SMALL;
page->next->prev = (uintptr_t) page | NGX_SLAB_SMALL;
}
// Clear the bit of the chunk being freed.
bitmap[n] &= ~m;
// Number of chunks occupied by the bitmap itself (at least one).
n = (1 << (ngx_pagesize_shift - shift)) / 8 / (1 << shift);
if (n == 0) {
n = 1;
}
// Any bit set in the first word beyond the bitmap's own chunks
// means the page is still in use.
if (bitmap[0] & ~(((uintptr_t) 1 << n) - 1)) {
goto done;
}
map = (1 << (ngx_pagesize_shift - shift)) / (sizeof(uintptr_t) * 8);
// Likewise for the remaining bitmap words.
for (n = 1; n < map; n++) {
if (bitmap[n]) {
goto done;
}
}
// No chunk is in use any more: release the whole page.
ngx_slab_free_pages(pool, page, 1);
goto done;
}
goto chunk_already_free;
case NGX_SLAB_EXACT:
// Bit of this chunk inside page->slab, which is the bitmap for exact-size pages.
m = (uintptr_t) 1 <<
(((uintptr_t) p & (ngx_pagesize - 1)) >> ngx_slab_exact_shift);
size = ngx_slab_exact_size;
if ((uintptr_t) p & (size - 1)) {
goto wrong_chunk;
}
if (slab & m) {
// A full page had been unlinked from its slot: link it back in.
if (slab == NGX_SLAB_BUSY) {
slots = (ngx_slab_page_t *)
((u_char *) pool + sizeof(ngx_slab_pool_t));
slot = ngx_slab_exact_shift - pool->min_shift;
page->next = slots[slot].next;
slots[slot].next = page;
page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_EXACT;
page->next->prev = (uintptr_t) page | NGX_SLAB_EXACT;
}
page->slab &= ~m;
// Other chunks are still allocated: keep the page.
if (page->slab) {
goto done;
}
ngx_slab_free_pages(pool, page, 1);
goto done;
}
goto chunk_already_free;
case NGX_SLAB_BIG:
shift = slab & NGX_SLAB_SHIFT_MASK;
size = 1 << shift;
if ((uintptr_t) p & (size - 1)) {
goto wrong_chunk;
}
// Bit of this chunk in the bitmap stored in the high part of page->slab.
m = (uintptr_t) 1 << ((((uintptr_t) p & (ngx_pagesize - 1)) >> shift)
+ NGX_SLAB_MAP_SHIFT);
if (slab & m) {
// A full page had been unlinked from its slot: link it back in.
if (page->next == NULL) {
slots = (ngx_slab_page_t *)
((u_char *) pool + sizeof(ngx_slab_pool_t));
slot = shift - pool->min_shift;
page->next = slots[slot].next;
slots[slot].next = page;
page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_BIG;
page->next->prev = (uintptr_t) page | NGX_SLAB_BIG;
}
page->slab &= ~m;
// Other chunks are still allocated: keep the page.
if (page->slab & NGX_SLAB_MAP_MASK) {
goto done;
}
ngx_slab_free_pages(pool, page, 1);
goto done;
}
goto chunk_already_free;
case NGX_SLAB_PAGE:
// Whole-page allocation: p must be page-aligned and must point at the
// first page of the run.
if ((uintptr_t) p & (ngx_pagesize - 1)) {
goto wrong_chunk;
}
if (slab == NGX_SLAB_PAGE_FREE) {
ngx_slab_error(pool, NGX_LOG_ALERT,
"ngx_slab_free(): page is already free");
goto fail;
}
if (slab == NGX_SLAB_PAGE_BUSY) {
ngx_slab_error(pool, NGX_LOG_ALERT,
"ngx_slab_free(): pointer to wrong page");
goto fail;
}
n = ((u_char *) p - pool->start) >> ngx_pagesize_shift;
// Number of pages in the run (the start flag is stripped).
size = slab & ~NGX_SLAB_PAGE_START;
ngx_slab_free_pages(pool, &pool->pages[n], size);
ngx_slab_junk(p, size << ngx_pagesize_shift);
return;
}
/* not reached */
return;
done:
// Pass the freed chunk to ngx_slab_junk() (definition not shown here).
ngx_slab_junk(p, size);
return;
wrong_chunk:
ngx_slab_error(pool, NGX_LOG_ALERT,
"ngx_slab_free(): pointer to wrong chunk");
goto fail;
chunk_already_free:
ngx_slab_error(pool, NGX_LOG_ALERT,
"ngx_slab_free(): chunk is already free");
fail:
return;
}
申请page的逻辑也较为简单:遍历pool->free链表,查找页数足够的连续空闲page;若找到的空闲块比需要的大,则把剩余部分留在free链表中;若找不到,则在log_nomem置位时记录“no memory”错误,并返回NULL。
/*
 * Take `pages` contiguous pages from the pool's free list (first fit).
 *
 * The list headed by pool->free is walked until a run is found whose page
 * count (kept in ->slab) is large enough.  A larger run is split and its
 * tail takes the run's place on the list; an exact fit is simply unlinked.
 * The first descriptor of the allocation is tagged with the page count ORed
 * with NGX_SLAB_PAGE_START, every following one with NGX_SLAB_PAGE_BUSY.
 *
 * Returns the first page descriptor of the allocation, or NULL when no run
 * is large enough (an error is logged first if pool->log_nomem is set).
 */
static ngx_slab_page_t *
ngx_slab_alloc_pages(ngx_slab_pool_t *pool, ngx_uint_t pages)
{
    ngx_slab_page_t  *run, *rest, *node;

    for (run = pool->free.next; run != &pool->free; run = run->next) {

        /* too small, try the next free run */
        if (run->slab < pages) {
            continue;
        }

        if (run->slab == pages) {
            /* exact fit: drop the whole run from the free list */
            node = (ngx_slab_page_t *) run->prev;
            node->next = run->next;
            run->next->prev = run->prev;

        } else {
            /* split: the tail starting at run[pages] remains free */
            rest = &run[pages];

            /* the last page of the run refers back to the new run head */
            run[run->slab - 1].prev = (uintptr_t) rest;

            /* the tail inherits the remaining count and the list links */
            rest->slab = run->slab - pages;
            rest->next = run->next;
            rest->prev = run->prev;

            node = (ngx_slab_page_t *) run->prev;
            node->next = rest;
            run->next->prev = (uintptr_t) rest;
        }

        /* tag the head of the allocated run */
        run->slab = pages | NGX_SLAB_PAGE_START;
        run->next = NULL;
        run->prev = NGX_SLAB_PAGE;

        /* tag the remaining descriptors as continuation pages */
        pages--;

        for (node = run + 1; pages != 0; node++, pages--) {
            node->slab = NGX_SLAB_PAGE_BUSY;
            node->next = NULL;
            node->prev = NGX_SLAB_PAGE;
        }

        return run;
    }

    if (pool->log_nomem) {
        ngx_slab_error(pool, NGX_LOG_CRIT,
                       "ngx_slab_alloc() failed: no memory");
    }

    return NULL;
}
申请一页的内存:
申请两页内存:
/*
 * Return a run of `pages` pages starting at `page` to the free list.
 *
 * The run is first unlinked from whatever list it is on, then merged with
 * the free run that directly follows it and with the free run that directly
 * precedes it (when such neighbours exist), and finally pushed onto the
 * front of pool->free.
 */
static void
ngx_slab_free_pages(ngx_slab_pool_t *pool, ngx_slab_page_t *page,
ngx_uint_t pages)
{
ngx_uint_t type;
ngx_slab_page_t *prev, *join;
// Record the run length in the head; `pages` now counts the pages after the head.
page->slab = pages--;
if (pages) {
ngx_memzero(&page[1], pages * sizeof(ngx_slab_page_t));
}
// If the page is still linked (it has a next node), unlink it from its list.
if (page->next) {
prev = (ngx_slab_page_t *) (page->prev & ~NGX_SLAB_PAGE_MASK);
prev->next = page->next;
page->next->prev = page->prev;
}
// Descriptor right after the run being freed.
join = page + page->slab;
// Only look at it if it is still inside the pool (not past the last page).
if (join < pool->last) {
type = join->prev & NGX_SLAB_PAGE_MASK;
if (type == NGX_SLAB_PAGE) {
// It is linked into a list (next != NULL): absorb it into this run
// and take it off that list.
if (join->next != NULL) {
pages += join->slab;
page->slab += join->slab;
prev = (ngx_slab_page_t *) (join->prev & ~NGX_SLAB_PAGE_MASK);
prev->next = join->next;
join->next->prev = join->prev;
join->slab = NGX_SLAB_PAGE_FREE;
join->next = NULL;
join->prev = NGX_SLAB_PAGE;
}
}
}
// Try to merge with the run that ends right before this one.
if (page > pool->pages) {
join = page - 1;
type = join->prev & NGX_SLAB_PAGE_MASK;
// Check the page type.
if (type == NGX_SLAB_PAGE) {
// A page marked NGX_SLAB_PAGE_FREE is the tail of a free run; its prev
// field holds the address of that run's head.
if (join->slab == NGX_SLAB_PAGE_FREE) {
join = (ngx_slab_page_t *) (join->prev & ~NGX_SLAB_PAGE_MASK);
}
if (join->next != NULL) {
pages += join->slab;
join->slab += page->slab;
prev = (ngx_slab_page_t *) (join->prev & ~NGX_SLAB_PAGE_MASK);
prev->next = join->next;
join->next->prev = join->prev;
page->slab = NGX_SLAB_PAGE_FREE;
page->next = NULL;
page->prev = NGX_SLAB_PAGE;
// The merged run now starts at the preceding run's head.
page = join;
}
}
}
// Make the last page of the freed run point back at the run's first page.
if (pages) {
page[pages].prev = (uintptr_t) page;
}
// Insert the run at the front of the free list (free.next).
page->prev = (uintptr_t) &pool->free;
page->next = pool->free.next;
page->next->prev = (uintptr_t) page;
pool->free.next = page;
}