在用gperftools优化gko_pool的时候我发现一个问题:malloc竟然成了性能瓶颈
由于在每个连接建立的时候gko_pool默认会为读写各分配2KB的buf备用,这个是比较固定的
每个连接的生命周期会伴随着4KB大小的内存malloc & free
正好可以写个只能分配固定大小内存的“内存池”,基本思路就是每次分配一个大内存bucket(64MB),需要4KB的块的时候就从bucket中取,当bucket没有可用slot就再分配一个新的bucket,当bucket的所有slot都空闲就把bucket整体free。bucket中寻找槽位用的二分查找实现,每个bit对应一个slot是否被占用,废话不说 上代码:
memory.h
/*
* memory.h
*
* Created on: Jun 8, 2012
* Author: auxten
*/
#ifndef GKO_MEMORY_H_
#define GKO_MEMORY_H_
#include <sys/types.h>
#include <pthread.h>
/// Size in bytes of one slot handed out by the pool; one page is good.
static const u_int32_t SLOT_SIZE = 4 * 1024;
/// Total number of slots the pool can manage across all buckets.
static const u_int32_t SLOT_COUNT = 1 * 1024 * 1024;
/// Size in bytes of the occupancy bitmap: one bit per slot, eight slots per byte.
/// (Dividing by sizeof(u_int8_t), which is 1, reserved a whole byte per slot.)
static const u_int32_t M_MAP_SIZE = SLOT_COUNT / 8;
/// Size in bytes of one bucket, the unit that is allocated and freed as a whole.
static const u_int32_t BUCKET_SIZE = 64 * 1024 * 1024;
/// Number of slots per bucket: 64MB / 4KB = 16384.
static const int32_t BUCKET_CAPACITY = BUCKET_SIZE / SLOT_SIZE;
/// Number of buckets: 1M slots / 16384 slots per bucket = 64.
static const int32_t BUCKET_COUNT = SLOT_COUNT / BUCKET_CAPACITY;
/// Block id returned when no slot could be handed out.
static const int INVILID_BLOCK = -1;
/// Pool that hands out fixed-size slots of SLOT_SIZE bytes, identified by an
/// integer block id. Slots live inside large buckets of BUCKET_SIZE bytes;
/// one bit per slot in m_map records whether the slot is taken.
/// NOTE(review): no destructor is declared, so buckets that are still
/// allocated and the mutex are not released when the object goes away.
class gkoAlloc
{
private:
/// Locked by get_block() and free_block() around every bitmap/bucket update.
pthread_mutex_t alloc_lock;
/// Slot-occupancy bitmap, M_MAP_SIZE bytes; the author's intent is that it fits in L2 cache.
u_int8_t m_map[M_MAP_SIZE];
/// Base address of each bucket; NULL while the bucket is not allocated.
void * bucket_s[BUCKET_COUNT];
/// Number of slots currently handed out from each bucket.
int16_t bucket_used[BUCKET_COUNT];
/// Bucket index at which get_block() starts looking for a free slot.
int latest_bucket;
/// Sets the first clear bit of *b (index 0 is the leftmost bit) and returns its index.
int get_bit(u_int8_t * b);
/// Releases bit `index` of *b and returns `index`.
int free_bit(u_int8_t * b, int index);
public:
/// Initialises the mutex and starts with an empty bitmap and no buckets.
gkoAlloc(void);
/// Reserves one slot; returns its block id, or INVILID_BLOCK on failure.
int get_block(void);
/// NOTE(review): declared only; no definition is visible in this listing.
int get_clear_block(void);
/// NOTE(review): declared only; no definition is visible in this listing.
int get2x_block(int block_id);
/// Returns the slot identified by block_id to the pool.
void free_block(int block_id);
/// NOTE(review): declared only; no definition is visible in this listing.
int clear_block(void *block, int c, size_t size);
/// Translates a block id into the address of its slot.
void * id2addr(int block_id);
};
#endif /* GKO_MEMORY_H_ */
memory.cpp
/*
* memory.cpp
*
* Created on: Jun 8, 2012
* Author: auxten
*
* only support little endian : x86
*/
//#define MEM_TEST
#define _XOPEN_SOURCE 600
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <pthread.h>
#include "memory.h"
/**
 * Build an empty pool: every slot is marked free, no bucket is allocated,
 * all per-bucket usage counters are zero and the search starts at bucket 0.
 */
gkoAlloc::gkoAlloc(void)
    : latest_bucket(0)
{
    /// sizeof on the member arrays yields the same byte counts as the
    /// element-count * element-size products
    memset(m_map, 0, sizeof(m_map));
    memset(bucket_s, 0, sizeof(bucket_s));
    memset(bucket_used, 0, sizeof(bucket_used));

    pthread_mutex_init(&alloc_lock, NULL);
}
void * gkoAlloc::id2addr(int block_id)
{
if (block_id
return NULL;
int bucket_no = block_id / BUCKET_CAPACITY;
int bucket_offset = SLOT_SIZE * (block_id % BUCKET_CAPACITY);
return ((char *)bucket_s[bucket_no]) + bucket_offset;
}
int gkoAlloc::get_bit(u_int8_t * b)
{
/**
* idx 01234567
* byte 11111010
*
* return 5 and
* byte 11111110
*/
int i;
for (i = 0; i
{
if ((u_int8_t)((*b >> 7 - i) <
break;
}
*b |= (u_int8_t)( 1u <
return i;
}
/**
 * Release one bit of a bitmap byte.
 *
 * Bits are numbered from the most significant one:
 *
 *   idx   01234567
 *   byte  11111110
 *
 * with index 5 leaves
 *
 *   byte  11111010
 *
 * @param b     bitmap byte to update
 * @param index bit to release, 0..7; any other value leaves *b untouched
 * @return index, unchanged
 */
int gkoAlloc::free_bit(u_int8_t * b, int index)
{
    if (index >= 0 && index < 8)
    {
        /// clear instead of toggle, so releasing an already-free bit cannot
        /// flip it back to "used"
        *b &= (u_int8_t) ~(1u << (7 - index));
    }
    return index;
}
int gkoAlloc::get_block(void)
{
int i;
int the_bucket;
int idx = INVILID_BLOCK;
u_int8_t * p_idx;
u_int64_t * bucket_start_idx;
u_int64_t * bucket_end_idx;
u_int64_t * bucket_idx;
pthread_mutex_lock(&alloc_lock);
for (i = 0; i
{
the_bucket = (latest_bucket + i) % BUCKET_COUNT;
if (bucket_used[the_bucket]
{
latest_bucket = the_bucket;
break;
}
}
if (i == BUCKET_COUNT)
{
fprintf(stderr, "out of memory in pool\n");
// GKOLOG(FATAL, "out of memory in pool");
idx = INVILID_BLOCK;
goto GET_BLOCK_RET;
}
if (!bucket_s[the_bucket])
{
void * ptr;
if (!posix_memalign(&ptr, SLOT_SIZE, BUCKET_SIZE))
{
bucket_s[the_bucket] = ptr;
bucket_used[the_bucket] = 0;
}
else
{
fprintf(stderr, "posix_memalign fail\n");
// GKOLOG(FATAL, "posix_memalign fail");
idx = INVILID_BLOCK;
goto GET_BLOCK_RET;
}
}
bucket_start_idx = (u_int64_t *) &(this->m_map[the_bucket * BUCKET_CAPACITY / 8]);
bucket_end_idx = (u_int64_t *) &(this->m_map[(the_bucket + 1) * BUCKET_CAPACITY / 8]);
for (bucket_idx = bucket_start_idx;
bucket_idx
bucket_idx++)
{
if (*(u_int64_t *) bucket_idx != ~0uLL)
{
if (*(u_int32_t *) bucket_idx != ~0u)
{
if (*((u_int16_t *) bucket_idx) != (u_int16_t) ~0u)
{
if (*(u_int8_t *) bucket_idx != (u_int8_t) ~0u)
{
p_idx = (u_int8_t *) bucket_idx + 0;
}
else
{
p_idx = (u_int8_t *) bucket_idx + 1;
}
}
else
{
if (*((u_int8_t *) bucket_idx + 2) != (u_int8_t) ~0u)
{
p_idx = (u_int8_t *) bucket_idx + 2;
}
else
{
p_idx = (u_int8_t *) bucket_idx + 3;
}
}
}
else
{
if (*((u_int16_t *) bucket_idx + 2) != (u_int16_t) ~0u)
{
if (*((u_int8_t *) bucket_idx + 4) != (u_int8_t) ~0u)
{
p_idx = (u_int8_t *) bucket_idx + 4;
}
else
{
p_idx = (u_int8_t *) bucket_idx + 5;
}
}
else
{
if (*((u_int8_t *) bucket_idx + 6) != (u_int8_t) ~0u)
{
p_idx = (u_int8_t *) bucket_idx + 6;
}
else
{
p_idx = (u_int8_t *) bucket_idx + 7;
}
}
}
idx = get_bit(p_idx) +
8 * (p_idx - (u_int8_t *) bucket_start_idx) +
the_bucket * BUCKET_CAPACITY;
bucket_used[the_bucket] ++;
break;
}
else
{
continue;
}
}
GET_BLOCK_RET:
pthread_mutex_unlock(&alloc_lock);
return idx;
}
void gkoAlloc::free_block(int block_id)
{
if (block_id
return;
int bucket_no = block_id / BUCKET_CAPACITY;
pthread_mutex_lock(&alloc_lock);
free_bit(&m_map[block_id / 8], block_id % 8);
if(--bucket_used[bucket_no] == 0)
{
free(bucket_s[bucket_no]);
bucket_s[bucket_no] = NULL;
}
else
{
latest_bucket = bucket_no;
}
pthread_mutex_unlock(&alloc_lock);
}
#ifdef MEM_TEST
/**
 * Self-test, built only with -DMEM_TEST.
 *
 * Allocates blocks one after another and checks that the ids come back in
 * sequence, then takes three more blocks, prints their addresses and frees
 * them again.
 */
int main()
{
    gkoAlloc mem;

    /// NOTE(review): the loop bound was lost in the published listing.
    /// BUCKET_CAPACITY - 1 is a reconstruction, chosen so that the three
    /// allocations below straddle the first bucket boundary; confirm
    /// against the upstream source.
    for (int i = 0; i < BUCKET_CAPACITY - 1; i++)
    {
        int k = mem.get_block();
        printf("%d, %d\n", i, k);
        if (i != k)
        {
            break;
        }
    }

    int blk1 = mem.get_block();
    int blk2 = mem.get_block();
    int blk3 = mem.get_block();
    printf("%p\n", mem.id2addr(blk1));
    printf("%p\n", mem.id2addr(blk2));
    printf("%p\n", mem.id2addr(blk3));
    mem.free_block(blk1);
    mem.free_block(blk2);
    mem.free_block(blk3);
    return 0;
}
#endif
编译测试:g++ -DMEM_TEST memory.cpp -o memory && ./memory
需要的拿去