在用gpreftools优化gko_pool的时候我发现一个问题,malloc竟然成了性能瓶颈 由于在每个连接建立的时候gko_pool默认会为读写各分配2KB的buf备用,这个是比较固定的 每个连接的的生命周期会伴随着4KB大小的内存malloc & free 正好可以写个只能分配固定大小内存的“内存池”,基本思路就是每次分配一个大内存bucket(64MB),需要4KB的块的时候就从bucket中取,当bucket没有可用slot就再分配一个新的bucket,当bucket的所有slot都空闲就把bucket整体free。bucket中寻找槽位用的二分查找实现,每个bit对应一个slot是否被占用,废话不说 上代码: memory.h
/* * memory.h * * Created on: Jun 8, 2012 * Author: auxten */ #ifndef GKO_MEMORY_H_ #define GKO_MEMORY_H_ #include <sys/types.h> #include <pthread.h> static const u_int32_t SLOT_SIZE = 4 * 1024; /// one page is good static const u_int32_t SLOT_COUNT = 1 * 1024 * 1024; static const u_int32_t M_MAP_SIZE = SLOT_COUNT / sizeof(u_int8_t); /// bitmap static const u_int32_t BUCKET_SIZE = 64 * 1024 * 1024; static const int32_t BUCKET_CAPACITY = BUCKET_SIZE / SLOT_SIZE; /// capacity, 4096 static const int32_t BUCKET_COUNT = SLOT_COUNT / BUCKET_CAPACITY; /// 256 static const int INVILID_BLOCK = -1; class gkoAlloc { private: pthread_mutex_t alloc_lock; u_int8_t m_map[M_MAP_SIZE]; /// 1MB can fit L2 cache void * bucket_s[BUCKET_COUNT]; int16_t bucket_used[BUCKET_COUNT]; int latest_bucket; int get_bit(u_int8_t * b); int free_bit(u_int8_t * b, int index); public: gkoAlloc(void); int get_block(void); int get_clear_block(void); int get2x_block(int block_id); void free_block(int block_id); int clear_block(void *block, int c, size_t size); void * id2addr(int block_id); }; #endif /* GKO_MEMORY_H_ */ /* * memory.cpp * * Created on: Jun 8, 2012 * Author: auxten * * only support little endian : x86 */ //#define MEM_TEST #define _XOPEN_SOURCE 600 #include <stdlib.h> #include <stdio.h> #include <unistd.h> #include <assert.h> #include <sys/types.h> #include "memory.h" gkoAlloc::gkoAlloc(void) { pthread_mutex_init(&alloc_lock, NULL); memset((void *) m_map, 0, M_MAP_SIZE * sizeof(u_int8_t)); memset((void *) bucket_s, 0, BUCKET_COUNT * sizeof(void *)); memset((void *) bucket_used, 0, BUCKET_COUNT * sizeof(int16_t)); latest_bucket = 0; } void * gkoAlloc::id2addr(int block_id) { if (block_id < 0) return NULL; int bucket_no = block_id / BUCKET_CAPACITY; int bucket_offset = SLOT_SIZE * (block_id % BUCKET_CAPACITY); return ((char *)bucket_s[bucket_no]) + bucket_offset; } int gkoAlloc::get_bit(u_int8_t * b) { /** * idx 01234567 * byte 11111010 * * return 5 and * byte 11111110 */ int i; for (i = 0; i < 8; i++) { if ((u_int8_t)((*b >> 7 - i) << 7) == (u_int8_t)0u) break; } *b |= (u_int8_t)( 1u << 7 - i); return i; } int gkoAlloc::free_bit(u_int8_t * b, int index) { /** * idx 01234567 * byte 11111110 * * return 5 and * byte 11111010 */ *b ^= (u_int8_t)( 1u << 7 - index); return index; } int gkoAlloc::get_block(void) { int i; int the_bucket; int idx = INVILID_BLOCK; u_int8_t * p_idx; u_int64_t * bucket_start_idx; u_int64_t * bucket_end_idx; u_int64_t * bucket_idx; pthread_mutex_lock(&alloc_lock); for (i = 0; i < BUCKET_COUNT; i++) { the_bucket = (latest_bucket + i) % BUCKET_COUNT; if (bucket_used[the_bucket] < BUCKET_CAPACITY) { latest_bucket = the_bucket; break; } } if (i == BUCKET_COUNT) { fprintf(stderr, "out of memory in pool\n"); // GKOLOG(FATAL, "out of memory in pool"); idx = INVILID_BLOCK; goto GET_BLOCK_RET; } if (!bucket_s[the_bucket]) { void * ptr; if (!posix_memalign(&ptr, SLOT_SIZE, BUCKET_SIZE)) { bucket_s[the_bucket] = ptr; bucket_used[the_bucket] = 0; } else { fprintf(stderr, "posix_memalign fail\n"); // GKOLOG(FATAL, "posix_memalign fail"); idx = INVILID_BLOCK; goto GET_BLOCK_RET; } } bucket_start_idx = (u_int64_t *) &(this->m_map[the_bucket * BUCKET_CAPACITY / 8]); bucket_end_idx = (u_int64_t *) &(this->m_map[(the_bucket + 1) * BUCKET_CAPACITY / 8]); for (bucket_idx = bucket_start_idx; bucket_idx < bucket_end_idx; bucket_idx++) { if (*(u_int64_t *) bucket_idx != ~0uLL) { if (*(u_int32_t *) bucket_idx != ~0u) { if (*((u_int16_t *) bucket_idx) != (u_int16_t) ~0u) { if (*(u_int8_t *) bucket_idx != (u_int8_t) ~0u) { p_idx = (u_int8_t *) bucket_idx + 0; } else { p_idx = (u_int8_t *) bucket_idx + 1; } } else { if (*((u_int8_t *) bucket_idx + 2) != (u_int8_t) ~0u) { p_idx = (u_int8_t *) bucket_idx + 2; } else { p_idx = (u_int8_t *) bucket_idx + 3; } } } else { if (*((u_int16_t *) bucket_idx + 2) != (u_int16_t) ~0u) { if (*((u_int8_t *) bucket_idx + 4) != (u_int8_t) ~0u) { p_idx = (u_int8_t *) bucket_idx + 4; } else { p_idx = (u_int8_t *) bucket_idx + 5; } } else { if (*((u_int8_t *) bucket_idx + 6) != (u_int8_t) ~0u) { p_idx = (u_int8_t *) bucket_idx + 6; } else { p_idx = (u_int8_t *) bucket_idx + 7; } } } idx = get_bit(p_idx) + 8 * (p_idx - (u_int8_t *) bucket_start_idx) + the_bucket * BUCKET_CAPACITY; bucket_used[the_bucket] ++; break; } else { continue; } } GET_BLOCK_RET: pthread_mutex_unlock(&alloc_lock); return idx; } void gkoAlloc::free_block(int block_id) { if (block_id < 0) return; int bucket_no = block_id / BUCKET_CAPACITY; pthread_mutex_lock(&alloc_lock); free_bit(&m_map[block_id / 8], block_id % 8); if(--bucket_used[bucket_no] == 0) { free(bucket_s[bucket_no]); bucket_s[bucket_no] = NULL; } else { latest_bucket = bucket_no; } pthread_mutex_unlock(&alloc_lock); } #ifdef MEM_TEST int main() { gkoAlloc mem; for (int i = 0; i < BUCKET_CAPACITY - 1; i++) { int k = mem.get_block(); printf("%d, %d\n", i, k); if (i != k) { break; } } int blk1 = mem.get_block(); int blk2 = mem.get_block(); int blk3 = mem.get_block(); printf("%p\n", mem.id2addr(blk1)); printf("%p\n", mem.id2addr(blk2)); printf("%p\n", mem.id2addr(blk3)); mem.free_block(blk1); mem.free_block(blk2); mem.free_block(blk3); return 0; } #endif
编译测试:
g++ -DMEM_TEST memory.cpp -o memory && ./memory
github:https://github.com/auxten/gkoAlloc