NCNN-Allocator
路径:src/allocator.h 和 src/allocator.cpp
主要分为三个部分:对 malloc 函数进行包装、使用带锁的内存池、使用不带锁的内存池
源码
// Round a pointer up to the next address that is a multiple of n.
//   ptr: the pointer to align.
//   n:   alignment in bytes; the mask trick below is only correct when n is a
//        power of two. Defaults to sizeof(_Tp).
// Returns the smallest address >= ptr that is a multiple of n.
template<typename _Tp>
static _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp)) {
    // Adding n - 1 and then clearing the low bits with -n rounds up to a multiple of n.
    return (_Tp*)(((size_t)ptr + n - 1) & -n);
}
// Round a buffer size up to the next multiple of n (n must be a power of two
// for the mask below to be a valid alignment mask).
static size_t alignSize(size_t sz, int n) {
    // -n, widened to size_t, has every bit set except the low log2(n) bits.
    const size_t mask = (size_t)(-n);
    const size_t bumped = sz + n - 1;
    return bumped & mask;
}
// Aligned allocation built on top of malloc.
// Returns a pointer aligned to NCNN_MALLOC_ALIGN, or 0 when malloc fails.
// The pointer must be released with fastFree, not free.
static void* fastMalloc(size_t size) {
    // Over-allocate: one pointer-sized slot to remember the raw malloc result,
    // NCNN_MALLOC_ALIGN bytes of slack for alignment, plus NCNN_MALLOC_OVERREAD
    // extra bytes.
    const size_t total = size + sizeof(void*) + NCNN_MALLOC_ALIGN + NCNN_MALLOC_OVERREAD;
    unsigned char* raw = (unsigned char*)malloc(total);
    if (raw == 0) {
        return 0;
    }
    // Skip the slot reserved for the raw pointer, then round up to the alignment.
    unsigned char** first_slot = (unsigned char**)raw + 1;
    unsigned char** aligned = alignPtr(first_slot, NCNN_MALLOC_ALIGN);
    // Stash the raw pointer immediately in front of the aligned address.
    *(aligned - 1) = raw;
    return aligned;
}
// Release a buffer obtained from fastMalloc. A null pointer is ignored.
static void fastFree(void* ptr) {
    if (!ptr) {
        return;
    }
    // The raw malloc pointer was stored in the slot just before the aligned address.
    unsigned char** stash = (unsigned char**)ptr - 1;
    free(*stash);
}
// Abstract allocator interface. Concrete allocators must implement both
// fastMalloc and fastFree.
class NCNN_EXPORT Allocator {
public:
// Virtual destructor; only declared here, the definition is not part of this excerpt.
virtual ~Allocator();
// Allocate a buffer of `size` bytes. Pure virtual.
virtual void* fastMalloc(size_t size) = 0;
// Release `ptr` (presumably a pointer previously returned by fastMalloc). Pure virtual.
virtual void fastFree(void* ptr) = 0;
};
// Allocator that keeps freed blocks in a pool for reuse. All state lives in
// the private implementation object `d`, whose accesses are guarded by mutexes.
class NCNN_EXPORT PoolAllocator : public Allocator {
public:
PoolAllocator();
~PoolAllocator();
// ratio range 0 ~ 1
// default cr = 0
void set_size_compare_ratio(float scr);
// budget drop threshold
// default threshold = 10
void set_size_drop_threshold(size_t);
// release all budgets immediately
void clear();
// Return a cached block that fits `size` when one exists, otherwise allocate a new one.
virtual void* fastMalloc(size_t size);
// Move `ptr` back into the pool of reusable blocks instead of freeing it.
virtual void fastFree(void* ptr);
private:
// Pointer to the private implementation data (pointer itself is const).
PoolAllocatorPrivate* const d;
};
// The only difference from PoolAllocator is whether locks are used, so it is enough to study PoolAllocator.
// NOTE(review): this line is shorthand — a class declaration without a body cannot carry a
// base-clause in C++, so it does not compile as written; presumably the full definition
// lives in the original header.
class NCNN_EXPORT UnlockedPoolAllocator : public Allocator;
// Private state of PoolAllocator.
class PoolAllocatorPrivate {
public:
// Held while `budgets` is read or modified.
Mutex budgets_lock;
// Held while `payouts` is read or modified.
Mutex payouts_lock;
// Fixed-point ratio used by fastMalloc's fit test: (block_size * ratio) >> 8 must be <= requested size.
unsigned int size_compare_ratio; // 0~256
// Once `budgets` holds at least this many blocks, a failed lookup may drop one cached block.
size_t size_drop_threshold;
// (size, pointer) of blocks currently free and available for reuse.
std::list<std::pair<size_t, void*> > budgets;
// (size, pointer) of blocks currently handed out to callers.
std::list<std::pair<size_t, void*> > payouts;
};
// Release every cached (free) block back through ncnn::fastFree and empty the
// budgets list. Blocks currently handed out (payouts) are not touched.
void PoolAllocator::clear() {
    typedef std::list<std::pair<size_t, void*> >::iterator BudgetIter;

    d->budgets_lock.lock();
    // Walk the list and free the memory behind each entry.
    BudgetIter cur = d->budgets.begin();
    while (cur != d->budgets.end()) {
        ncnn::fastFree(cur->second);
        ++cur;
    }
    // Drop the bookkeeping entries, which now refer to freed memory.
    d->budgets.clear();
    d->budgets_lock.unlock();
}
// Allocate `size` bytes, preferring a block already cached in the pool.
//
// 1. Scan `budgets` (free blocks). A block of size bs is accepted when
//    bs >= size and (bs * size_compare_ratio) >> 8 <= size, i.e. it is large
//    enough but not wastefully larger than the request. An accepted block is
//    moved from `budgets` to `payouts` and returned.
// 2. If nothing fits and the pool holds at least size_drop_threshold blocks,
//    one cached block (the smallest or the largest, see below) is released.
// 3. A fresh block is obtained from the global ncnn::fastMalloc, recorded in
//    `payouts`, and returned.
void* PoolAllocator::fastMalloc(size_t size) {
    d->budgets_lock.lock();
    // find free budget
    std::list<std::pair<size_t, void*> >::iterator it = d->budgets.begin(), it_max = d->budgets.begin(), it_min = d->budgets.begin();
    for (; it != d->budgets.end(); ++it) {
        size_t bs = it->first;
        // size_compare_ratio ~ 100%
        if (bs >= size && ((bs * d->size_compare_ratio) >> 8) <= size) {
            // The block satisfies the request: move it from budgets to payouts.
            void* ptr = it->second;
            d->budgets.erase(it);
            d->budgets_lock.unlock();
            d->payouts_lock.lock();
            d->payouts.push_back(std::make_pair(bs, ptr));
            d->payouts_lock.unlock();
            return ptr;
        }
        // Track the smallest and largest cached blocks for the drop policy below.
        if (bs < it_min->first) {
            it_min = it;
        }
        if (bs > it_max->first) {
            it_max = it;
        }
    }
    // The emptiness check is required: with size_drop_threshold == 0 and an
    // empty pool, it_min/it_max equal end() and must not be dereferenced or erased.
    if (!d->budgets.empty() && d->budgets.size() >= d->size_drop_threshold) {
        // All chunks in pool are not chosen. Then try to drop some outdated
        // chunks and return them to OS.
        if (it_max->first < size) {
            // Current query is asking for a chunk larger than any cached chunks.
            // Then remove the smallest one.
            ncnn::fastFree(it_min->second);
            d->budgets.erase(it_min);
        } else if (it_min->first > size) {
            // Current query is asking for a chunk smaller than any cached chunks.
            // Then remove the largest one.
            ncnn::fastFree(it_max->second);
            d->budgets.erase(it_max);
        }
    }
    d->budgets_lock.unlock();
    // Nothing reusable: fall back to the global fastMalloc.
    void* ptr = ncnn::fastMalloc(size);
    d->payouts_lock.lock();
    d->payouts.push_back(std::make_pair(size, ptr));
    d->payouts_lock.unlock();
    return ptr;
}
// Return `ptr` to the pool. If `ptr` is recorded in payouts, its entry is
// moved to budgets so a later fastMalloc can reuse it. Otherwise an error is
// logged and the pointer is released through the global ncnn::fastFree.
void PoolAllocator::fastFree(void* ptr) {
    typedef std::list<std::pair<size_t, void*> >::iterator PayoutIter;

    d->payouts_lock.lock();
    // return to budgets
    PayoutIter cur = d->payouts.begin();
    while (cur != d->payouts.end()) {
        if (cur->second != ptr) {
            ++cur;
            continue;
        }
        // Found the record: remove it from payouts and append it to budgets.
        const size_t block_size = cur->first;
        d->payouts.erase(cur);
        d->payouts_lock.unlock();

        d->budgets_lock.lock();
        d->budgets.push_back(std::make_pair(block_size, ptr));
        d->budgets_lock.unlock();
        return;
    }
    d->payouts_lock.unlock();

    NCNN_LOGE("FATAL ERROR! pool allocator get wild %p", ptr);
    // A pointer not tracked in payouts is released with the global fastFree.
    ncnn::fastFree(ptr);
}
测试代码
#include <iostream>
// Alignment, in bytes, that FastMalloc requests for every buffer it returns.
#define MALLOC_ALIGN 16
// Aligns a pointer to the specified number of bytes.
//   ptr: the pointer to align.
//   n:   alignment in bytes; must be a power of two for the mask to be valid.
//        Defaults to sizeof(_Tp).
// Returns the smallest address >= ptr that is a multiple of n.
template <typename _Tp>
static inline _Tp* AlignPtr(_Tp* ptr, int n = (int)sizeof(_Tp)) {
    // Round up with n - 1 (not n + 1): an already-aligned pointer must stay
    // where it is, and no input may be pushed past the next multiple of n.
    return (_Tp*)(((size_t)ptr + n - 1) & -n);
}
// Rounds a buffer size up to the next multiple of n bytes (n a power of two).
static inline size_t AlignSize(size_t sz, int n) {
    // Example for n = 16: -16 is ...1111 1111 1111 0000, which clears the low four bits.
    const size_t mask = (size_t)(-n);
    const size_t bumped = sz + n - 1;
    return bumped & mask;
}
// Allocates `size` bytes aligned via AlignPtr to MALLOC_ALIGN on top of malloc.
// Prints the address of the first candidate slot for inspection.
// Returns 0 when malloc fails. Release the result with FastFree.
static inline void* FastMalloc(size_t size) {
    // Extra room: one pointer slot for the raw address plus alignment slack.
    const size_t total = size + sizeof(void*) + MALLOC_ALIGN;
    unsigned char* raw = (unsigned char*)malloc(total);
    if (raw == 0) {
        return 0;
    }
    // First address after the slot reserved for the raw pointer.
    unsigned char** first_slot = (unsigned char**)raw + 1;
    std::cout << "udata + 1: " << first_slot << std::endl;
    unsigned char** aligned = AlignPtr(first_slot, MALLOC_ALIGN);
    // Remember the raw pointer right before the aligned address.
    *(aligned - 1) = raw;
    return aligned;
}
// Releases a buffer obtained from FastMalloc. A null pointer is ignored.
static inline void FastFree (void *ptr) {
    if (!ptr) {
        return;
    }
    // The raw malloc pointer sits in the slot directly before the aligned address.
    unsigned char** stash = (unsigned char**)ptr - 1;
    free(*stash);
}
// Demo driver: prints a heap address in several representations, shows
// AlignSize on two sample sizes, then allocates and frees one aligned buffer
// with FastMalloc / FastFree.
int main() {
    int* address = new int(5);
    std::cout << "address: " << address << std::endl;
    std::cout << "(size_t)address: " << (size_t)address << std::endl;
    std::cout << "(long long)address: " << (long long)address << std::endl;
    // 12 is already a multiple of 4; 17 is rounded up to 20.
    size_t total_size = AlignSize(12, 4);
    size_t total_size_1 = AlignSize(17, 4);
    std::cout << "total_size: " << total_size << std::endl;
    std::cout << "total_size_1: " << total_size_1 << std::endl;
    void* data = FastMalloc(total_size);
    // The same address printed through different pointer types.
    std::cout << "data: " << data << std::endl;
    std::cout << "(void*)data: " << (void*)data << std::endl;
    std::cout << "(int*)data: " << (int*)data << std::endl;
    std::cout << "(float*)data: " << (float*)data << std::endl;
    FastFree(data);
    // Release the int allocated at the top; it was previously leaked.
    delete address;
    return 0;
}
参考
https://zhuanlan.zhihu.com/p/335774042
https://www.jianshu.com/p/be716c3822f5