测试代码:
#include"ConcurrentAlloc.h"
#include <chrono>
//ntimes:一轮申请与释放内存的次数 rounds:一共测试几轮 nworks:线程数
// Benchmarks the system allocator (malloc/free) under concurrency.
//
//   ntimes: number of allocations (and matching frees) per round
//   rounds: number of rounds each thread performs
//   nworks: number of worker threads
//
// Every thread times its allocation phase and its release phase separately.
// The per-thread durations are summed into two atomic counters and printed in
// milliseconds. Timing uses std::chrono::steady_clock (monotonic wall clock)
// rather than clock(): clock() ticks are only milliseconds on platforms where
// CLOCKS_PER_SEC == 1000, and on POSIX it reports process-wide CPU time, which
// would be counted once per thread when summed.
void TestBenchMarkMalloc(size_t ntimes, size_t rounds, size_t nworks) {
    using Clock = std::chrono::steady_clock;
    using std::chrono::duration_cast;
    using std::chrono::nanoseconds;

    std::cout << "Malloc Test" << std::endl;

    std::vector<std::thread> ThreadPool;
    ThreadPool.reserve(nworks);

    // Accumulated in nanoseconds so short rounds are not truncated to zero
    // before being summed; converted to milliseconds only when printing.
    std::atomic<long long> malloc_ns{0};
    std::atomic<long long> free_ns{0};

    for (size_t i = 0; i < nworks; i++) {
        // Capturing by reference is safe: every thread is joined below,
        // before any captured variable goes out of scope.
        ThreadPool.emplace_back([&]() {
            std::vector<void*> Pool;  // addresses handed out in the current round
            Pool.reserve(ntimes);
            for (size_t j = 0; j < rounds; j++) {
                const auto alloc_begin = Clock::now();
                for (size_t k = 0; k < ntimes; k++) {
                    Pool.push_back(malloc(8 + k));
                }
                const auto alloc_end = Clock::now();

                for (void* p : Pool) {
                    free(p);
                }
                const auto free_end = Clock::now();

                Pool.clear();
                malloc_ns += duration_cast<nanoseconds>(alloc_end - alloc_begin).count();
                free_ns += duration_cast<nanoseconds>(free_end - alloc_end).count();
            }
        });
    }
    for (auto& e : ThreadPool) {
        e.join();
    }

    const long long malloc_ms = malloc_ns.load() / 1000000;
    const long long free_ms = free_ns.load() / 1000000;
    std::cout << nworks << "个线程 单个线程测试" << rounds << "轮 每轮" << ntimes << "次申请与释放内存" << std::endl
              << "申请时间" << malloc_ms << "ms 释放时间" << free_ms << "ms\n总时间" << malloc_ms + free_ms << "ms" << std::endl;
    std::cout << "########################################" << std::endl;
}
// Benchmarks ConcurrentAlloc/ConcurrentFree under concurrency.
//
//   ntimes: number of allocations (and matching frees) per round
//   rounds: number of rounds each thread performs
//   nworks: number of worker threads
//
// Mirrors TestBenchMarkMalloc so the two results are comparable: the same
// request sizes (8 + k bytes) are used, and the allocation and release phases
// are timed per thread with std::chrono::steady_clock, summed into atomic
// counters, and printed in milliseconds. clock() is avoided because its ticks
// are only milliseconds where CLOCKS_PER_SEC == 1000, and on POSIX it reports
// process-wide CPU time, which would be counted once per thread when summed.
void TestConcurrentAlloc(size_t ntimes, size_t rounds, size_t nworks) {
    using Clock = std::chrono::steady_clock;
    using std::chrono::duration_cast;
    using std::chrono::nanoseconds;

    std::cout << "ConcurrentAlloc Test" << std::endl;

    std::vector<std::thread> ThreadPool;
    ThreadPool.reserve(nworks);

    // Accumulated in nanoseconds so short rounds are not truncated to zero
    // before being summed; converted to milliseconds only when printing.
    std::atomic<long long> Alloc_ns{0};
    std::atomic<long long> Delete_ns{0};

    for (size_t i = 0; i < nworks; i++) {
        // Capturing by reference is safe: every thread is joined below,
        // before any captured variable goes out of scope.
        ThreadPool.emplace_back([&]() {
            std::vector<void*> Pool;  // addresses handed out in the current round
            Pool.reserve(ntimes);
            for (size_t j = 0; j < rounds; j++) {
                const auto alloc_begin = Clock::now();
                for (size_t k = 0; k < ntimes; k++) {
                    // Same size sequence as the malloc benchmark (was a fixed 8 bytes).
                    Pool.push_back(ConcurrentAlloc(8 + k));
                }
                const auto alloc_end = Clock::now();

                for (void* p : Pool) {
                    ConcurrentFree(p);
                }
                const auto free_end = Clock::now();

                Pool.clear();
                Alloc_ns += duration_cast<nanoseconds>(alloc_end - alloc_begin).count();
                Delete_ns += duration_cast<nanoseconds>(free_end - alloc_end).count();
            }
        });
    }
    for (auto& e : ThreadPool) {
        e.join();
    }

    const long long alloc_ms = Alloc_ns.load() / 1000000;
    const long long delete_ms = Delete_ns.load() / 1000000;
    std::cout << nworks << "个线程 单个线程测试" << rounds << "轮 每轮" << ntimes << "次申请与释放内存" << std::endl
              << "申请时间" << alloc_ms << "ms 释放时间" << delete_ms << "ms\n总时间" << alloc_ms + delete_ms << "ms" << std::endl;
    std::cout << "########################################" << std::endl;
}
// Runs both benchmarks back to back with the same workload so their
// printed timings can be compared directly.
int main() {
    const size_t kAllocsPerRound = 10000;
    const size_t kRoundsPerThread = 4;
    const size_t kThreadCount = 10;

    TestBenchMarkMalloc(kAllocsPerRound, kRoundsPerThread, kThreadCount);
    TestConcurrentAlloc(kAllocsPerRound, kRoundsPerThread, kThreadCount);
    return 0;
}
上述代码分别测试了在多线程下malloc和高并发内存池申请与释放内存所需要的时间。
注意:
在测试时发现的错误:
1. 在切分Span时,Span中自由链表尾没有置空导致切分空间出错,需要加上NextObj(tail) = nullptr
2.当前桶中存在Span时直接返回了,没有将Span与页号建立起映射
// Excerpt from a function whose header is not shown here.
// Check whether the bucket indexed by NumPage already holds a Span.
if (!_SpanList[NumPage].Empty()) {
Span*NumPageSpan =_SpanList[NumPage].Pop();
// Record every page of the popped Span in IdSpanMap before returning it;
// the surrounding notes describe the missing mapping on this early-return
// path as the bug that was fixed.
for (PAGE_ID i = 0; i < NumPageSpan->_Num; i++) {// the Span covers _Num pages
IdSpanMap[NumPageSpan->_PageID + i] = NumPageSpan;
}
return NumPageSpan;
}
// Returns a Span from `List` that still has free objects. When no Span in the
// list has one, a new Span is obtained from the PageCache, carved into a
// singly linked free list of `size`-byte objects, inserted at the front of
// `List`, and returned.
//
//   List: the bucket's Span list to search and, if needed, extend
//   size: size in bytes of a single object served by this bucket
//
// Locking as visible here: List._mtx is unlocked before calling into the
// PageCache and locked again before the new Span is inserted, so the function
// returns with List._mtx locked.
// NOTE(review): this implies the caller holds List._mtx on entry - confirm.
Span* CentralCache::GetSpan(SpanList& List, size_t size) {
    // Look for an existing Span in this bucket that still has free objects.
    for (Span* it = List.begin(); it != List.end(); it = it->_next) {
        if (it->FreeList != nullptr) {
            return it;
        }
    }

    // Release the bucket lock first so threads returning memory to this
    // bucket are not blocked while we talk to the PageCache.
    List._mtx.unlock();

    // The PageCache is guarded by its own mutex.
    PageCache::GetInst()->_PageMtx.lock();
    Span* span = PageCache::GetInst()->NewSpan(SizeClass::NumForPage(size));
    span->IsUse = true;
    span->ObjectSize = size;  // stored so ConcurrentFree does not need the size passed in
    PageCache::GetInst()->_PageMtx.unlock();

    // The new Span is not attached to any bucket yet, so it is carved up
    // without holding the bucket lock.
    // PAGESIZE is used as a shift amount: page id -> address, page count -> bytes.
    char* const start = reinterpret_cast<char*>((span->_PageID) << PAGESIZE);
    const size_t ByteSize = (span->_Num) << PAGESIZE;
    char* const end = start + ByteSize;

    // Carve the Span's memory into a free list. The first object is the head.
    span->FreeList = start;
    void* tail = start;
    char* cur = start + size;
    // Link only objects that fit entirely inside the Span. The previous
    // condition (cur < end) also linked a trailing partial object whenever the
    // Span's byte size was not a multiple of `size`, letting that object run
    // past the end of the Span.
    while (cur < end && static_cast<size_t>(end - cur) >= size) {
        NextObj(tail) = cur;
        tail = cur;
        cur += size;
    }
    // Terminate the list; a missing terminator here was the bug found in testing.
    NextObj(tail) = nullptr;

    // Attaching the Span to the bucket requires the bucket lock again.
    List._mtx.lock();
    List.Insert(List.begin(), span);
    return span;
}
测试截图:
如上图可以看出高并发内存池在多线程下申请空间的效率比malloc要高很多
修改并测试后的代码位置: