C++项目 – 高并发内存池(六)解决一些细节问题
文章目录
一、大于256KB的内存对象的申请
1.申请流程
- 256KB是32页,直接向Page Cache申请页为单位的span更加高效;
- 如果申请内存的对象大小大于128页,在Page Cache中直接向堆申请;
- 对象释放内存也一样,大于256KB的对象直接释放给Page Cache,大于128页的内存直接释放给堆;
2.代码实现
Common.h
- 增加了直接向堆释放内存的函数 SystemFree;
- 在 RoundUp 对齐规则中增加了大于256KB对象的对齐规则;
// Release memory directly back to the OS heap.
// NOTE(review): the non-Windows branch is empty, so on Linux this silently
// does nothing (leaks) — TODO add a munmap-based path.
inline static void SystemFree(void* ptr) {
#ifdef _WIN32
VirtualFree(ptr, 0, MEM_RELEASE);
#else
// non-Windows release path not implemented
#endif
}
// Manages size-class alignment and hash-bucket mapping rules.
class SizeClass {
public:
    // Round `size` up to its size-class boundary. The alignment grows with
    // the request size to bound internal fragmentation; requests above 256KB
    // are rounded to whole pages.
    static inline size_t RoundUp(size_t size) {
        if (size <= 128) {
            // 8-byte alignment
            return _RoundUp(size, 8);
        }
        else if (size <= 1024) {
            // 16-byte alignment
            return _RoundUp(size, 16);
        }
        else if (size <= 8 * 1024) {
            // 128-byte alignment
            return _RoundUp(size, 128);
        }
        else if (size <= 64 * 1024) {
            // 1024-byte alignment
            return _RoundUp(size, 1024);
        }
        else if (size <= 256 * 1024) {
            // 8KB alignment
            return _RoundUp(size, 8 * 1024);
        }
        else {
            // Above 256KB: align to whole pages.
            return _RoundUp(size, 1 << PAGE_SHIFT);
        }
        return -1; // unreachable — every branch above returns
    }
}; // BUG FIX: the class definition was missing its closing ';'
ConcurrentAlloc.h
ConcurrentAlloc
函数申请内存时,对象大于256KB就从page cache直接申请;ConcurrentFree
函数释放内存时,对象大于256KB直接归还给page cache;
static void* ConcurrentAlloc(size_t size) {
if (size > MAXBYTE) {
//大于256KB的直接向page cache申请
size_t alignSize = SizeClass::RoundUp(size);
size_t kpage = alignSize >> PAGE_SHIFT;
PageCache::GetInstance()->_pageMtx.lock();
Span* span = PageCache::GetInstance()->NewSpan(kpage);
span->_isUse = true;
PageCache::GetInstance()->_pageMtx.unlock();
void* ptr = (void*)(span->_pageID << PAGE_SHIFT);
return ptr;
}
else {
if (pTLSThreadCache == nullptr) {
//如果pTLSThreadCache指针是空的,就构造一个ThreadCache对象,并指向它
//则这个ThreadCache对象就是本线程专属的ThreadCache对象
pTLSThreadCache = new ThreadCache;
}
cout << std::this_thread::get_id() << " : " << pTLSThreadCache << endl;
//使用pTLSThreadCache访问本线程专属的ThreadCache对象来开辟空间
return pTLSThreadCache->Allocate(size);
}
}
static void ConcurrentFree(void* obj, size_t size) {
if (size > MAXBYTE) {
//大于256KB的内存直接释放给page cache
Span* span = PageCache::GetInstance()->MapObjectToSpan(obj);
PageCache::GetInstance()->_pageMtx.lock();
PageCache::GetInstance()->ReleaseSpanToPageCache(span);
PageCache::GetInstance()->_pageMtx.unlock();
}
else {
assert(pTLSThreadCache);
pTLSThreadCache->Deallocate(obj, size);
}
}
PageCache.cpp
NewSpan 函数如果申请内存大于128页,直接向堆申请;ReleaseSpanToPageCache 函数如果释放内存大于128页,直接释放给堆;
// Get a span of exactly k pages. Caller must hold _pageMtx.
Span* PageCache::NewSpan(size_t k) {
    // Spans larger than 128 pages are allocated straight from the OS heap.
    if (k > NPAGES - 1) {
        void* ptr = SystemAlloc(k);
        Span* span = new Span;
        span->_n = k;
        span->_pageID = (PAGE_ID)ptr >> PAGE_SHIFT;
        // Record the page-id -> span mapping so the span can be found on free.
        _idSpanMap[span->_pageID] = span;
        return span;
    }
    assert(k > 0 && k < NPAGES);
    // First check whether bucket k already holds a span.
    if (!_spanLists[k].Empty()) {
        Span* kSpan = _spanLists[k].PopFront();
        // BUG FIX: re-register every page of the span before handing it out.
        // A span recycled by ReleaseSpanToPageCache only has its boundary
        // pages mapped, so without this loop the central cache cannot map
        // interior blocks back to this span.
        for (PAGE_ID i = 0; i < kSpan->_n; i++) {
            _idSpanMap[kSpan->_pageID + i] = kSpan;
        }
        return kSpan;
    }
    // Otherwise look for a larger span in later buckets and split it.
    for (size_t i = k + 1; i < NPAGES; i++) {
        if (!_spanLists[i].Empty()) {
            Span* nspan = _spanLists[i].PopFront();
            Span* kspan = new Span;
            // Cut a k-page span off the head of nspan: kspan is returned,
            // the remainder of nspan is hung back on the matching bucket.
            kspan->_pageID = nspan->_pageID;
            kspan->_n = k;
            nspan->_pageID += k;
            nspan->_n -= k;
            _spanLists[nspan->_n].PushFront(nspan);
            // Register nspan's first and last page ids so page cache merge
            // lookups can find it when neighbours are released.
            _idSpanMap[nspan->_pageID] = nspan;
            _idSpanMap[nspan->_pageID + nspan->_n - 1] = nspan;
            // Register every page of kspan so central cache can map small
            // blocks back to their owning span.
            for (PAGE_ID i = 0; i < kspan->_n; i++) {
                _idSpanMap[kspan->_pageID + i] = kspan;
            }
            return kspan;
        }
    }
    // Nothing to split: grab a 128-page block from the OS, bucket it,
    // and recurse once — the new big span will be found and split.
    Span* bigSpan = new Span;
    void* ptr = SystemAlloc(NPAGES - 1);
    bigSpan->_pageID = (PAGE_ID)ptr >> PAGE_SHIFT;
    bigSpan->_n = NPAGES - 1;
    _spanLists[NPAGES - 1].PushFront(bigSpan);
    return NewSpan(k);
}
// Return an idle span to the page cache and merge it with free neighbours.
// Caller must hold _pageMtx.
void PageCache::ReleaseSpanToPageCache(Span* span) {
    // Spans larger than 128 pages came straight from the OS; return them there.
    if (span->_n > NPAGES - 1) {
        void* ptr = (void*)(span->_pageID << PAGE_SHIFT);
        SystemFree(ptr);
        delete span;
        return;
    }
    // Merge with adjacent free spans to reduce external fragmentation.
    // Merge backwards (towards lower page ids).
    while (1) {
        PAGE_ID prevId = span->_pageID - 1;
        auto ret = _idSpanMap.find(prevId);
        if (ret == _idSpanMap.end()) {
            break; // previous page unknown: stop merging
        }
        Span* prevSpan = ret->second;
        if (prevSpan->_isUse == true) {
            break; // neighbour still in use
        }
        if (prevSpan->_n + span->_n > NPAGES - 1) {
            break; // merged span would exceed the largest bucket
        }
        span->_pageID = prevSpan->_pageID;
        span->_n += prevSpan->_n;
        // BUG FIX: prevSpan must be unlinked from its bucket before deletion,
        // otherwise the list keeps a dangling node that is later handed out
        // again (this is the crash analysed in the debugging section below).
        _spanLists[prevSpan->_n].Erase(prevSpan);
        delete prevSpan;
    }
    // Merge forwards (towards higher page ids).
    while (1) {
        PAGE_ID nextId = span->_pageID + span->_n;
        auto ret = _idSpanMap.find(nextId);
        if (ret == _idSpanMap.end()) {
            break;
        }
        Span* nextSpan = ret->second;
        if (nextSpan->_isUse == true) {
            break;
        }
        if (nextSpan->_n + span->_n > NPAGES - 1) {
            break;
        }
        span->_n += nextSpan->_n;
        _spanLists[nextSpan->_n].Erase(nextSpan);
        delete nextSpan;
    }
    // Park the merged span in its bucket and register its boundary pages.
    _spanLists[span->_n].PushFront(span);
    span->_isUse = false;
    _idSpanMap[span->_pageID] = span;
    _idSpanMap[span->_pageID + span->_n - 1] = span;
}
二、使用定长内存池配合脱离使用new
1.原理
- 高并发内存池的目的就是替代malloc,但是代码中此时还是存在使用new来创建对象,因此需要脱离malloc;
- 可以使用之前写过的定长内存池来代替new;
2.代码实现
PageCache.h
- 添加一个定长内存池成员
#pragma once
#include "Common.h"
#include "ObjectPool.h"
// Page cache: singleton that manages spans in whole-page units (1..128 pages).
class PageCache {
public:
    static PageCache* GetInstance() {
        return &_sInstance;
    }
    std::mutex _pageMtx; // global page-cache lock
    // Get a span of exactly k pages.
    Span* NewSpan(size_t k);
    // Map a block address to the span that owns its page.
    Span* MapObjectToSpan(void* obj);
    // Return an idle span and merge it with adjacent free spans.
    void ReleaseSpanToPageCache(Span* span);
private:
    SpanList _spanLists[NPAGES]; // bucket i holds spans of i pages
    // page id -> owning span
    std::unordered_map<PAGE_ID, Span*> _idSpanMap;
    // fixed-size object pool replacing new/delete for Span objects
    ObjectPool<Span> _spanPool;
    PageCache() {}
    PageCache(const PageCache&) = delete;
    static PageCache _sInstance;
};
PageCache.cpp
- 将所有的new和delete都改成定长内存池的new和delete;
- 因为定长内存池也是全局唯一的,因此多线程对它的访问也需要加锁;
#define _CRT_SECURE_NO_WARNINGS 1
#include "PageCache.h"
PageCache PageCache::_sInstance;
// Get a span of exactly k pages. Caller must hold _pageMtx.
Span* PageCache::NewSpan(size_t k) {
    assert(k > 0);
    // Spans larger than 128 pages come straight from the OS heap.
    if (k > NPAGES - 1) {
        void* ptr = SystemAlloc(k);
        //Span* span = new Span;
        // BUG FIX: removed the _pageMtx.lock()/unlock() pair that used to
        // surround _spanPool.New(). NewSpan is always entered with _pageMtx
        // already held by the caller (ConcurrentAlloc / GetOneSpan), and
        // std::mutex is non-recursive, so re-locking here deadlocks. The
        // span pool is already protected by that outer lock.
        Span* span = _spanPool.New();
        span->_n = k;
        span->_pageID = (PAGE_ID)ptr >> PAGE_SHIFT;
        // Record the page-id -> span mapping.
        //_idSpanMap[span->_pageID] = span;
        _idSpanMap.set(span->_pageID, span);
        return span;
    }
    // First check whether bucket k already holds a span.
    if (!_spanLists[k].Empty()) {
        // Found one: map every page, then hand it out.
        // (BUG FIX: this line was bare text without `//` — a syntax error.)
        Span* kSpan = _spanLists[k].PopFront();
        // Map page ids to the span so central cache can find it on recycle.
        for (PAGE_ID i = 0; i < kSpan->_n; ++i)
        {
            //_idSpanMap[kSpan->_pageID + i] = kSpan;
            _idSpanMap.set(kSpan->_pageID + i, kSpan);
        }
        return kSpan;
    }
    // Otherwise look for a larger span in later buckets and split it.
    for (size_t i = k + 1; i < NPAGES; i++) {
        if (!_spanLists[i].Empty()) {
            Span* nspan = _spanLists[i].PopFront();
            //Span* kspan = new Span;
            Span* kspan = _spanPool.New();
            // Cut a k-page span off the head of nspan: kspan is returned,
            // the remainder is hung back on the matching bucket.
            kspan->_pageID = nspan->_pageID;
            kspan->_n = k;
            nspan->_pageID += k;
            nspan->_n -= k;
            _spanLists[nspan->_n].PushFront(nspan);
            // Register nspan's boundary page ids for merge lookups.
            //_idSpanMap[nspan->_pageID] = nspan;
            //_idSpanMap[nspan->_pageID + nspan->_n - 1] = nspan;
            _idSpanMap.set(nspan->_pageID, nspan);
            _idSpanMap.set(nspan->_pageID + nspan->_n - 1, nspan);
            // Register every page of kspan for central cache lookups.
            for (PAGE_ID i = 0; i < kspan->_n; i++) {
                //_idSpanMap[kspan->_pageID + i] = kspan;
                _idSpanMap.set(kspan->_pageID + i, kspan);
            }
            return kspan;
        }
    }
    // Nothing to split: get a 128-page block from the OS, bucket it, recurse.
    //Span* bigSpan = new Span;
    Span* bigSpan = _spanPool.New();
    void* ptr = SystemAlloc(NPAGES - 1);
    bigSpan->_pageID = (PAGE_ID)ptr >> PAGE_SHIFT;
    bigSpan->_n = NPAGES - 1;
    _spanLists[bigSpan->_n].PushFront(bigSpan);
    return NewSpan(k);
}
// Map a block address to its owning span (radix-tree version).
Span* PageCache::MapObjectToSpan(void* obj) {
    PAGE_ID id = ((PAGE_ID)obj >> PAGE_SHIFT);
    // Old locked unordered_map lookup, kept for reference:
    //std::unique_lock<std::mutex> lock(_pageMtx);
    //auto ret = _idSpanMap.find(id);
    //if (ret != _idSpanMap.end()) {
    // return ret->second;
    //}
    //else {
    // // the mapping must exist; reaching here is a bug
    // assert(false);
    // return nullptr;
    //}
    // With the radix tree, reads need no lock.
    auto ret = (Span*)_idSpanMap.get(id);
    assert(ret != nullptr);
    return ret;
}
// Return an idle span to the page cache and merge it with free neighbours.
// Caller must hold _pageMtx.
void PageCache::ReleaseSpanToPageCache(Span* span) {
    // Spans larger than 128 pages came straight from the OS; return them there.
    if (span->_n > NPAGES - 1) {
        void* ptr = (void*)(span->_pageID << PAGE_SHIFT);
        SystemFree(ptr);
        //delete span;
        _spanPool.Delete(span);
        return;
    }
    // Merge with adjacent free spans to reduce external fragmentation.
    // Merge backwards (towards lower page ids).
    while (1) {
        PAGE_ID prevId = span->_pageID - 1;
        // Look up the page-id -> span mapping.
        //auto ret = _idSpanMap.find(prevId);
        // BUG FIX: the next comment line was bare text without `//`
        // (a syntax error in the original listing).
        // Previous page unknown: stop merging.
        //if (ret == _idSpanMap.end()) {
        // break;
        //}
        auto ret = (Span*)_idSpanMap.get(prevId);
        if (ret == nullptr) {
            break;
        }
        // Neighbour still in use: do not merge.
        //Span* prevSpan = ret->second;
        Span* prevSpan = ret;
        if (prevSpan->_isUse == true) {
            break;
        }
        // A merged span above 128 pages could not be bucketed: do not merge.
        if (prevSpan->_n + span->_n > NPAGES - 1) {
            break;
        }
        // Absorb the previous span.
        span->_pageID = prevSpan->_pageID;
        span->_n += prevSpan->_n;
        _spanLists[prevSpan->_n].Erase(prevSpan);
        //delete prevSpan;
        _spanPool.Delete(prevSpan);
    }
    // Merge forwards (towards higher page ids).
    while (1) {
        PAGE_ID nextId = span->_pageID + span->_n;
        //auto ret = _idSpanMap.find(nextId);
        //if (ret == _idSpanMap.end()) {
        // break;
        //}
        auto ret = (Span*)_idSpanMap.get(nextId);
        if (ret == nullptr) {
            break;
        }
        //Span* nextSpan = ret->second;
        Span* nextSpan = ret;
        if (nextSpan->_isUse == true) {
            break;
        }
        if (nextSpan->_n + span->_n > NPAGES - 1) {
            break;
        }
        span->_n += nextSpan->_n;
        _spanLists[nextSpan->_n].Erase(nextSpan);
        //delete nextSpan;
        _spanPool.Delete(nextSpan);
    }
    // Park the merged span in its bucket and register its boundary pages.
    _spanLists[span->_n].PushFront(span);
    span->_isUse = false;
    //_idSpanMap[span->_pageID] = span;
    //_idSpanMap[span->_pageID + span->_n - 1] = span;
    _idSpanMap.set(span->_pageID, span);
    _idSpanMap.set(span->_pageID + span->_n - 1, span);
}
ConcurrentAlloc.h
- 同上
static void* ConcurrentAlloc(size_t size) {
if (size > MAXBYTE) {
//大于256KB的直接向page cache申请
size_t alignSize = SizeClass::RoundUp(size);
size_t kpage = alignSize >> PAGE_SHIFT;
PageCache::GetInstance()->_pageMtx.lock();
Span* span = PageCache::GetInstance()->NewSpan(kpage);
span->_isUse = true;
PageCache::GetInstance()->_pageMtx.unlock();
void* ptr = (void*)(span->_pageID << PAGE_SHIFT);
return ptr;
}
else {
if (pTLSThreadCache == nullptr) {
//如果pTLSThreadCache指针是空的,就构造一个ThreadCache对象,并指向它
//则这个ThreadCache对象就是本线程专属的ThreadCache对象
static ObjectPool<ThreadCache> tcPool;
//pTLSThreadCache = new ThreadCache;
pTLSThreadCache = tcPool.New();
}
cout << std::this_thread::get_id() << " : " << pTLSThreadCache << endl;
//使用pTLSThreadCache访问本线程专属的ThreadCache对象来开辟空间
return pTLSThreadCache->Allocate(size);
}
}
三、释放对象时不传对象的size
1.原理
- 由于Central Cache模块的设计,一个span所切分的内存块大小都是一样的,所以可以在span类中增加一个成员,保存内存块的大小;
- 在central cache切分的时候,直接将该span切分的内存块大小保存起来;
2.代码实现
Common.h
- 增加一个对象大小的属性
_objSize
// A span: a run of contiguous pages managed as a single unit.
struct Span
{
    PAGE_ID _pageID = 0;       // page id of the first page in the run
    size_t _n = 0;             // number of pages
    Span* _next = nullptr;     // intrusive doubly-linked list links
    Span* _prev = nullptr;
    size_t _objSize = 0;       // size of the small objects this span is cut into
    size_t _useCount = 0;      // blocks currently handed out to thread caches
    void* _freeList = nullptr; // free list of the cut blocks
    bool _isUse = false;       // whether the span is currently handed out
};
CentralCache.cpp
- 在切分Span的时候,保存该span切分对象的大小;
// Get a span from `spanList` that still has free blocks, fetching and
// slicing a fresh one from the page cache if necessary.
// Precondition: caller holds spanList._mtx.
Span* CentralCache::GetOneSpan(SpanList& spanList, size_t size) {
    // First look for a span in this bucket that still has free blocks.
    Span* it = spanList.Begin();
    while (it != spanList.End()) {
        if (it->_freeList != nullptr) {
            return it;
        }
        else {
            it = it->_next;
        }
    }
    // Drop the bucket lock so threads returning memory are not blocked
    // while we go to the page cache.
    spanList._mtx.unlock();
    // No usable span: ask the page cache under its global lock.
    PageCache::GetInstance()->_pageMtx.lock();
    Span* span = PageCache::GetInstance()->NewSpan(SizeClass::NumMovePage(size));
    span->_isUse = true;
    // Remember the block size this span is cut into (lets free() omit size).
    span->_objSize = size;
    PageCache::GetInstance()->_pageMtx.unlock();
    // Slice the span without the bucket lock: it is not yet published, so
    // no other thread can reach it.
    // Compute the span's start address and total byte count.
    char* start = (char*)(span->_pageID << PAGE_SHIFT);
    size_t bytes = span->_n << PAGE_SHIFT;
    char* end = start + bytes;
    // Cut off a head block first so the rest can be tail-appended.
    span->_freeList = start;
    start += size;
    void* tail = span->_freeList;
    while (start < end) {
        NextObj(tail) = start;
        tail = start;
        start += size;
    }
    // BUG FIX: terminate the free list. Without this the last block's next
    // pointer is uninitialised garbage, and FetchRangeObj (which walks until
    // NextObj(end) == nullptr) runs off the end and crashes.
    NextObj(tail) = nullptr;
    // Re-take the bucket lock before publishing the span.
    spanList._mtx.lock();
    spanList.PushFront(span);
    return span;
}
ConcurrentAlloc.h
ConcurrentAlloc
申请对象大于256KB的时候,也需要在span中存一下对象size;ConcurrentFree
释放对象可以去掉对象size参数,直接从span中获取;
static void* ConcurrentAlloc(size_t size) {
if (size > MAXBYTE) {
//大于256KB的直接向page cache申请
size_t alignSize = SizeClass::RoundUp(size);
size_t kpage = alignSize >> PAGE_SHIFT;
PageCache::GetInstance()->_pageMtx.lock();
Span* span = PageCache::GetInstance()->NewSpan(kpage);
span->_isUse = true;
span->_objSize = size;
PageCache::GetInstance()->_pageMtx.unlock();
void* ptr = (void*)(span->_pageID << PAGE_SHIFT);
return ptr;
}
else {
if (pTLSThreadCache == nullptr) {
//如果pTLSThreadCache指针是空的,就构造一个ThreadCache对象,并指向它
//则这个ThreadCache对象就是本线程专属的ThreadCache对象
static ObjectPool<ThreadCache> tcPool;
//pTLSThreadCache = new ThreadCache;
pTLSThreadCache = tcPool.New();
}
cout << std::this_thread::get_id() << " : " << pTLSThreadCache << endl;
//使用pTLSThreadCache访问本线程专属的ThreadCache对象来开辟空间
return pTLSThreadCache->Allocate(size);
}
}
static void ConcurrentFree(void* obj) {
Span* span = PageCache::GetInstance()->MapObjectToSpan(obj);
size_t size = span->_objSize;
if (size > MAXBYTE) {
//大于256KB的内存直接释放给page cache
PageCache::GetInstance()->_pageMtx.lock();
PageCache::GetInstance()->ReleaseSpanToPageCache(span);
PageCache::GetInstance()->_pageMtx.unlock();
}
else {
assert(pTLSThreadCache);
pTLSThreadCache->Deallocate(obj, size);
}
}
四、访问_idSpanMap的线程安全问题
1.原理
- 由于申请和释放内存的时候都涉及到访问_idSpanMap,因此可能引发线程安全问题;
- 由于PageCache对_idSpanMap的访问都是加了全局锁的,因此只需要考虑在其他模块访问_idSpanMap的线程安全问题;
- 其他模块对_idSpanMap的访问是通过
MapObjectToSpan
函数完成的,我们可以在该函数中添加一个RAII锁,出了函数作用域自动解锁;
2.代码实现
PageCache.cpp
// Map a block address to the span owning its page.
// Takes _pageMtx (RAII) because other modules call this without the lock.
Span* PageCache::MapObjectToSpan(void* obj) {
    const PAGE_ID id = ((PAGE_ID)obj >> PAGE_SHIFT);
    // RAII lock: released automatically when the function returns.
    std::unique_lock<std::mutex> guard(_pageMtx);
    auto it = _idSpanMap.find(id);
    if (it == _idSpanMap.end()) {
        // Every live block must be mapped; reaching here is a bug.
        assert(false);
        return nullptr;
    }
    return it->second;
}
五、多线程环境下对比malloc测试
#define _CRT_SECURE_NO_WARNINGS 1
#include "Common.h"
#include "ConcurrentAlloc.h"
// ntimes: allocations/frees per round
// nworks: number of worker threads
// rounds: number of rounds
// Measures total time spent in malloc and in free across all threads.
void BenchmarkMalloc(size_t ntimes, size_t nworks, size_t rounds)
{
    std::vector<std::thread> vthread(nworks);
    std::atomic<size_t> malloc_costtime = 0;
    std::atomic<size_t> free_costtime = 0;
    for (size_t k = 0; k < nworks; ++k)
    {
        vthread[k] = std::thread([&, k]() {
            std::vector<void*> v;
            v.reserve(ntimes);
            for (size_t j = 0; j < rounds; ++j)
            {
                size_t begin1 = clock();
                for (size_t i = 0; i < ntimes; i++)
                {
                    v.push_back(malloc(16));
                    //v.push_back(malloc((16 + i) % 8192 + 1));
                }
                size_t end1 = clock();
                size_t begin2 = clock();
                for (size_t i = 0; i < ntimes; i++)
                {
                    free(v[i]);
                }
                size_t end2 = clock();
                v.clear();
                malloc_costtime += (end1 - begin1);
                free_costtime += (end2 - begin2);
            }
        });
    }
    for (auto& t : vthread)
    {
        t.join();
    }
    cout << nworks << "个线程并发执行" << rounds << "轮次,每轮次malloc"
        << ntimes << "花费:" << malloc_costtime << "ms" << endl;
    cout << nworks << "个线程并发执行" << rounds << "轮次,每轮次free"
        << ntimes << "花费:" << free_costtime << "ms" << endl;
    // BUG FIX: the old printf passed size_t with %u and std::atomic<size_t>
    // with %d — passing a non-trivially-copyable std::atomic through "..."
    // is undefined behaviour, and %u/%d are the wrong width for size_t.
    // Load the atomics explicitly and use %zu.
    printf("%zu个线程并发malloc&free %zu次,总计花费:%zu ms\n",
        nworks, nworks * rounds * ntimes,
        malloc_costtime.load() + free_costtime.load());
}
// ntimes: allocations/frees per round; nworks: threads; rounds: rounds.
// Same benchmark as BenchmarkMalloc but for ConcurrentAlloc/ConcurrentFree.
void BenchmarkConcurrentMalloc(size_t ntimes, size_t nworks, size_t rounds)
{
    std::vector<std::thread> vthread(nworks);
    std::atomic<size_t> malloc_costtime = 0;
    std::atomic<size_t> free_costtime = 0;
    for (size_t k = 0; k < nworks; ++k)
    {
        vthread[k] = std::thread([&]() {
            std::vector<void*> v;
            v.reserve(ntimes);
            for (size_t j = 0; j < rounds; ++j)
            {
                size_t begin1 = clock();
                for (size_t i = 0; i < ntimes; i++)
                {
                    v.push_back(ConcurrentAlloc(16));
                    //v.push_back(ConcurrentAlloc((16 + i) % 8192 + 1));
                }
                size_t end1 = clock();
                size_t begin2 = clock();
                for (size_t i = 0; i < ntimes; i++)
                {
                    ConcurrentFree(v[i]);
                }
                size_t end2 = clock();
                v.clear();
                malloc_costtime += (end1 - begin1);
                free_costtime += (end2 - begin2);
            }
        });
    }
    for (auto& t : vthread)
    {
        t.join();
    }
    cout << nworks << "个线程并发执行" << rounds << "轮次,每轮次concurrent alloc"
        << ntimes << "花费:" << malloc_costtime << "ms" << endl;
    cout << nworks << "个线程并发执行" << rounds << "轮次,每轮次concurrent dealloc"
        << ntimes << "花费:" << free_costtime << "ms" << endl;
    // BUG FIX: the old printf passed size_t with %u and std::atomic<size_t>
    // with %d — UB through varargs. Load the atomics and use %zu.
    printf("%zu个线程并发concurrent alloc&dealloc %zu次,总计花费:%zu ms\n",
        nworks, nworks * rounds * ntimes,
        malloc_costtime.load() + free_costtime.load());
}
// Benchmark driver: runs the concurrent-pool benchmark between separators.
int main()
{
    const size_t timesPerRound = 10000;
    const char* separator = "==========================================================";
    cout << separator <<
        endl;
    // 4 threads, 10 rounds each.
    BenchmarkConcurrentMalloc(timesPerRound, 4, 10);
    cout << endl << endl;
    //BenchmarkMalloc(timesPerRound, 4, 10);
    cout << separator <<
        endl;
    return 0;
}
六、复杂问题的调试技巧
1.条件断点
- 想要在出错的位置停下来,就以出错的条件写一个if语句,语句里面随便写一条语句,在这里打断点,就可以在出错的时候停下来;
2.调用堆栈
- 监视窗口只能查看当前函数栈帧中的信息,如果想看到是哪个函数调用的当前函数,就打开调用堆栈;
3.死循环
- 程序疑似死循环。可以中断程序,会在死循环处停下;
4.调试过程
- 程序运行时,在
CentralCache.cpp
的GetOneSpan
函数中出现了bug:
错误显示在使用NextObj
函数访问tail的前4(或8)个字节的时候,产生了访问权限冲突,很有可能是访问到了空指针; - 使用监视窗口查看三个指针的情况,发现start一开始指向的是空指针;
- start是由
span->_pageID
计算得到,查看span->_pageID
,发现为0,按理说该span是不能挂载到spanList上的,可能是从PageCache获取span的时候出现了问题;
- 在
PageCache.cpp
的NewSpan
函数中每一处返回span之前,都加上条件断点,一旦返回的span的_pageID为0,就中断,果然在从第k个桶中获取span的时候触发了断点;
- 查看此时的
_spanLists[k]
,我们发现,该spanList不为空,但是其中span的_pageID是0;
- 再重新走一遍流程,发现在申请空间的环节没有出现问题,在释放一段空间后,再次申请空间,就出现了问题,因此,程序的问题可能出在
PageCache
释放空间的函数ReleaseSpanToPageCache
上,经过分析,是向前合并span的时候,没有将prevSpan从spanList上Erase掉,导致该spanList还有一个节点是已经delete掉的span,后面分配出去必然会导致空指针的访问;
因此应该从spanList上Erase掉该span:
5.测试结果
- 使用测试代码进行测试,在每个哈希桶都申请内存;
多线程环境下高并发内存池的效果要比malloc和free的效果好;
七、代码
Common.h
#pragma once
//公共头文件
#include <iostream>
#include <vector>
#include <assert.h>
#include <thread>
#include <mutex>
#include <algorithm>
#include <unordered_map>
using std::cout;
using std::endl;
using std::vector;
static const size_t MAX_BYTES = 256 * 1024; // largest request served by ThreadCache (256KB)
static const size_t NFREELIST = 208;        // number of central cache buckets
static const size_t NPAGES = 129;           // page cache bucket count (spans of 1..128 pages)
static const size_t PAGE_SHIFT = 13;        // page size = 1 << 13 = 8KB
#ifdef _WIN32
#include<windows.h>
#else
//linux
#endif
// Page-id type: wide enough to hold (address >> PAGE_SHIFT).
#ifdef _WIN64
typedef unsigned long long PAGE_ID;
#elif defined(_WIN32)
typedef size_t PAGE_ID;
#else
// BUG FIX: this branch was a bare `#elif` (ill-formed — #elif requires a
// condition) and left PAGE_ID undefined on non-Windows platforms.
typedef unsigned long long PAGE_ID;
#endif
// Allocate kpage pages (kpage << 13 bytes) directly from the OS.
// Throws std::bad_alloc on failure.
inline static void* SystemAlloc(size_t kpage) {
    // BUG FIX: `ptr` was declared only inside the _WIN32 branch, so the
    // function did not compile on other platforms.
    void* ptr = nullptr;
#ifdef _WIN32
    ptr = VirtualAlloc(0, kpage << 13, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
#else
    // TODO: non-Windows path (e.g. mmap) not implemented — ptr stays null
    // and the bad_alloc below fires.
#endif
    if (ptr == nullptr) {
        throw std::bad_alloc();
    }
    return ptr;
}
// Release memory directly back to the OS heap.
// NOTE(review): the non-Windows branch is empty, so on Linux this silently
// does nothing (leaks) — TODO add a munmap-based path.
inline static void SystemFree(void* ptr) {
#ifdef _WIN32
VirtualFree(ptr, 0, MEM_RELEASE);
#else
// non-Windows release path not implemented
#endif
}
// Access the first pointer-sized bytes of a free block: each free block
// stores the address of the next block in-place, forming the free list.
static void*& NextObj(void* obj) {
    return *static_cast<void**>(obj);
}
// Singly-linked free list of equal-sized blocks; the link pointer is stored
// in the first bytes of each free block (see NextObj).
class FreeList {
public:
    // Push one block (head insert).
    void Push(void* obj) {
        assert(obj);
        NextObj(obj) = _freeList;
        _freeList = obj;
        _size++;
    }
    // Push a pre-linked chain [start, end] containing `size` blocks.
    void PushRange(void* start, void* end, size_t size) {
        assert(start);
        assert(end);
        NextObj(end) = _freeList;
        _freeList = start;
        _size += size;
    }
    // Pop one block (head removal).
    void* Pop() {
        assert(_freeList);
        void* obj = _freeList;
        _freeList = NextObj(obj);
        _size--;
        return obj;
    }
    // Detach the first `size` blocks as the chain [start, end].
    // BUG FIX: `end` is now an out-parameter — it was passed by value, so the
    // caller's variable was never written (ListTooLong passes an lvalue and
    // expects it to be filled in).
    void PopRange(void*& start, void*& end, size_t size) {
        assert(_size >= size); // must have at least `size` blocks available
        start = _freeList;
        end = start;
        for (size_t i = 0; i < size - 1; i++) {
            end = NextObj(end);
        }
        _freeList = NextObj(end);
        NextObj(end) = nullptr;
        _size -= size;
    }
    bool Empty() {
        return _freeList == nullptr;
    }
    // Adaptive batch size used by the slow-start fetch from central cache.
    size_t& MaxSize() {
        return _maxSize;
    }
    size_t Size() {
        return _size;
    }
private:
    void* _freeList = nullptr; // head of the block chain
    size_t _maxSize = 1;       // slow-start batch limit
    size_t _size = 0;          // number of blocks on the list
};
// Manages size-class alignment and hash-bucket mapping rules.
class SizeClass {
public:
    // Alignment rules — overall internal fragmentation is kept near 10%:
    // [1,128]              align 8       freelist[0,16)
    // [128+1,1024]         align 16      freelist[16,72)
    // [1024+1,8*1024]      align 128     freelist[72,128)
    // [8*1024+1,64*1024]   align 1024    freelist[128,184)
    // [64*1024+1,256*1024] align 8*1024  freelist[184,208)
    // Helper for RoundUp: round `size` up to a multiple of `align`
    // (align must be a power of two).
    static inline size_t _RoundUp(size_t size, size_t align) {
        //if (size % align == 0) {
        // return size;
        //}
        //else {
        // return (size / align + 1) * align;
        //}
        // Bit trick: same result as the division above, but faster.
        return ((size + align - 1) & ~(align - 1));
    }
    // Round `size` up to its size-class boundary.
    static inline size_t RoundUp(size_t size) {
        if (size <= 128) {
            // 8-byte alignment
            return _RoundUp(size, 8);
        }
        else if (size <= 1024) {
            // 16-byte alignment
            return _RoundUp(size, 16);
        }
        else if (size <= 8 * 1024) {
            // 128-byte alignment
            return _RoundUp(size, 128);
        }
        else if (size <= 64 * 1024) {
            // 1024-byte alignment
            return _RoundUp(size, 1024);
        }
        else if (size <= 256 * 1024) {
            // 8KB alignment
            return _RoundUp(size, 8 * 1024);
        }
        else {
            // Above 256KB: align to whole pages.
            return _RoundUp(size, 1 << PAGE_SHIFT);
        }
        return -1; // unreachable — every branch above returns
    }
    // Helper for Index: bucket index within one alignment group.
    // alignShift is log2 of the alignment for that group.
    static inline size_t _Index(size_t size, size_t alignShift) {
        //if (size % align == 0) {
        // return size / align - 1;
        //}
        //else {
        // return size / align;
        //}
        // Bit trick: equivalent to the division above; note the parameter is
        // the power-of-two exponent, not the alignment itself.
        return ((size + (1 << alignShift) - 1) >> alignShift) - 1;
    }
    // Map an object size to its free-list bucket index.
    static inline size_t Index(size_t size) {
        assert(size <= MAX_BYTES);
        // Number of buckets in each alignment group.
        static int groupArray[4] = { 16, 56, 56, 56 };
        if (size <= 128) {
            return _Index(size, 3);
        }
        else if (size <= 1024) {
            // Subtract the part covered by the previous group, compute the
            // in-group index, then offset by the earlier groups' bucket counts.
            return _Index(size - 128, 4) + groupArray[0];
        }
        else if (size <= 8 * 1024) {
            return _Index(size - 1024, 7) + groupArray[0] + groupArray[1];
        }
        else if (size <= 64 * 1024) {
            return _Index(size - 8 * 1024, 10) + groupArray[0] + groupArray[1] + groupArray[2];
        }
        else if (size <= 256 * 1024) {
            return _Index(size - 64 * 1024, 13) + groupArray[0] + groupArray[1] + groupArray[2] + groupArray[3];
        }
        else {
            assert(false);
        }
        return -1;
    }
    // How many blocks a thread cache fetches from central cache at once.
    static size_t NumMoveSize(size_t size) {
        // Batch size scales inversely with object size.
        assert(size > 0);
        // Clamp the count to [2, 512].
        size_t num = MAX_BYTES / size;
        if (num < 2) {
            num = 2;
        }
        if (num > 512) {
            num = 512;
        }
        return num;
    }
    // How many pages central cache requests from page cache for this size.
    static size_t NumMovePage(size_t size) {
        assert(size > 0);
        // Upper-bound block count for one fetch...
        size_t num = NumMoveSize(size);
        // ...converted to bytes...
        size_t npage = num * size;
        // ...then to pages (at least one page).
        npage >>= PAGE_SHIFT;
        if (npage == 0) {
            npage = 1;
        }
        return npage;
    }
};
// A span: a run of contiguous pages managed as a single unit.
struct Span
{
    PAGE_ID _pageID = 0;       // page id of the first page in the run
    size_t _n = 0;             // number of pages
    Span* _next = nullptr;     // intrusive doubly-linked list links
    Span* _prev = nullptr;
    size_t _objSize = 0;       // size of the small objects this span is cut into
    size_t _useCount = 0;      // blocks currently handed out to thread caches
    void* _freeList = nullptr; // free list of the cut blocks
    bool _isUse = false;       // whether the span is currently handed out
};
class SpanList {
public:
SpanList() {
_head = new Span;
_head->_next = _head;
_head->_prev = _head;
}
void Insert(Span* pos, Span* newSapn) {
assert(pos);
assert(newSapn);
Span* prev = pos->_prev;
prev->_next = newSapn;
newSapn->_prev = prev;
newSapn->_next = pos;
pos->_prev = newSapn;
}
void Erase(Span* pos) {
assert(pos);
assert(pos != _head);
//不用释放空间
Span* prev = pos->_prev;
Span* next = pos->_next;
prev->_next = next;
next->_prev = prev;
}
Span* Begin() {
return _head->_next;
}
Span* End() {
return _head;
}
bool Empty() {
return _head->_next == _head;
}
void PushFront(Span* newSapn) {
Insert(Begin(), newSapn);
}
Span* PopFront() {
Span* front = _head->_next;
Erase(front);
return front;
}
private:
Span* _head; //头节点
public:
std::mutex _mtx; //桶锁
};
ThreadCache.h
#pragma once
#include "Common.h"
// Per-thread cache: the lock-free front end of the allocator.
class ThreadCache {
public:
    // Allocate / release one object of `size` bytes.
    void* Allocate(size_t size);
    void Deallocate(void* obj, size_t size);
    // Refill a bucket with a batch from the central cache.
    void* FetchFromCentralCache(size_t index, size_t alignSize);
    // Spill part of an overlong free list back to the central cache.
    void ListTooLong(FreeList& list, size_t size);
private:
    FreeList _freeLists[NFREELIST]; // one free list per size class
};
// _declspec(thread) gives every thread its own copy of this pointer (TLS):
// each thread reaches its private ThreadCache with no locking at all, while
// other threads cannot see it.
// NOTE(review): MSVC-specific spelling; portable C++11 code would use
// `thread_local`.
static _declspec(thread) ThreadCache* pTLSThreadCache = nullptr;
ThreadCache.cpp
#define _CRT_SECURE_NO_WARNINGS 1
#include "ThreadCache.h"
#include "CentralCache.h"
// Hand out one block of at least `size` bytes from this thread's cache.
void* ThreadCache::Allocate(size_t size) {
    assert(size <= MAX_BYTES);
    // Map the request onto its size class: rounded size and bucket index.
    const size_t alignSize = SizeClass::RoundUp(size);
    const size_t index = SizeClass::Index(size);
    FreeList& bucket = _freeLists[index];
    if (bucket.Empty()) {
        // Bucket exhausted: batch-refill from the central cache.
        return FetchFromCentralCache(index, alignSize);
    }
    // Fast path: pop a cached block — no locks involved.
    return bucket.Pop();
}
void ThreadCache::Deallocate(void* obj, size_t size) {
assert(obj);
assert(size <= MAX_BYTES);
//找该对象对应的freeList的桶,直接插入
size_t index = SizeClass::Index(size);
_freeLists[index].Push(obj);
//当链表的长度大于一次批量申请的内存块的数量时,就归还一段list给central cache
if (_freeLists[index].Size() >= _freeLists[index].MaxSize()) {
ListTooLong(_freeLists[index], size);
}
}
// Detach MaxSize() blocks from `list` and give them back to the central
// cache, which re-attaches each block to its owning span.
void ThreadCache::ListTooLong(FreeList& list, size_t size) {
    void* chainHead = nullptr;
    void* chainTail = nullptr;
    list.PopRange(chainHead, chainTail, list.MaxSize());
    CentralCache::GetInstance()->ReleaseListToSpans(chainHead, size);
}
// Refill bucket `index` from the central cache using a slow-start batch size.
void* ThreadCache::FetchFromCentralCache(size_t index, size_t alignSize) {
    // Slow start: the batch grows by one each refill, capped by NumMoveSize.
    // PORTABILITY FIX: `(std::min)` — the parentheses stop the windows.h
    // min() function-like macro from expanding, and bare `min` does not
    // compile at all on non-Windows toolchains.
    size_t batchNum = (std::min)(_freeLists[index].MaxSize(), SizeClass::NumMoveSize(alignSize));
    if (batchNum == _freeLists[index].MaxSize()) {
        _freeLists[index].MaxSize() += 1;
    }
    void* start = nullptr;
    void* end = nullptr;
    // Central cache returns the chain [start, end] with >= 1 block.
    size_t actualNum = CentralCache::GetInstance()->FetchRangeObj(start, end, batchNum, alignSize);
    assert(actualNum >= 1);
    if (actualNum == 1) {
        // Single block: hand it straight to the caller.
        assert(start == end);
        return start;
    }
    else {
        // Keep the first block for the caller, stash the rest in the bucket.
        _freeLists[index].PushRange(NextObj(start), end, actualNum - 1);
        //NextObj(start) = nullptr;
        return start;
    }
}
CentralCache.h
#pragma once
#include "Common.h"
#include "PageCache.h"
// Central cache: eagerly-initialized singleton shared by every thread;
// guarded per-bucket by SpanList::_mtx.
class CentralCache {
public:
    static CentralCache* GetInstance() {
        return &_sInstance;
    }
    // Fetch up to batchNum blocks of `size` bytes for a thread cache;
    // returns the count actually delivered in the chain [start, end].
    size_t FetchRangeObj(void*& start, void*& end, size_t batchNum, size_t size);
    // Get a span from `spanList` that still has free blocks.
    Span* GetOneSpan(SpanList& spanList, size_t size);
    // Take back a chain of blocks and hang each on its owning span.
    void ReleaseListToSpans(void* start, size_t byte_size);
private:
    SpanList _spanLists[NFREELIST]; // one span list per size class
    // private ctor: singleton
    CentralCache()
    {}
    // no copies of the singleton
    CentralCache(const CentralCache&) = delete;
    static CentralCache _sInstance;
};
CentralCache.cpp
#define _CRT_SECURE_NO_WARNINGS 1
#include "CentralCache.h"
// Definition of the singleton's static instance.
CentralCache CentralCache::_sInstance;
// Hand a batch of blocks from the central cache to a thread cache.
// Returns the number of blocks actually delivered in the chain [start, end].
size_t CentralCache::FetchRangeObj(void*& start, void*& end, size_t batchNum, size_t size) {
    // Locate the bucket for this size class.
    size_t index = SizeClass::Index(size);
    // Per-bucket lock: serializes all threads touching this span list.
    _spanLists[index]._mtx.lock();
    // Get a span that still has free blocks (may fetch from page cache).
    Span* span = GetOneSpan(_spanLists[index], size);
    assert(span);
    assert(span->_freeList);
    // Take up to batchNum blocks; if the span holds fewer, take what it has.
    start = span->_freeList;
    end = start;
    size_t i = 0;
    size_t actualNum = 1; // number of blocks actually taken
    while (i < batchNum - 1 && NextObj(end) != nullptr) {
        end = NextObj(end);
        actualNum++;
        i++;
    }
    // Detach the chain [start, end] from the span's free list.
    span->_freeList = NextObj(end);
    NextObj(end) = nullptr;
    // Track how many blocks of this span are out with thread caches.
    span->_useCount += actualNum;
    _spanLists[index]._mtx.unlock();
    return actualNum;
}
// Get a span from `spanList` that still has free blocks, fetching and
// slicing a new one from the page cache if none is available.
// Precondition: caller holds spanList._mtx (it is released and re-taken
// around the page-cache call).
Span* CentralCache::GetOneSpan(SpanList& spanList, size_t size) {
    // First look for an existing span with free blocks.
    Span* it = spanList.Begin();
    while (it != spanList.End()) {
        if (it->_freeList != nullptr) {
            return it;
        }
        else {
            it = it->_next;
        }
    }
    // Drop the bucket lock so threads returning memory are not blocked
    // while we talk to the page cache.
    spanList._mtx.unlock();
    // Ask the page cache for a new span under its global lock.
    PageCache::GetInstance()->_pageMtx.lock();
    Span* span = PageCache::GetInstance()->NewSpan(SizeClass::NumMovePage(size));
    // Mark the span as handed out.
    span->_isUse = true;
    // Remember the block size this span is cut into (lets free() omit size).
    span->_objSize = size;
    PageCache::GetInstance()->_pageMtx.unlock();
    // Slice the span without any lock: it is not yet published, so no other
    // thread can see it.
    // Compute the span's start address and total byte count.
    char* start = (char*)(span->_pageID << PAGE_SHIFT);
    size_t bytes = span->_n << PAGE_SHIFT;
    char* end = start + bytes;
    // Link the blocks into a free list.
    // Cut off a head block first so the rest can be tail-appended.
    span->_freeList = start;
    start += size;
    void* tail = span->_freeList;
    while (start < end) {
        NextObj(tail) = start;
        tail = NextObj(tail);
        start += size;
    }
    // Null-terminate the list so walkers know where it ends.
    NextObj(tail) = nullptr;
    // Re-take the bucket lock before publishing the span.
    spanList._mtx.lock();
    spanList.PushFront(span);
    return span;
}
// Take back a null-terminated chain of blocks from a thread cache and hang
// each block on the span that owns it.
void CentralCache::ReleaseListToSpans(void* start, size_t byte_size) {
    size_t index = SizeClass::Index(byte_size);
    _spanLists[index]._mtx.lock();
    // The chain tail was null-terminated by PopRange; walk until nullptr.
    while (start) {
        void* next = NextObj(start);
        // Find the span owning this block via the page-id map.
        Span* span = PageCache::GetInstance()->MapObjectToSpan(start);
        // Head-insert the block into its span's free list.
        NextObj(start) = span->_freeList;
        span->_freeList = start;
        span->_useCount--;
        // All blocks of this span are home again: hand the whole span to the
        // page cache, which will merge it with free neighbours.
        if (span->_useCount == 0) {
            _spanLists[index].Erase(span);
            span->_prev = nullptr;
            span->_next = nullptr;
            span->_freeList = nullptr;
            // Only the page-cache lock is needed for the release; drop the
            // bucket lock first so other threads can keep using this bucket.
            _spanLists[index]._mtx.unlock();
            PageCache::GetInstance()->_pageMtx.lock();
            PageCache::GetInstance()->ReleaseSpanToPageCache(span);
            PageCache::GetInstance()->_pageMtx.unlock();
            _spanLists[index]._mtx.lock();
        }
        start = next;
    }
    _spanLists[index]._mtx.unlock();
}
PageCache.h
#pragma once
#include "Common.h"
#include "ObjectPool.h"
// Page cache: singleton that manages spans in whole-page units (1..128 pages).
class PageCache {
public:
    static PageCache* GetInstance() {
        return &_sInstance;
    }
    std::mutex _pageMtx; // global page-cache lock
    // Get a span of exactly k pages.
    Span* NewSpan(size_t k);
    // Map a block address to the span that owns its page.
    Span* MapObjectToSpan(void* obj);
    // Return an idle span and merge it with adjacent free spans.
    void ReleaseSpanToPageCache(Span* span);
private:
    SpanList _spanLists[NPAGES]; // bucket i holds spans of i pages
    // page id -> owning span
    std::unordered_map<PAGE_ID, Span*> _idSpanMap;
    // fixed-size object pool replacing new/delete for Span objects
    ObjectPool<Span> _spanPool;
    PageCache() {}
    PageCache(const PageCache&) = delete;
    static PageCache _sInstance;
};
PageCache.cpp
#define _CRT_SECURE_NO_WARNINGS 1
#include "PageCache.h"
PageCache PageCache::_sInstance;
// Get a span of exactly k pages. Caller must hold _pageMtx.
Span* PageCache::NewSpan(size_t k) {
    assert(k > 0);
    // Spans larger than 128 pages come straight from the OS heap.
    if (k > NPAGES - 1) {
        void* ptr = SystemAlloc(k);
        //Span* span = new Span;
        Span* span = _spanPool.New();
        span->_n = k;
        span->_pageID = (PAGE_ID)ptr >> PAGE_SHIFT;
        // Record the page-id -> span mapping.
        _idSpanMap[span->_pageID] = span;
        return span;
    }
    // First check whether bucket k already holds a span.
    if (!_spanLists[k].Empty()) {
        // Found one — hand it out after mapping its pages.
        // (BUG FIX: this line was bare text without `//` — a syntax error.)
        Span* kSpan = _spanLists[k].PopFront();
        // Map page ids to the span so central cache can find it on recycle.
        for (PAGE_ID i = 0; i < kSpan->_n; ++i)
        {
            _idSpanMap[kSpan->_pageID + i] = kSpan;
        }
        return kSpan;
    }
    // Otherwise look for a larger span in later buckets and split it.
    for (size_t i = k + 1; i < NPAGES; i++) {
        if (!_spanLists[i].Empty()) {
            Span* nspan = _spanLists[i].PopFront();
            //Span* kspan = new Span;
            Span* kspan = _spanPool.New();
            // Cut a k-page span off the head of nspan: kspan is returned,
            // the remainder is hung back on the matching bucket.
            kspan->_pageID = nspan->_pageID;
            kspan->_n = k;
            nspan->_pageID += k;
            nspan->_n -= k;
            _spanLists[nspan->_n].PushFront(nspan);
            // Register nspan's boundary page ids for merge lookups on release.
            _idSpanMap[nspan->_pageID] = nspan;
            _idSpanMap[nspan->_pageID + nspan->_n - 1] = nspan;
            // Register every page of kspan for central cache lookups.
            for (PAGE_ID i = 0; i < kspan->_n; i++) {
                _idSpanMap[kspan->_pageID + i] = kspan;
            }
            return kspan;
        }
    }
    // Nothing to split: get a 128-page block from the OS, bucket it, and
    // recurse once so it gets found and split.
    //Span* bigSpan = new Span;
    Span* bigSpan = _spanPool.New();
    void* ptr = SystemAlloc(NPAGES - 1);
    bigSpan->_pageID = (PAGE_ID)ptr >> PAGE_SHIFT;
    bigSpan->_n = NPAGES - 1;
    _spanLists[bigSpan->_n].PushFront(bigSpan);
    return NewSpan(k);
}
// Map a block address to the span owning its page.
// Takes _pageMtx (RAII) because other modules call this without the lock.
Span* PageCache::MapObjectToSpan(void* obj) {
    const PAGE_ID id = ((PAGE_ID)obj >> PAGE_SHIFT);
    // RAII lock: released automatically when the function returns.
    std::unique_lock<std::mutex> guard(_pageMtx);
    auto it = _idSpanMap.find(id);
    if (it == _idSpanMap.end()) {
        // Every live block must be mapped; reaching here is a bug.
        assert(false);
        return nullptr;
    }
    return it->second;
}
// Return an idle span to the page cache and merge it with free neighbours.
// Caller must hold _pageMtx.
void PageCache::ReleaseSpanToPageCache(Span* span) {
    // Spans larger than 128 pages came straight from the OS; return them there.
    if (span->_n > NPAGES - 1) {
        void* ptr = (void*)(span->_pageID << PAGE_SHIFT);
        SystemFree(ptr);
        //delete span;
        _spanPool.Delete(span);
        return;
    }
    // Merge with adjacent free spans to reduce external fragmentation.
    // Merge backwards (towards lower page ids).
    while (1) {
        // Page id just before this span.
        PAGE_ID prevId = span->_pageID - 1;
        // Look up the page-id -> span mapping.
        auto ret = _idSpanMap.find(prevId);
        // Previous page unknown: stop merging.
        if (ret == _idSpanMap.end()) {
            break;
        }
        // Neighbour still in use: do not merge.
        Span* prevSpan = ret->second;
        if (prevSpan->_isUse == true) {
            break;
        }
        // A merged span above 128 pages could not be bucketed: do not merge.
        if (prevSpan->_n + span->_n > NPAGES - 1) {
            break;
        }
        // Absorb the previous span (and unlink it from its bucket).
        span->_pageID = prevSpan->_pageID;
        span->_n += prevSpan->_n;
        _spanLists[prevSpan->_n].Erase(prevSpan);
        //delete prevSpan;
        _spanPool.Delete(prevSpan);
    }
    // Merge forwards (towards higher page ids).
    while (1) {
        PAGE_ID nextId = span->_pageID + span->_n;
        auto ret = _idSpanMap.find(nextId);
        if (ret == _idSpanMap.end()) {
            break;
        }
        Span* nextSpan = ret->second;
        if (nextSpan->_isUse == true) {
            break;
        }
        if (nextSpan->_n + span->_n > NPAGES - 1) {
            break;
        }
        span->_n += nextSpan->_n;
        _spanLists[nextSpan->_n].Erase(nextSpan);
        //delete nextSpan;
        _spanPool.Delete(nextSpan);
    }
    // Park the merged span in its bucket and register its boundary pages.
    _spanLists[span->_n].PushFront(span);
    span->_isUse = false;
    //_idSpanMap[span->_pageID] = span;
    //_idSpanMap[span->_pageID + span->_n - 1] = span;
    _idSpanMap.set(span->_pageID, span);
    _idSpanMap.set(span->_pageID + span->_n - 1, span);
}
ObjectPool.h
#pragma once
#include "Common.h"
// Fixed-type object pool: carves T-sized slots out of large raw chunks and
// recycles freed slots through an intrusive free list (the "next" pointer is
// stored inside the freed slot itself). Not thread-safe; callers must
// serialize access externally.
template<class T>
class ObjectPool {
public:
	// Allocate and construct one T.
	// Prefers a recycled slot from the free list; otherwise bump-allocates
	// from the current chunk, fetching a fresh 128KB chunk when needed.
	// Throws std::bad_alloc if the OS refuses memory.
	T* New() {
		T* obj = nullptr;
		if (_freeList) {
			// Pop the head of the free list and reuse that slot.
			void* next = *((void**)_freeList);
			obj = (T*)_freeList;
			_freeList = next;
		}
		else {
			if (_remainBytes < sizeof(T)) {
				// Not enough room in the current chunk (this also covers the
				// very first call). Any leftover tail bytes are abandoned.
				_remainBytes = 128 * 1024;
				// NOTE(review): ">> 13" assumes 8KB pages (PAGE_SHIFT == 13);
				// SystemAlloc takes a page count.
				_memory = (char*)SystemAlloc(_remainBytes >> 13);
				if (_memory == nullptr) {
					throw std::bad_alloc();
				}
			}
			// Bump-allocate the next slot from the chunk.
			obj = (T*)_memory;
			// A slot must be at least pointer-sized so the free list can
			// store its "next" pointer inside a freed slot.
			// size_t (not int) avoids signed/unsigned mixing with sizeof.
			size_t objSize = sizeof(T) < sizeof(void*) ? sizeof(void*) : sizeof(T);
			_memory += objSize;
			_remainBytes -= objSize;
		}
		// The slot is raw memory: run T's constructor via placement-new.
		new(obj)T;
		return obj;
	}
	// Destroy one T and return its slot to the free list.
	void Delete(T* obj) {
		// Explicitly run the destructor (memory is not released to the OS).
		obj->~T();
		// Head-insert the slot into the free list.
		*((void**)obj) = _freeList;
		_freeList = obj;
	}
private:
	char* _memory = nullptr;     // bump cursor into the current raw chunk
	void* _freeList = nullptr;   // intrusive list of freed slots
	size_t _remainBytes = 0;     // bytes left in the current chunk (was int: signed/unsigned bug)
};
ConcurrentAlloc.h
#pragma once
#include <mutex>
#include "Common.h"
#include "ThreadCache.h"
#include "PageCache.h"
static void* ConcurrentAlloc(size_t size) {
if (size > MAX_BYTES) {
//大于256KB的直接向page cache申请
size_t alignSize = SizeClass::RoundUp(size);
size_t kpage = alignSize >> PAGE_SHIFT;
PageCache::GetInstance()->_pageMtx.lock();
Span* span = PageCache::GetInstance()->NewSpan(kpage);
span->_isUse = true;
span->_objSize = size;
PageCache::GetInstance()->_pageMtx.unlock();
void* ptr = (void*)(span->_pageID << PAGE_SHIFT);
return ptr;
}
else {
if (pTLSThreadCache == nullptr) {
//如果pTLSThreadCache指针是空的,就构造一个ThreadCache对象,并指向它
//则这个ThreadCache对象就是本线程专属的ThreadCache对象
static ObjectPool<ThreadCache> tcPool;
//pTLSThreadCache = new ThreadCache;
pTLSThreadCache = tcPool.New();
}
//cout << std::this_thread::get_id() << " : " << pTLSThreadCache << endl;
//使用pTLSThreadCache访问本线程专属的ThreadCache对象来开辟空间
return pTLSThreadCache->Allocate(size);
}
}
static void ConcurrentFree(void* obj) {
Span* span = PageCache::GetInstance()->MapObjectToSpan(obj);
size_t size = span->_objSize;
if (size > MAX_BYTES) {
//大于256KB的内存直接释放给page cache
PageCache::GetInstance()->_pageMtx.lock();
PageCache::GetInstance()->ReleaseSpanToPageCache(span);
PageCache::GetInstance()->_pageMtx.unlock();
}
else {
assert(pTLSThreadCache);
pTLSThreadCache->Deallocate(obj, size);
}
}
Benchmark.cpp
#define _CRT_SECURE_NO_WARNINGS 1
#include "Common.h"
#include "ConcurrentAlloc.h"
//ntimes:一轮申请和释放内存的次数
//rounds:轮次
//统计申请内存和释放内存各花费多少时间
// Benchmark plain malloc/free for comparison.
// ntimes: allocations per round; nworks: number of threads; rounds: rounds
// per thread. Prints per-phase and total clock() time.
void BenchmarkMalloc(size_t ntimes, size_t nworks, size_t rounds)
{
	std::vector<std::thread> vthread(nworks);
	// Brace-init: std::atomic is non-copyable, so "= 0" copy-init is
	// ill-formed before C++17.
	std::atomic<size_t> malloc_costtime{ 0 };
	std::atomic<size_t> free_costtime{ 0 };
	for (size_t k = 0; k < nworks; ++k)
	{
		vthread[k] = std::thread([&]() {
			std::vector<void*> v;
			v.reserve(ntimes);
			for (size_t j = 0; j < rounds; ++j)
			{
				size_t begin1 = clock();
				for (size_t i = 0; i < ntimes; i++)
				{
					// Vary the size (1..8192 bytes) to hit many size classes.
					v.push_back(malloc((16 + i) % 8192 + 1));
				}
				size_t end1 = clock();
				size_t begin2 = clock();
				for (size_t i = 0; i < ntimes; i++)
				{
					free(v[i]);
				}
				size_t end2 = clock();
				v.clear();
				malloc_costtime += (end1 - begin1);
				free_costtime += (end2 - begin2);
			}
		});
	}
	for (auto& t : vthread)
	{
		t.join();
	}
	std::cout << nworks << "个线程并发执行" << rounds << "轮次,每轮次malloc"
		<< ntimes << "花费:" << malloc_costtime << "ms" << std::endl;
	std::cout << nworks << "个线程并发执行" << rounds << "轮次,每轮次free"
		<< ntimes << "花费:" << free_costtime << "ms" << std::endl;
	// BUGFIX: size_t/atomic<size_t> arguments need %zu — the old "%u"/"%d"
	// specifiers were undefined behavior on 64-bit targets.
	printf("%zu个线程并发malloc&free %zu次,总计花费:%zu ms\n",
		nworks, nworks * rounds * ntimes,
		malloc_costtime.load() + free_costtime.load());
}
// 单轮次申请释放次数 线程数 轮次
// Benchmark the concurrent memory pool (ConcurrentAlloc/ConcurrentFree).
// ntimes: allocations per round; nworks: number of threads; rounds: rounds
// per thread. Prints per-phase and total clock() time.
void BenchmarkConcurrentMalloc(size_t ntimes, size_t nworks, size_t rounds)
{
	std::vector<std::thread> vthread(nworks);
	// Brace-init: std::atomic is non-copyable, so "= 0" copy-init is
	// ill-formed before C++17.
	std::atomic<size_t> malloc_costtime{ 0 };
	std::atomic<size_t> free_costtime{ 0 };
	for (size_t k = 0; k < nworks; ++k)
	{
		vthread[k] = std::thread([&]() {
			std::vector<void*> v;
			v.reserve(ntimes);
			for (size_t j = 0; j < rounds; ++j)
			{
				size_t begin1 = clock();
				for (size_t i = 0; i < ntimes; i++)
				{
					// Vary the size (1..8192 bytes) to hit many size classes.
					v.push_back(ConcurrentAlloc((16 + i) % 8192 + 1));
				}
				size_t end1 = clock();
				size_t begin2 = clock();
				for (size_t i = 0; i < ntimes; i++)
				{
					ConcurrentFree(v[i]);
				}
				size_t end2 = clock();
				v.clear();
				malloc_costtime += (end1 - begin1);
				free_costtime += (end2 - begin2);
			}
		});
	}
	for (auto& t : vthread)
	{
		t.join();
	}
	std::cout << nworks << "个线程并发执行" << rounds << "轮次,每轮次concurrent alloc"
		<< ntimes << "花费:" << malloc_costtime << "ms" << std::endl;
	std::cout << nworks << "个线程并发执行" << rounds << "轮次,每轮次concurrent dealloc"
		<< ntimes << "花费:" << free_costtime << "ms" << std::endl;
	// BUGFIX: size_t/atomic<size_t> arguments need %zu — the old "%u"/"%d"
	// specifiers were undefined behavior on 64-bit targets.
	printf("%zu个线程并发concurrent alloc&dealloc %zu次,总计花费:%zu ms\n",
		nworks, nworks * rounds * ntimes,
		malloc_costtime.load() + free_costtime.load());
}
// Run both benchmarks with identical parameters so the pool and plain
// malloc can be compared side by side.
int main()
{
	const size_t n = 10000;     // allocations per round
	const size_t nworks = 4;    // threads
	const size_t rounds = 10;   // rounds per thread
	cout << "==========================================================" << endl;
	BenchmarkConcurrentMalloc(n, nworks, rounds);
	cout << endl << endl;
	BenchmarkMalloc(n, nworks, rounds);
	cout << "==========================================================" << endl;
	return 0;
}