如果一个内存池需要线程同步了,估计和默认的内存操作也差不了多远了。现在对内存操作的优化只是在优化线程同步操作上面了。默认的lock和unlock可能实现得过于完美,因而要求更多的cpu周期。如果选择更原子的lock和unlock实现,还是可以加快内存操作的速度的。
多线程内存池在实现上也就是在申请和释放外面包裹了一对加锁和解锁操作而已。
如果我们采用模板编程,就可以实现一个功能强悍的模板类,该模板类有两个参数,单线程内存池和锁。这样的话,可以组合出很多情况。比如说,单线程内存池有对象大小固定和不固定两种实现,锁在Windows下可以用临界区,互斥体,信号量,事件实现。这样最多可以组合出8种不同的实例化。
一般说来互斥体比临界区慢很多,在这里可以进行很好的测试。我实现了临界区和互斥体版本的锁,经过测试发现前者比后者快30多倍。因为互斥体据说是内核对象,而临界区是用户态对象,那么互斥体的使用就需要系统在用户态和内核态之间进行切换,肯定会消耗更多的时间。
互斥体版本代码如下,
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <windows.h>
template <typename T> class MemoryPool
{
public:
MemoryPool(size_t size = EXPANSION_SIZE)
{
expandTheFreeList(size);
}
~MemoryPool();
//allocate a T element from the free list.
void* alloc(size_t size)
{
if (next == NULL)
{
expandTheFreeList();
}
MemoryPool<T>* head = next;
next = head->next;
return head;
}
//return a T element to the free list.
void free(void* doomed)
{
MemoryPool<T>* head = static_cast< MemoryPool<T>* > (doomed);
head->next = next;
next = head;
}
private:
//next element on the free list.
MemoryPool<T>* next;
//if the freelist is empty, expand it by this amount.
enum {EXPANSION_SIZE = 32};
//add free elements to the free list
void expandTheFreeList(int howMany = EXPANSION_SIZE);
};
template <typename T> MemoryPool<T> :: ~MemoryPool()
{
MemoryPool<T>* nextPtr = NULL;
while (nextPtr)
{
nextPtr = next;
next = next->next;
delete [] nextPtr;
}
}
template <typename T> void MemoryPool<T> :: expandTheFreeList(int howMany)
{
//we must allocate an object enough to contain the next pointer
size_t size = sizeof(T) > sizeof(MemoryPool<T>*) ? sizeof(T) :
sizeof(MemoryPool<T>*);
MemoryPool<T>* runner = (MemoryPool<T>*) new char[size];
next = runner;
for (int i = 0; i < howMany; ++i)
{
runner->next = (MemoryPool<T>*) new char[size];
runner = runner->next;
}
runner->next = NULL;
}
class ABClock //abstract base class
{
public:
virtual ~ABClock() {}
virtual void lock() = 0;
virtual void unlock() = 0;
};
class MutexLock : public ABClock
{
public:
MutexLock()
{
hMutex = CreateMutex(NULL, FALSE, NULL);
}
~MutexLock()
{
CloseHandle(hMutex);
}
void lock()
{
WaitForSingleObject(hMutex, INFINITE);
}
void unlock()
{
ReleaseMutex(hMutex);
}
private:
HANDLE hMutex;
};
template <typename POOLTYPE, typename LOCK>
class MTMemoryPool
{
public:
//allocate an element from the freelist.
void* alloc(size_t size)
{
void* mem;
theLock.lock();
mem = stPool.alloc(size);
theLock.unlock();
return mem;
}
//return an element to the freelist
void free(void* someElement)
{
theLock.lock();
stPool.free(someElement);
theLock.unlock();
}
private:
POOLTYPE stPool;//Single-threaded pool.
LOCK theLock;
};
class Rational
{
public:
Rational(int a = 0, int b = 1) : n(a), d(b) {}
void* operator new(size_t size)
{
return memPool->alloc(size);
}
void operator delete(void* doomed, size_t size)
{
memPool->free(doomed);
}
static void newMemPool()
{
memPool = new MTMemoryPool< MemoryPool<Rational>, MutexLock>;
}
static void deleteMemPool()
{
delete memPool;
}
private:
int n;
int d;
static MTMemoryPool< MemoryPool<Rational>, MutexLock>* memPool;
};
MTMemoryPool< MemoryPool<Rational>, MutexLock>* Rational::memPool = NULL;
int main()
{
const int ARRAY_SIZE = 1000;
const int LOOP_TIMES = 5000;
Rational* array[ARRAY_SIZE];
clock_t beg = clock();
Rational::newMemPool();
for (int i = 0; i < LOOP_TIMES; ++i)
{
for (int j = 0; j < ARRAY_SIZE; ++j)
{
array[j] = new Rational(j);
}
for (int j = 0; j < ARRAY_SIZE; ++j)
{
delete array[j];
}
}
clock_t end = clock();
printf("use %f second(s).\n", 1.0 * (end - beg) / CLOCKS_PER_SEC);
system("pause");
Rational::deleteMemPool();
return 0;
}
运行结果,
临界区版本代码如下,
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <windows.h>
template <typename T> class MemoryPool
{
public:
MemoryPool(size_t size = EXPANSION_SIZE)
{
expandTheFreeList(size);
}
~MemoryPool();
//allocate a T element from the free list.
void* alloc(size_t size)
{
if (next == NULL)
{
expandTheFreeList();
}
MemoryPool<T>* head = next;
next = head->next;
return head;
}
//return a T element to the free list.
void free(void* doomed)
{
MemoryPool<T>* head = static_cast< MemoryPool<T>* > (doomed);
head->next = next;
next = head;
}
private:
//next element on the free list.
MemoryPool<T>* next;
//if the freelist is empty, expand it by this amount.
enum {EXPANSION_SIZE = 32};
//add free elements to the free list
void expandTheFreeList(int howMany = EXPANSION_SIZE);
};
template <typename T> MemoryPool<T> :: ~MemoryPool()
{
MemoryPool<T>* nextPtr = NULL;
while (nextPtr)
{
nextPtr = next;
next = next->next;
delete [] nextPtr;
}
}
template <typename T> void MemoryPool<T> :: expandTheFreeList(int howMany)
{
//we must allocate an object enough to contain the next pointer
size_t size = sizeof(T) > sizeof(MemoryPool<T>*) ? sizeof(T) :
sizeof(MemoryPool<T>*);
MemoryPool<T>* runner = (MemoryPool<T>*) new char[size];
next = runner;
for (int i = 0; i < howMany; ++i)
{
runner->next = (MemoryPool<T>*) new char[size];
runner = runner->next;
}
runner->next = NULL;
}
class ABClock //abstract base class
{
public:
virtual ~ABClock() {}
virtual void lock() = 0;
virtual void unlock() = 0;
};
class CriticalSectionLock : public ABClock
{
public:
CriticalSectionLock()
{
InitializeCriticalSection(&csMyCriticalSection);
}
~CriticalSectionLock()
{
DeleteCriticalSection(&csMyCriticalSection);
}
void lock()
{
EnterCriticalSection(&csMyCriticalSection);
}
void unlock()
{
LeaveCriticalSection(&csMyCriticalSection);
}
private:
CRITICAL_SECTION csMyCriticalSection;
};
template <typename POOLTYPE, typename LOCK>
class MTMemoryPool
{
public:
//allocate an element from the freelist.
void* alloc(size_t size)
{
void* mem;
theLock.lock();
mem = stPool.alloc(size);
theLock.unlock();
return mem;
}
//return an element to the freelist
void free(void* someElement)
{
theLock.lock();
stPool.free(someElement);
theLock.unlock();
}
private:
POOLTYPE stPool;//Single-threaded pool.
LOCK theLock;
};
class Rational
{
public:
Rational(int a = 0, int b = 1) : n(a), d(b) {}
void* operator new(size_t size)
{
return memPool->alloc(size);
}
void operator delete(void* doomed, size_t size)
{
memPool->free(doomed);
}
static void newMemPool()
{
memPool = new MTMemoryPool< MemoryPool<Rational>, CriticalSectionLock>;
}
static void deleteMemPool()
{
delete memPool;
}
private:
int n;
int d;
static MTMemoryPool< MemoryPool<Rational>, CriticalSectionLock>* memPool;
};
MTMemoryPool< MemoryPool<Rational>, CriticalSectionLock>* Rational::memPool = NULL;
int main()
{
const int ARRAY_SIZE = 1000;
const int LOOP_TIMES = 5000;
Rational* array[ARRAY_SIZE];
clock_t beg = clock();
Rational::newMemPool();
for (int i = 0; i < LOOP_TIMES; ++i)
{
for (int j = 0; j < ARRAY_SIZE; ++j)
{
array[j] = new Rational(j);
}
for (int j = 0; j < ARRAY_SIZE; ++j)
{
delete array[j];
}
}
clock_t end = clock();
printf("use %f second(s).\n", 1.0 * (end - beg) / CLOCKS_PER_SEC);
system("pause");
Rational::deleteMemPool();
return 0;
}
运行结果,
两个版本的代码只在线程同步锁的实现上有差别,但是速度却相差了30多倍。
http://www.xpc-yx.com/2012/11/19/%e5%a4%9a%e7%ba%bf%e7%a8%8b%e5%86%85%e5%ad%98%e6%b1%a0/