游戏引擎中的多线程
在游戏引擎开发中,多线程渲染是经常采用的线程模型。大名鼎鼎的虚幻4引擎就采用了多线程渲染,其中有三条与渲染相关的线程:GameThread(游戏线程)、RenderThread(渲染线程)和RHIThread(RHI线程)。
其中 RenderThread 和 RHIThread 构成了经典的生产者和消费者模式(一对一)。
问题设计
对比在“生产者消费者问题”下 Queue 和 RingBuffer 的效率。
实验:在相应的数据结构上连续进行 4000000 次数据的写入和读取。
(1)单线程中的链式结构Queue的效率
(2)一写线程和一读线程在链式结构Queue的效率
(3)一写线程和一读线程在RingBuffer的效率
队列(Queue链式结构)
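这里用的是 STL 的 queue(std::queue),文中称之为链式结构;严格来说,std::queue 默认的底层容器是 std::deque,按分块方式存储,而不是单一的连续数组。数据结构的具体实现就不展开了。std::queue 不是线程安全的,不加锁的情况下直接在多线程中使用很容易崩溃。(注意:下面贴出的清单中实际使用的容器是 RingBuffer。)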
#include <iostream>
#include<thread>
#include<time.h>
#include<windows.h>
#include<mutex>
#include <atomic>
#define ARRAY_MAX_SIZE 4000000
using namespace std;
/// Fixed-capacity FIFO ring buffer for exactly one producer thread and one
/// consumer thread (SPSC). It is not safe with more than one producer or more
/// than one consumer.
///
/// One slot is always kept free so that "full" and "empty" can be told apart,
/// which means at most Length - 1 elements are stored at any time.
///
/// `head` and `tail` are atomics: the producer publishes a written slot with a
/// release store to `tail`, and the consumer hands a drained slot back with a
/// release store to `head`. The matching acquire loads on the other side make
/// the slot contents visible without a mutex. With plain `int` indices the same
/// access pattern is a data race. Because of the atomic members the buffer is
/// neither copyable nor movable.
///
/// @tparam DataType element type; must be default-constructible and copy-assignable.
/// @tparam Length   number of slots in the backing array (usable capacity is Length - 1).
template<typename DataType, int Length = 200000>
class RingBuffer
{
    static_assert(Length > 1, "RingBuffer needs at least two slots (one is always kept free)");

private:
    DataType datas[Length];
    std::atomic<int> head{0}; // index of the next slot to read; stored only by Pop()
    std::atomic<int> tail{0}; // index of the next slot to write; stored only by Push()

public:
    /// @return true when no further element can be pushed.
    bool IsFull() const
    {
        return (tail.load(std::memory_order_acquire) + 1) % Length
            == head.load(std::memory_order_acquire);
    }

    /// @return true when there is nothing to pop.
    bool IsEmpty() const
    {
        return tail.load(std::memory_order_acquire)
            == head.load(std::memory_order_acquire);
    }

    /// Appends `data` at the tail. Call from the producer thread only.
    /// @return false (buffer unchanged) when the buffer is full, true otherwise.
    bool Push(DataType data)
    {
        // Only the producer writes `tail`, so a relaxed load of our own index is enough.
        const int writeIndex = tail.load(std::memory_order_relaxed);
        const int nextTail = (writeIndex + 1) % Length;

        // Acquire pairs with the release store in Pop(): the consumer has finished
        // reading every slot before `head`.
        if (nextTail == head.load(std::memory_order_acquire))
            return false;

        datas[writeIndex] = data;

        // Release publishes the slot contents together with the new tail.
        tail.store(nextTail, std::memory_order_release);
        return true;
    }

    /// Removes the oldest element into `data`. Call from the consumer thread only.
    /// @return false (`data` untouched) when the buffer is empty, true otherwise.
    bool Pop(DataType& data)
    {
        // Only the consumer writes `head`, so a relaxed load of our own index is enough.
        const int readIndex = head.load(std::memory_order_relaxed);

        // Acquire pairs with the release store in Push(): the slot contents are visible.
        if (readIndex == tail.load(std::memory_order_acquire))
            return false;

        data = datas[readIndex];

        // Release hands the slot back to the producer.
        head.store((readIndex + 1) % Length, std::memory_order_release);
        return true;
    }

    /// @return number of stored elements; only a snapshot while the other thread is running.
    int Size() const
    {
        const int h = head.load(std::memory_order_acquire);
        const int t = tail.load(std::memory_order_acquire);
        return (t >= h) ? (t - h) : (t + Length - h);
    }
};
// Shared state for the producer/consumer benchmark.
clock_t start = 0;                  // baseline subtracted from clock() when reporting elapsed time
int b = 0;                          // receives the value most recently popped by ReadThread
mutex mt;                           // referenced only by the commented-out lock()/unlock() calls
// Written by WriteThread after its last push and polled by ReadThread, so it
// must be atomic: a plain bool shared like this is a data race.
std::atomic<bool> isFinish{false};
RingBuffer<int> ringbuffer;         // the queue under test
// Producer side of the benchmark: pushes the values 0 .. ARRAY_MAX_SIZE - 1 into
// `ringbuffer`, retrying a value until the push succeeds, then raises `isFinish`
// and prints the remaining queue size and the elapsed time.
// Uncomment the mt.lock()/mt.unlock() pair to get the mutex-guarded variant.
void WriteThread()
{
    int next = 0;
    while (next < ARRAY_MAX_SIZE)
    {
        //mt.lock();
        const bool stored = ringbuffer.Push(next);
        //mt.unlock();

        // Advance only after a successful push; a full buffer means "try the same value again".
        if (stored)
            ++next;
    }

    // Tell the reader that no more values will arrive.
    isFinish = true;

    const clock_t finished = clock();
    printf("Write DataQueue Num = %d\n ", ringbuffer.Size());
    printf("time = %f b = %d\n ", static_cast<double>(finished - start) / CLOCKS_PER_SEC, b);
}
// Consumer side of the benchmark: keeps popping values from `ringbuffer` into `b`
// until the writer has raised `isFinish` and the buffer is drained, then prints
// the remaining queue size and the elapsed time.
// Uncomment the mt.lock()/mt.unlock() pair to get the mutex-guarded variant.
void ReadThread()
{
    // The finish flag is read before the emptiness check, so values pushed just
    // before the flag was raised are still drained.
    while (!(isFinish && ringbuffer.IsEmpty()))
    {
        if (ringbuffer.IsEmpty())
            continue; // nothing to read yet, poll again

        //mt.lock();
        ringbuffer.Pop(b);
        //mt.unlock();
    }

    const clock_t finished = clock();
    printf("Read DataQueue Num = %d\n ", ringbuffer.Size());
    printf("finish time = %f b = %d\n ", static_cast<double>(finished - start) / CLOCKS_PER_SEC, b);
}
单线程链式Queue
// Single-threaded variant: runs the writer to completion, then the reader.
int main()
{
    // NOTE(review): this only terminates with an unbounded queue. With a bounded
    // RingBuffer whose capacity is smaller than ARRAY_MAX_SIZE, WriteThread()
    // retries the failed push forever because no reader is running yet.
    WriteThread();
    ReadThread();

    // Runs the shell's `Pause` command before exiting.
    system("Pause");
    return 0;
}
一写线程和一读线程
// One-writer / one-reader variant: runs WriteThread and ReadThread concurrently.
int main()
{
    thread producer(WriteThread);
    thread consumer(ReadThread);

    // Wait for both sides before pausing.
    producer.join();
    consumer.join();

    // Runs the shell's `Pause` command before exiting.
    system("Pause");
    return 0;
}
将上面代码中被注释掉的 mt.lock() / mt.unlock() 取消注释,即可得到加锁版本的代码。
Ringbuffer(环形缓存)
环形缓存的作用其实就是队列,只不过内部实现比链式队列简单很多。数据存储采用数组,并有一个头索引 head(front)和一个尾索引 tail(rear)。
当数据进队: rear前移 rear = (rear + 1) % size
当数据出队: front前移 front = (front + 1) % size
/// Fixed-capacity FIFO ring buffer for exactly one producer thread and one
/// consumer thread (SPSC). It is not safe with more than one producer or more
/// than one consumer.
///
/// One slot is always kept free so that "full" and "empty" can be told apart,
/// which means at most Length - 1 elements are stored at any time.
///
/// `head` and `tail` are atomics: the producer publishes a written slot with a
/// release store to `tail`, and the consumer hands a drained slot back with a
/// release store to `head`. The matching acquire loads on the other side make
/// the slot contents visible without a mutex. With plain `int` indices the same
/// access pattern is a data race. Because of the atomic members the buffer is
/// neither copyable nor movable.
///
/// @tparam DataType element type; must be default-constructible and copy-assignable.
/// @tparam Length   number of slots in the backing array (usable capacity is Length - 1).
template<typename DataType, int Length = 200000>
class RingBuffer
{
    static_assert(Length > 1, "RingBuffer needs at least two slots (one is always kept free)");

private:
    DataType datas[Length];
    std::atomic<int> head{0}; // index of the next slot to read; stored only by Pop()
    std::atomic<int> tail{0}; // index of the next slot to write; stored only by Push()

public:
    /// @return true when no further element can be pushed.
    bool IsFull() const
    {
        return (tail.load(std::memory_order_acquire) + 1) % Length
            == head.load(std::memory_order_acquire);
    }

    /// @return true when there is nothing to pop.
    bool IsEmpty() const
    {
        return tail.load(std::memory_order_acquire)
            == head.load(std::memory_order_acquire);
    }

    /// Appends `data` at the tail. Call from the producer thread only.
    /// @return false (buffer unchanged) when the buffer is full, true otherwise.
    bool Push(DataType data)
    {
        // Only the producer writes `tail`, so a relaxed load of our own index is enough.
        const int writeIndex = tail.load(std::memory_order_relaxed);
        const int nextTail = (writeIndex + 1) % Length;

        // Acquire pairs with the release store in Pop(): the consumer has finished
        // reading every slot before `head`.
        if (nextTail == head.load(std::memory_order_acquire))
            return false;

        datas[writeIndex] = data;

        // Release publishes the slot contents together with the new tail.
        tail.store(nextTail, std::memory_order_release);
        return true;
    }

    /// Removes the oldest element into `data`. Call from the consumer thread only.
    /// @return false (`data` untouched) when the buffer is empty, true otherwise.
    bool Pop(DataType& data)
    {
        // Only the consumer writes `head`, so a relaxed load of our own index is enough.
        const int readIndex = head.load(std::memory_order_relaxed);

        // Acquire pairs with the release store in Push(): the slot contents are visible.
        if (readIndex == tail.load(std::memory_order_acquire))
            return false;

        data = datas[readIndex];

        // Release hands the slot back to the producer.
        head.store((readIndex + 1) % Length, std::memory_order_release);
        return true;
    }

    /// @return number of stored elements; only a snapshot while the other thread is running.
    int Size() const
    {
        const int h = head.load(std::memory_order_acquire);
        const int t = tail.load(std::memory_order_acquire);
        return (t >= h) ? (t - h) : (t + Length - h);
    }
};
单写线程和单读线程的ringbuffer
#include <iostream>
#include<thread>
#include<time.h>
#include<windows.h>
#include<mutex>
#define ARRAY_MAX_SIZE 4000000
using namespace std;
/// Fixed-capacity FIFO ring buffer for exactly one producer thread and one
/// consumer thread (SPSC). It is not safe with more than one producer or more
/// than one consumer.
///
/// One slot is always kept free so that "full" and "empty" can be told apart,
/// which means at most Length - 1 elements are stored at any time.
///
/// `head` and `tail` are atomics: the producer publishes a written slot with a
/// release store to `tail`, and the consumer hands a drained slot back with a
/// release store to `head`. The matching acquire loads on the other side make
/// the slot contents visible without a mutex. With plain `int` indices the same
/// access pattern is a data race. Because of the atomic members the buffer is
/// neither copyable nor movable.
///
/// @tparam DataType element type; must be default-constructible and copy-assignable.
/// @tparam Length   number of slots in the backing array (usable capacity is Length - 1).
template<typename DataType, int Length = 200000>
class RingBuffer
{
    static_assert(Length > 1, "RingBuffer needs at least two slots (one is always kept free)");

private:
    DataType datas[Length];
    std::atomic<int> head{0}; // index of the next slot to read; stored only by Pop()
    std::atomic<int> tail{0}; // index of the next slot to write; stored only by Push()

public:
    /// @return true when no further element can be pushed.
    bool IsFull() const
    {
        return (tail.load(std::memory_order_acquire) + 1) % Length
            == head.load(std::memory_order_acquire);
    }

    /// @return true when there is nothing to pop.
    bool IsEmpty() const
    {
        return tail.load(std::memory_order_acquire)
            == head.load(std::memory_order_acquire);
    }

    /// Appends `data` at the tail. Call from the producer thread only.
    /// @return false (buffer unchanged) when the buffer is full, true otherwise.
    bool Push(DataType data)
    {
        // Only the producer writes `tail`, so a relaxed load of our own index is enough.
        const int writeIndex = tail.load(std::memory_order_relaxed);
        const int nextTail = (writeIndex + 1) % Length;

        // Acquire pairs with the release store in Pop(): the consumer has finished
        // reading every slot before `head`.
        if (nextTail == head.load(std::memory_order_acquire))
            return false;

        datas[writeIndex] = data;

        // Release publishes the slot contents together with the new tail.
        tail.store(nextTail, std::memory_order_release);
        return true;
    }

    /// Removes the oldest element into `data`. Call from the consumer thread only.
    /// @return false (`data` untouched) when the buffer is empty, true otherwise.
    bool Pop(DataType& data)
    {
        // Only the consumer writes `head`, so a relaxed load of our own index is enough.
        const int readIndex = head.load(std::memory_order_relaxed);

        // Acquire pairs with the release store in Push(): the slot contents are visible.
        if (readIndex == tail.load(std::memory_order_acquire))
            return false;

        data = datas[readIndex];

        // Release hands the slot back to the producer.
        head.store((readIndex + 1) % Length, std::memory_order_release);
        return true;
    }

    /// @return number of stored elements; only a snapshot while the other thread is running.
    int Size() const
    {
        const int h = head.load(std::memory_order_acquire);
        const int t = tail.load(std::memory_order_acquire);
        return (t >= h) ? (t - h) : (t + Length - h);
    }
};
// Shared state for the producer/consumer benchmark.
clock_t start = 0;                  // baseline subtracted from clock() when reporting elapsed time
int b = 0;                          // receives the value most recently popped by ReadThread
mutex mt;                           // declared for an optional locking variant
// Written by WriteThread after its last push and polled by ReadThread, so it
// must be atomic: a plain bool shared like this is a data race.
std::atomic<bool> isFinish{false};
RingBuffer<int> ringbuffer;         // the queue under test
// Producer side of the benchmark: pushes the values 0 .. ARRAY_MAX_SIZE - 1 into
// `ringbuffer`, retrying a value until the push succeeds, then raises `isFinish`
// and prints the remaining queue size and the elapsed time.
void WriteThread()
{
    int next = 0;
    while (next < ARRAY_MAX_SIZE)
    {
        // Advance only after a successful push; a full buffer means "try the same value again".
        if (ringbuffer.Push(next))
            ++next;
    }

    // Tell the reader that no more values will arrive.
    isFinish = true;

    const clock_t finished = clock();
    printf("Write DataQueue Num = %d\n ", ringbuffer.Size());
    printf("time = %f b = %d\n ", static_cast<double>(finished - start) / CLOCKS_PER_SEC, b);
}
// Consumer side of the benchmark: keeps popping values from `ringbuffer` into `b`
// until the writer has raised `isFinish` and the buffer is drained, then prints
// the remaining queue size and the elapsed time.
void ReadThread()
{
    // The finish flag is read before the emptiness check, so values pushed just
    // before the flag was raised are still drained.
    while (!(isFinish && ringbuffer.IsEmpty()))
    {
        if (ringbuffer.IsEmpty())
            continue; // nothing to read yet, poll again

        ringbuffer.Pop(b);
    }

    const clock_t finished = clock();
    printf("Read DataQueue Num = %d\n ", ringbuffer.Size());
    printf("finish time = %f b = %d\n ", static_cast<double>(finished - start) / CLOCKS_PER_SEC, b);
}
// One-writer / one-reader benchmark: runs WriteThread and ReadThread concurrently.
int main()
{
    thread producer(WriteThread);
    thread consumer(ReadThread);

    // Wait for both sides before pausing.
    producer.join();
    consumer.join();

    // Runs the shell's `Pause` command before exiting.
    system("Pause");
    return 0;
}
测试效率对比:
链式Queue单线程 | 9.9s~10.5s |
链式Queue单写线程和单读线程 | 11.0 ~ 11.3s |
RingBuffer单写线程和单读线程 | 0.45s~0.75s |
很显然,RingBuffer 在解决单写线程和单读线程的问题时性能好很多:数组连续的内存结构对读取和写入很友好,加上 Push 和 Pop 仅靠改变两个整数索引就能实现,也没有 lock,因此 RingBuffer 比链式 Queue 快得多。
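但是请注意:只有在单读线程和单写线程的情况下,上面的 RingBuffer 实现才可能是线程安全的(严格来说,按照 C++ 内存模型,跨线程读写的 head 和 tail 还应使用 std::atomic 之类的同步手段,否则属于数据竞争);在多个读线程或多个写线程的情况下它是不安全的。另外还要考虑缓冲区写满的情况:上面的 RingBuffer 在写满时 Push 会返回 false,调用方必须检查返回值并重试,否则会出现数据丢失或覆盖的问题。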