元数据服务是BeeGFS中用来维护文件和目录关系及其属性配置的服务,其多线程epoll设计实现非常高效,主要流程如下:
ConnAcceptor(PThread)类(一个线程)负责监听端口,并接受客户端连接,然后把连接信息(包含接收的套接字)写入管道;
StreamListenerV2(PThread)类(多个线程,可配置)从管道读取连接信息,使用epoll轮询接收数据,然后生成IncomingPreprocessedMsgWork(Work),写入MultiWorkQueue先进先出队列;
Worker(PThread)类(多个线程,可配置)从MultiWorkQueue队列取出消息进行处理。
程序初始化
主函数
创建App对象,App对象是程序的主要载体:
// fhgfs_meta\source\program\main.cpp
#include "Program.h"
/**
 * Process entry point: hands the command line over to Program::main() and uses its return value
 * as the process exit code.
 */
int main(int argc, char** argv)
{
   const int exitCode = Program::main(argc, argv);

   return exitCode;
}
// fhgfs_meta\source\program\Program.cpp
#include
#include "Program.h"
#include
// application instance pointer; starts out as NULL and is assigned in Program::main()
App* Program::app = NULL;

/**
 * Create the App object, start it via startInCurrentThread() and hand back its result code.
 *
 * @param argc argument count as received by the process entry point
 * @param argv argument vector as received by the process entry point
 * @return the value reported by App::getAppResult()
 */
int Program::main(int argc, char** argv)
{
   // checks that have to pass before an App object may be constructed
   BuildTypeTk::checkDebugBuildTypes();
   AbstractApp::runTimeInitsAndChecks(); // must be called before creating a new App

   app = new App(argc, argv);

   app->startInCurrentThread();

   // fetch the result before the App object is destroyed
   const int exitCode = app->getAppResult();

   delete app;

   return exitCode;
}
创建ConnAcceptor
主程序中会初始化一个线程,监听服务端口,由ConnAcceptor类负责:
// fhgfs_meta\source\app\App.cpp
/**
 * Create the component objects of the app (datagram listener, stream listeners, connection
 * acceptor, stats collector, resyncer, internode syncer, timer queue, modification event flusher,
 * workers and comm slaves).
 *
 * Note: The objects are only constructed here, in the order given below.
 *
 * @param initialConsistencyState passed on to the InternodeSyncer constructor
 * @throw ComponentInitException
 */
void App::initComponents(TargetConsistencyState initialConsistencyState)
   throw(ComponentInitException)
{
   log->log(Log_DEBUG, "Initializing components...");

   // UDP side
   dgramListener = new DatagramListener(netFilter, localNicList, ackStore,
      cfg->getConnMetaPortUDP() );

   if(cfg->getTuneListenerPrioShift() )
   {
      dgramListener->setPriorityShift(cfg->getTuneListenerPrioShift() );
   }

   // TCP side: stream listeners first, then the acceptor for the listen port
   streamListenersInit();

   const unsigned short tcpPort = cfg->getConnMetaPortTCP();
   connAcceptor = new ConnAcceptor(this, localNicList, tcpPort);

   statsCollector = new StatsCollector(workQueue, STATSCOLLECTOR_COLLECT_INTERVAL_MS,
      STATSCOLLECTOR_HISTORY_LENGTH);

   buddyResyncer = new BuddyResyncer();

   internodeSyncer = new InternodeSyncer(initialConsistencyState);

   timerQueue = new TimerQueue(1, 1);

   modificationEventFlusher = new ModificationEventFlusher();

   workersInit();
   commSlavesInit();

   log->log(Log_DEBUG, "Components initialized.");
}
创建StreamListener
根据配置创建多个StreamListener实例,每个实例对应一个线程,用于从ConnAcceptor接收新连接,以及从连接读取数据,生成Work:
// fhgfs_meta\source\app\App.cpp
void App::streamListenersInit() throw(ComponentInitException)
{
this->numStreamListeners = cfg->getTuneNumStreamListeners();
for(unsigned i=0; i < numStreamListeners; i++)
{
StreamListenerV2* listener = new StreamListenerV2(
std::string("StreamLis") + StringTk::uintToStr(i+1), this, workQueue);
if(cfg->getTuneListenerPrioShift() )
listener->setPriorityShift(cfg->getTuneListenerPrioShift() );
if(cfg->getTuneUseAggressiveStreamPoll() )
listener->setUseAggressivePoll();
streamLisVec.push_back(listener);
}
}
创建WorkQueue
创建WorkQueue,用于保存StreamListener生成的Work:
// fhgfs_meta\source\app\App.cpp
/**
 * Init basic shared objects like work queues, node stores etc.
 *
 * The part shown here creates two MultiWorkQueue instances (workQueue and commSlaveQueue) and, if
 * cfg->getTuneUsePerUserMsgQueues() returns true, sets a new UserWorkContainer as the indirect
 * work list of workQueue.
 *
 * NOTE(review): the "..." lines are placeholders for code that is omitted from this excerpt.
 *
 * @throw InvalidConfigException
 */
void App::initDataObjects() throw(InvalidConfigException)
{
...
// general work queue and a separate queue for the comm slaves
this->workQueue = new MultiWorkQueue();
this->commSlaveQueue = new MultiWorkQueue();
// optional: replace the indirect work list of the general queue by a UserWorkContainer
if(cfg->getTuneUsePerUserMsgQueues() )
workQueue->setIndirectWorkList(new UserWorkContainer() );
...
}
创建Worker
根据配置创建Worker线程,从WorkQueue读取Work并进行处理:
// fhgfs_meta\source\app\App.cpp
void App::workersInit() throw(ComponentInitException)
{
unsigned numWorkers = cfg->getTuneNumWorkers();
for(unsigned i=0; i < numWorkers; i++)
{
Worker* worker = new Worker(
std::string("Worker") + StringTk::uintToStr(i+1), workQueue, QueueWorkType_INDIRECT);
worker->setBufLens(cfg->getTuneWorkerBufSize(), cfg->getTuneWorkerBufSize() );
workerList.push_back(worker);
}
for(unsigned i=0; i < APP_WORKERS_DIRECT_NUM; i++)
{
Worker* worker = new Worker(
std::string("DirectWorker") + StringTk::uintToStr(i+1), workQueue, QueueWorkType_DIRECT);
worker->setBufLens(cfg->getTuneWorkerBufSize(), cfg->getTuneWorkerBufSize() );
workerList.push_back(worker);
}
}
连接监听
监听类ConnAcceptor
ConnAcceptor类的定义:
// fhgfs_common\source\common\components\streamlistenerv2\ConnAcceptor.h
/**
 * Thread component (derived from PThread) that holds the listen sockets and an epoll descriptor.
 * Its listenLoop() waits on the epoll descriptor and dispatches events on the listen sockets to
 * the onIncoming...Connection() handlers.
 *
 * NOTE(review): this declaration looks abridged (the trailing public section contains no members).
 */
class ConnAcceptor : public PThread
{
public:
ConnAcceptor(AbstractApp* app, NicAddressList& localNicList, unsigned short listenPort)
throw(ComponentInitException);
virtual ~ConnAcceptor();
private:
AbstractApp* app;
LogContext log;
// listen sockets, one per transport; listenLoop() compares ready pollables against these
StandardSocket* tcpListenSock;
StandardSocket* sdpListenSock;
RDMASocket* rdmaListenSock;
int epollFD; // descriptor that listenLoop() passes to epoll_wait()
// presumably creates the listen sockets for listenPort; implementation not shown here - TODO confirm
bool initSocks(unsigned short listenPort, NicListCapabilities* localNicCaps);
// thread body: registers the signal handler and runs listenLoop()
virtual void run();
void listenLoop();
// handlers that listenLoop() calls with the listen socket on which an event was reported
void onIncomingStandardConnection(StandardSocket* sock);
void onIncomingRDMAConnection(RDMASocket* sock);
void applySocketOptions(StandardSocket* sock);
public:
// getters & setters
};
连接监听循环
使用epoll来轮询监听端口,并建立新连接:
// fhgfs_common\source\common\components\streamlistenerv2\ConnAcceptor.cpp
/**
 * Thread body: registers the signal handler and then stays in listenLoop() until the loop ends.
 *
 * Any std::exception that escapes is handed to the handleComponentException() method of the
 * current thread's app instead of being propagated further.
 */
void ConnAcceptor::run()
{
   try
   {
      registerSignalHandler();

      listenLoop();

      // only reached when listenLoop() returned without throwing
      log.log(Log_DEBUG, "Component stopped.");
   }
   catch(std::exception& componentEx)
   {
      PThread::getCurrentThreadApp()->handleComponentException(componentEx);
   }
}
void ConnAcceptor::listenLoop()
{
const int epollTimeoutMS = 3000;
struct epoll_event epollEvents[EPOLL_EVENTS_NUM];
// (just to have these values on the stack...)
const int epollFD = this->epollFD;
RDMASocket* rdmaListenSock = this->rdmaListenSock;
StandardSocket* sdpListenSock = this->sdpListenSock;
StandardSocket* tcpListenSock = this->tcpListenSock;
// wait for incoming events and handle them...
while(!getSelfTerminate() )
{
//log.log(Log_DEBUG, std::string("Before poll(). pollArrayLen: ") +
// StringTk::uintToStr(pollArrayLen) );
int epollRes = epoll_wait(epollFD, epollEvents, EPOLL_EVENTS_NUM, epollTimeoutMS);
if(unlikely(epollRes < 0) )
{ // error occurred
if(errno == EINTR) // ignore interruption, because the debugger causes this
continue;
log.logErr(std::string("Unrecoverable epoll_wait error: ") + System::getErrString() );
break;
}
// handle incoming connection attempts
for(size_t i=0; i < (size_t)epollRes; i++)
{
struct epoll_event* currentEvent = &epollEvents[i];
Pollable* currentPollable = (Pollable*)currentEvent->data.ptr;
//log.log(Log_DEBUG, std::string("Incoming data on FD: ") +
// StringTk::intToStr(pollArray[i].fd) ); // debug in
if(currentPollable == rdmaListenSock)
onIncomingRDMAConnection(rdmaListenSock);
else
if(currentPollable == tcpListenSock)
onIncomingStandardConnection(tcpListenSock);
else
if(currentPollable == sdpListenSock)
onIncomingStandardConnection(sdpListenSock);
else
{ // unknown connection => should never happen
log.log(Log_WARNING, "Should never happen: Ignoring event for unknown connection. "
"FD: " + StringTk::uintToStr(currentPollable->getFD() ) );
}
}
}
}
套接字监听处理(派发给流)
把建立的套接字发送给指定的StreamListener:
// fhgfs_common\source\common\components\streamlistenerv2\ConnAcceptor.cpp
/**
* Accept the incoming connection and add new socket to StreamListenerV2 queue.
*
* Note: This is for standard sockets like TCP and SDP.
*/
void ConnAcceptor::onIncomingStandardConnection(StandardSocket* sock)
{
try
{
struct sockaddr_in peerAddr;
socklen_t peerAddrLen = sizeof(peerAddr);
StandardSocket* acceptedSock =
(StandardSocket*)sock->accept( (struct sockaddr*)&peerAddr, &peerAddrLen);
// (note: level Log_DEBUG to avoid spamming the log until we have log topics)
log.log(Log_DEBUG, std::string("Accepted new connection from " +
Socket::endpointAddrToString(&peerAddr.sin_addr, ntohs(peerAddr.sin_port) ) ) +
std::string(" [SockFD: ") + StringTk::intToStr(acceptedSock->getFD() ) +
std::string("]") );
applySocketOptions(acceptedSock);
// hand the socket over to a stream listene