之前的文章介绍了phxpaxos理论部分和网络部分,但是如何真正的实现paxos还没有说明。
下面通过两部分内容:1. 网络部分;2. 官方自带的例子,来引入我们要讲的paxos逻辑部分。
网络部分先导部分
根据之前介绍的网络部分,在这里追踪网络层接收到的消息是如何一步步传递到Paxos算法部分并被处理的。
废话不多说,上代码。
文件src/communicate/network.cpp
/**
 * Entry point for a raw message received by the network layer.
 *
 * When a node is attached, the buffer is forwarded to it; otherwise only the
 * message length is logged. The node's return value is not propagated.
 *
 * @param pcMessage   Pointer to the received bytes.
 * @param iMessageLen Number of bytes in pcMessage.
 * @return Always 0.
 */
int NetWork :: OnReceiveMessage(const char * pcMessage, const int iMessageLen)
{
    if (m_poNode == nullptr)
    {
        // No node attached: just record the length of the message.
        PLHead("receive msglen %d", iMessageLen);
        return 0;
    }

    // Hand the message over to the node's receive function (declared in node.h).
    m_poNode->OnReceiveMessage(pcMessage, iMessageLen);
    return 0;
}
继续往下找其实现。
在src/node/pnode.cpp
中找到其实现。
/**
 * Routes a received message to the Instance of the group it is addressed to.
 *
 * The first GROUPIDXLEN bytes of the message carry the group index. The
 * message is rejected when the buffer is null, empty, or too short to contain
 * that header, and when the decoded index fails CheckGroupID().
 *
 * @param pcMessage   Pointer to the received bytes.
 * @param iMessageLen Number of bytes in pcMessage.
 * @return -2 for a null/too-short message, Paxos_GroupIdxWrong for an invalid
 *         group index, otherwise the result of Instance::OnReceiveMessage.
 */
int PNode :: OnReceiveMessage(const char * pcMessage, const int iMessageLen)
{
    // The group index is copied out of the buffer below, so the buffer must
    // hold at least GROUPIDXLEN bytes; a shorter message would be over-read.
    if (pcMessage == nullptr
            || iMessageLen <= 0
            || iMessageLen < static_cast<int>(GROUPIDXLEN))
    {
        PLErr("Message size %d to small, not valid.", iMessageLen);
        return -2;
    }

    // Extract the group index from the message header and validate it.
    int iGroupIdx = -1;
    memcpy(&iGroupIdx, pcMessage, GROUPIDXLEN);
    if (!CheckGroupID(iGroupIdx))
    {
        PLErr("Message groupid %d wrong, groupsize %zu", iGroupIdx, m_vecGroupList.size());
        return Paxos_GroupIdxWrong;
    }

    // Deliver the whole message (header included) to that group's Instance.
    return m_vecGroupList[iGroupIdx]->GetInstance()->OnReceiveMessage(pcMessage, iMessageLen);
}
在src/algorithm/instance.cpp
中找到其实现
/**
 * Queues a received message on this instance's IOLoop.
 *
 * The result of the enqueue is not propagated to the caller.
 *
 * @param pcMessage   Pointer to the received bytes.
 * @param iMessageLen Number of bytes in pcMessage.
 * @return Always 0.
 */
int Instance :: OnReceiveMessage(const char * pcMessage, const int iMessageLen)
{
    // Post the message to the IOLoop; its return value is discarded here.
    static_cast<void>(m_oIOLoop.AddMessage(pcMessage, iMessageLen));

    return 0;
}
将消息放到了IOLOOP中,这又是个什么鬼?
/**
 * Appends a copy of a received message to the message queue.
 *
 * The whole operation runs with the queue locked. The message is rejected
 * when the queue already holds more than QUEUE_MAXLENGTH entries or when the
 * accounted queue memory exceeds MAX_QUEUE_MEM_SIZE.
 *
 * @param pcMessage   Pointer to the received bytes.
 * @param iMessageLen Number of bytes in pcMessage.
 * @return 0 when the message was queued, -2 when it was rejected.
 */
int IOLoop :: AddMessage(const char * pcMessage, const int iMessageLen)
{
    int iResult = 0;

    m_oMessageQueue.lock();

    BP->GetIOLoopBP()->EnqueueMsg();

    if (static_cast<int>(m_oMessageQueue.size()) > QUEUE_MAXLENGTH)
    {
        // Too many pending messages: drop this one.
        BP->GetIOLoopBP()->EnqueueMsgRejectByFullQueue();
        PLGErr("Queue full, skip msg");
        iResult = -2;
    }
    else if (m_iQueueMemSize > MAX_QUEUE_MEM_SIZE)
    {
        // Pending messages already occupy too much memory: drop this one.
        PLErr("queue memsize %d too large, can't enqueue", m_iQueueMemSize);
        iResult = -2;
    }
    else
    {
        // Store a heap copy of the payload and account for its size.
        m_oMessageQueue.add(new string(pcMessage, iMessageLen));
        m_iQueueMemSize += iMessageLen;
    }

    m_oMessageQueue.unlock();

    return iResult;
}
看其定义,我们就非常熟悉了:把收到的消息放到queue中,跟我们之前处理消息的逻辑非常像,是生产者-消费者逻辑。下面验证一下,看一下IOLoop的定义。
/**
 * Thread that owns the queue of received messages, a retry queue of Paxos
 * messages and a set of timers for one Instance.
 */
class IOLoop : public Thread
{
public:
    IOLoop(Config * poConfig, Instance * poInstance);
    virtual ~IOLoop();

    void run();
    void Stop();

    // Main message-processing loop.
    void OneLoop(const int iTimeoutMs);
    void DealWithRetry();
    void ClearRetryQueue();

    // Adds a newly received message.
    int AddMessage(const char * pcMessage, const int iMessageLen);
    int AddRetryPaxosMsg(const PaxosMsg & oPaxosMsg);
    void AddNotify();

    // Timer management.
    virtual bool AddTimer(const int iTimeout, const int iType, uint32_t & iTimerID);
    virtual void RemoveTimer(uint32_t & iTimerID);
    void DealwithTimeout(int & iNextTimeout);
    void DealwithTimeoutOne(const uint32_t iTimerID, const int iType);

private:
    bool m_bIsEnd;
    bool m_bIsStart;

    Timer m_oTimer;
    std::map<uint32_t, bool> m_mapTimerIDExist;

    // Stores the received messages.
    Queue<std::string *> m_oMessageQueue;
    std::queue<PaxosMsg> m_oRetryQueue;
    // Running total of the byte sizes of the messages held in m_oMessageQueue.
    int m_iQueueMemSize;

    Config * m_poConfig;
    Instance * m_poInstance;
};
然后看一下消息处理部分,
即OneLoop函数。
/**
 * Runs one iteration of the IO loop.
 *
 * Waits up to iTimeoutMs for a queued message; when one is available it is
 * removed from the queue, passed to the Instance and then freed. Afterwards
 * the retry queue is processed and the Instance is asked to check for a new
 * value.
 *
 * @param iTimeoutMs Timeout handed to the queue's peek().
 */
void IOLoop :: OneLoop(const int iTimeoutMs)
{
    std::string * psMessage = nullptr;

    m_oMessageQueue.lock();

    // Fetch the next message, if any.
    const bool bSucc = m_oMessageQueue.peek(psMessage, iTimeoutMs);
    if (bSucc)
    {
        m_oMessageQueue.pop();

        // AddMessage() reads and updates m_iQueueMemSize while holding the
        // queue lock, so the counter must be adjusted under the same lock to
        // avoid a data race between the two threads.
        if (psMessage != nullptr)
        {
            m_iQueueMemSize -= psMessage->size();
        }
    }

    m_oMessageQueue.unlock();

    if (bSucc)
    {
        if (psMessage != nullptr && psMessage->size() > 0)
        {
            // Pass the message to the instance, which runs the actual logic.
            m_poInstance->OnReceive(*psMessage);
        }

        delete psMessage;

        BP->GetIOLoopBP()->OutQueueMsg();
    }

    DealWithRetry();

    // Must stay at this position: per the original note, timers are added
    // from inside this call.
    m_poInstance->CheckNewValue();
}
OK,看代码就十分明显了,和之前的EventLoop如出一辙,处理定时、消息等内容,在此不再多说。
实例导入
src/test/test_server.h
文件是test中主逻辑类
/**
 * Main logic class of the test program: wraps one paxos node together with
 * the test state machine it drives.
 */
class TestServer
{
public:
    TestServer(const phxpaxos::NodeInfo & oMyNode, const phxpaxos::NodeInfoList & vecNodeList);
    ~TestServer();

    // Brings the whole system up, including configuring the node
    // information and starting the node.
    int RunPaxos();

    // Writes a value into paxos.
    int Write(const std::string & sTestValue, uint64_t & llInstanceID);
    int BatchWrite(const std::string & sTestValue, uint64_t & llInstanceID, uint32_t & iBatchIndex);

    int Ready();

    TestSM * GetSM();

private:
    int MakeLogStoragePath(std::string & sLogStoragePath);

    phxpaxos::NodeInfo m_oMyNode;
    phxpaxos::NodeInfoList m_vecNodeList;

    TestSM m_oTestSM;

    phxpaxos::Node * m_poPaxosNode;
};
int TestServer :: RunPaxos()
{
//参数设置
Options oOptions;
int ret = MakeLogStoragePath(oOptions.sLogStoragePath);
if (ret != 0)
{
return ret;
}
//oOptions.iSyncInterval = 1;
oOptions.iGroupCount = 1;
oOptions.oMyNode = m_oMyNode;
oOptions.vecNodeInfoList = m_vecNodeList;
oOptions.bUseMembership = true;
GroupSMInfo oSMInfo;
oSMInfo.iGroupIdx = 0;
oSMInfo.vecSMList.push_back(&m_oTestSM);
oOptions.vecGroupSMInfoList.push_back(oSMInfo);
oOptions.bUseBatchPropose = true;
oOptions.bOpenChangeValueBeforePropose = true;
oOptions.iIOThreadCount = 3;
//真正启动node
ret = Node::RunNode(oOptions, m_poPaxosNode);
if (ret != 0)
{
printf("run paxos fail, ret %d\n", ret);
return ret;
}
m_poPaxosNode->SetBatchDelayTimeMs(0, 20);
m_poPaxosNode->SetBatchCount(0, 10);
printf("run paxos ok, ip %s port %d\n", m_oMyNode.GetIP().c_str(), m_oMyNode.GetPort());
return 0;
}
int TestServer :: Write(const std::string & sTestValue, uint64_t & llInstanceID)
{
SMCtx oCtx;
oCtx.m_iSMID = 1;
oCtx.m_pCtx = nullptr;
string sPackValue = TestSM::PackTestValue(sTestValue);
//与前面力量中的发起提议相对应起来,发起一次信息请求。
int ret = m_poPaxosNode->Propose(0, sPackValue, llInstanceID, &oCtx);
if (ret != 0)
{
return ret;
}
return 0;
}
继续往下找,找到Node的RunNode函数中的PNode::Init()函数。
int PNode :: Init(const Options & oOptions, NetWork *& poNetWork)
{
int ret = CheckOptions(oOptions);
if (ret != 0)
{
PLErr("CheckOptions fail, ret %d", ret);
return ret;
}
m_iMyNodeID = oOptions.oMyNode.GetNodeID();
//step1 init logstorage
LogStorage * poLogStorage = nullptr;
ret = InitLogStorage(oOptions, poLogStorage);
if (ret != 0)
{
return ret;
}
//step2 init network
ret = InitNetWork(oOptions, poNetWork);
if (ret != 0)
{
return ret;
}
//step3 build masterlist
for (int iGroupIdx = 0; iGroupIdx < oOptions.iGroupCount; iGroupIdx++)
{
MasterMgr * poMaster = new MasterMgr(this, iGroupIdx, poLogStorage, oOptions.pMasterChangeCallback);
assert(poMaster != nullptr);
m_vecMasterList.push_back(poMaster);
ret = poMaster->Init();
if (ret != 0)
{
return ret;
}
}
//step4 build grouplist
for (int iGroupIdx = 0; iGroupIdx < oOptions.iGroupCount; iGroupIdx++)
{
Group * poGroup = new Group(poLogStorage, poNetWork, m_vecMasterList[iGroupIdx]->GetMasterSM(), iGroupIdx, oOptions);
assert(poGroup != nullptr);
m_vecGroupList.push_back(poGroup);
}
//step5 build batchpropose
if (oOptions.bUseBatchPropose)
{
for (int iGroupIdx = 0; iGroupIdx < oOptions.iGroupCount; iGroupIdx++)
{
ProposeBatch * poProposeBatch = new ProposeBatch(iGroupIdx, this, &m_oNotifierPool);
assert(poProposeBatch != nullptr);
m_vecProposeBatch.push_back(poProposeBatch);
}
}
//step6 init statemachine
InitStateMachine(oOptions);
//step7 parallel init group
for (auto & poGroup : m_vecGroupList)
{
poGroup->StartInit();
}
for (auto & poGroup : m_vecGroupList)
{
int initret = poGroup->GetInitRet();
if (initret != 0)
{
ret = initret;
}
}
if (ret != 0)
{
return ret;
}
//last step. must init ok, then should start threads.
//because that stop threads is slower, if init fail, we need much time to stop many threads.
//so we put start threads in the last step.
for (auto & poGroup : m_vecGroupList)
{
//start group's thread first.
poGroup->Start();
}
RunMaster(oOptions);
RunProposeBatch();
PLHead("OK");
return 0;
}
OK,上述代码中官方注释写得非常清楚,这里不再过多说明。
总结
通过上述内容,可以清晰地了解以下内容:
1. 算法内部使用异步消息处理机制。
2. 整个系统的大体工作流程。
接下来我们将对具体的一致性逻辑进行分析:以算法流程为分析主线,依次分析算法模块角色中的Instance、proposer、accept、prepare等内容,再然后分析statemachine、master、memberchange等内容。