UDT 最新源码分析 -- 网络数据收发
从接口实现看 UDT 网络收发
从对外的接口实现方法来看,网络收发过程实际上是对 m_pSndBuffer 和 m_pRcvBuffer 进行操作,而实际的网络收发涉及到系统调度,算法实现等问题。简单来看看代码。
UDT 发送 send / sendmsg / sendfile
以 send 为例,外部接口调用send 其实并不是直接发送到网络,而是将数据加入发送的 buffer 中,后续再通过调度将数据发送到网络中去。send 仅仅针对流传输模式而言,其他模式不可调用此函数。对于数据包模式,应该调用 sendmsg。
CUDT::send(UDTSOCKET u, const char* buf, int len, int)
-> CUDT::send(const char* data, int len)
// Stream-mode send: copies user data into the send buffer (m_pSndBuffer) and
// schedules this socket on the send queue; the worker thread performs the
// actual network transmission later. Returns the number of bytes accepted.
int CUDT::send(const char* data, int len)
{
...
if (m_iSndBufSize <= m_pSndBuffer->getCurrBufSize()) //buffer is full
{
// Check buffer state and wait until the wake-up condition is triggered.
// Also check the network connection state and whether the UDT socket is closed.
}
int size = (m_iSndBufSize - m_pSndBuffer->getCurrBufSize()) * m_iPayloadSize; //maximum available space (bytes)
if (size > len)
size = len; //size = bytes to copy on this call, capped by available capacity
// record total time used for sending
if (0 == m_pSndBuffer->getCurrBufSize())
m_llSndDurationCounter = CTimer::getTime();
// insert the user buffer into the sending list
m_pSndBuffer->addBuffer(data, size); //key step: "sending" only appends data to the buffer
// insert this socket to snd list if it is not on the list yet
m_pSndQueue->m_pSndUList->update(this, false);
if (m_iSndBufSize <= m_pSndBuffer->getCurrBufSize())
{
// write is not available any more
s_UDTUnited.m_EPoll.update_events(m_SocketID, m_sPollID, UDT_EPOLL_OUT, false);
}
return size;
}
sendmsg 与 send 函数有非常多代码一致,核心代码基本上没有变化。sendfile 中 addBuffer 变成 addBufferFromFile,其余基本没变化。
UDT 接收 recv /recvmsg /recvfile
从接口调用 recv 实际上只是从接收缓冲中取出数据。在获取数据时会检查当前是否为流模式;如果没有数据,则启用条件唤醒和定时等待等机制;同时也会检查网络连接是否正常。
// Stream-mode receive: reads buffered data out of m_pRcvBuffer. Blocks
// (condition wait / timed wait) while the buffer is empty; throws
// CUDTException(6, 3) when a receive timeout expires with no data.
int CUDT::recv(char* data, int len)
{
...
if (0 == m_pRcvBuffer->getRcvDataSize()) // buffer is empty
{
... //wait until the condition is signalled or the timeout expires
}
int res = m_pRcvBuffer->readBuffer(data, len);
if (m_pRcvBuffer->getRcvDataSize() <= 0)
{
// read is not available any more
s_UDTUnited.m_EPoll.update_events(m_SocketID, m_sPollID, UDT_EPOLL_IN, false); //clear the EPOLL_IN event
}
if ((res <= 0) && (m_iRcvTimeOut >= 0))
throw CUDTException(6, 3, 0);
return res;
}
从内部实现看 UDT 网络收发
从接口上可以看到,发送接收仅仅是将数据与buffer进行交互,看不到数据真正进行发送接收的地方。那么在内部究竟如何实现的呢?在以前的文章分析中已经提到过发送接收工作线程的概念,在这里再次看看,代码参考 queue.cpp。
初始化的地方如下,通过调用 m_pSndQueue 和 m_pRcvQueue 调用 init 实现 worker 线程创建:
// Create a new multiplexer for socket s: open the UDP channel (optionally on
// a caller-supplied UDP socket), then create the send/receive queues, whose
// init() calls start the worker threads. The multiplexer is registered in
// m_mMultiplexer keyed by the socket ID.
void CUDTUnited::updateMux(CUDTSocket* s, const sockaddr* addr, const UDPSOCKET* udpsock)
{
...
CMultiplexer m;
m.m_iID = s->m_SocketID;
m.m_pChannel = new CChannel(s->m_pUDT->m_iIPversion);
m.m_pChannel->setSndBufSize(s->m_pUDT->m_iUDPSndBufSize);
m.m_pChannel->setRcvBufSize(s->m_pUDT->m_iUDPRcvBufSize);
try
{
if (NULL != udpsock)
m.m_pChannel->open(*udpsock); //reuse an existing UDP socket
else
m.m_pChannel->open(addr); //open a new UDP socket bound to addr
}
catch (CUDTException& e)
{
m.m_pChannel->close();
delete m.m_pChannel;
throw e;
}
m.m_pTimer = new CTimer;
m.m_pSndQueue = new CSndQueue;
m.m_pSndQueue->init(m.m_pChannel, m.m_pTimer); //starts the send worker thread
m.m_pRcvQueue = new CRcvQueue;
m.m_pRcvQueue->init(32, s->m_pUDT->m_iPayloadSize, m.m_iIPversion, 1024, m.m_pChannel, m.m_pTimer); //starts the receive worker thread
m_mMultiplexer[m.m_iID] = m;
}
UDT 发送工作线程
发送线程中主要的变量有 m_pSndUList, m_pChannel, m_pTimer。线程的工作就是不停地检查 m_pSndUList 中的 UDT 实例,取出包,通过 m_pChannel 发送出去。如果取出包时发现未到发送时间,则通过 m_pTimer sleep 剩余的时间再发送。
创建线程如下所示:
/// Bind the send queue to its UDP channel and timer, create the list of
/// sockets waiting to send (CSndUList), and launch the worker thread that
/// drains that list. Throws CUDTException(3, 1) if the worker thread cannot
/// be created.
void CSndQueue::init(CChannel* c, CTimer* t)
{
    m_pChannel = c;
    m_pTimer = t;

    // The send list shares the queue's window lock/condition so that an
    // insertion into an empty list can wake the sleeping worker.
    m_pSndUList = new CSndUList;
    m_pSndUList->m_pWindowLock = &m_WindowLock;
    m_pSndUList->m_pWindowCond = &m_WindowCond;
    m_pSndUList->m_pTimer = t;

#ifndef WIN32
    if (pthread_create(&m_WorkerThread, NULL, CSndQueue::worker, this) != 0)
    {
        m_WorkerThread = 0;
        throw CUDTException(3, 1);
    }
#else
    DWORD tid;
    m_WorkerThread = CreateThread(NULL, 0, CSndQueue::worker, this, 0, &tid);
    if (NULL == m_WorkerThread)
        throw CUDTException(3, 1);
#endif
}
根据前面的描述,接下来理解发送工作线程运行过程。getNextProcTime 实际上就是获取 m_pHeap[0] 的 m_llTimeStamp。这个时间就是即将要发送的数据的时间。sleepto 等待时间到达。pop 则是初始化 CPacket,然后再发送。如果 ts <= 0,代表当前并无数据需要发送,需要继续等待。
// Send worker thread: repeatedly takes the socket with the earliest scheduled
// send time from m_pSndUList, sleeps (if needed) until that time, pops a
// packed packet and sends it through the UDP channel. When no socket has
// data to send (ts <= 0), it blocks on the window condition until insert_()
// signals a first entry.
#ifndef WIN32
void* CSndQueue::worker(void* param)
#else
DWORD WINAPI CSndQueue::worker(LPVOID param)
#endif
{
CSndQueue* self = (CSndQueue*)param;
while (!self->m_bClosing)
{
uint64_t ts = self->m_pSndUList->getNextProcTime(); //scheduled time of the next send
if (ts > 0)
{
// wait until next processing time of the first socket on the list
uint64_t currtime;
CTimer::rdtsc(currtime);
if (currtime < ts) //not yet due
self->m_pTimer->sleepto(ts); //sleep: paces the inter-packet send interval
// it is time to send the next pkt
sockaddr* addr;
CPacket pkt;
if (self->m_pSndUList->pop(addr, pkt) < 0)
continue;
self->m_pChannel->sendto(addr, pkt);
}
else
{
// wait here if there is no sockets with data to be sent
#ifndef WIN32
pthread_mutex_lock(&self->m_WindowLock);
if (!self->m_bClosing && (self->m_pSndUList->m_iLastEntry < 0))
pthread_cond_wait(&self->m_WindowCond, &self->m_WindowLock);
pthread_mutex_unlock(&self->m_WindowLock);
#else
WaitForSingleObject(self->m_WindowCond, INFINITE);
#endif
}
}
#ifndef WIN32
return NULL;
#else
SetEvent(self->m_ExitCond);
return 0;
#endif
}
Retrieve the next packet and peer address from the first entry, and reschedule it in the queue.
在线程循环块内,出现了 pop 方法。这个方法取出 m_pHeap 中的根节点,检查时间戳,若时间已到,在堆中删除该节点,进入 packData。
/// Take the most urgent socket off the heap, pack one packet from it, and
/// return that packet together with the peer address. Returns 1 on success,
/// -1 when the list is empty, the head entry is not yet due, the socket is
/// no longer usable, or no packet could be packed. On success the socket is
/// re-inserted with the next processing time produced by packData().
int CSndUList::pop(sockaddr*& addr, CPacket& pkt)
{
    CGuard guard(m_ListLock);

    // Empty heap: nothing scheduled.
    if (m_iLastEntry == -1)
        return -1;

    // m_pHeap[0] is the earliest-scheduled entry; honour its timestamp.
    uint64_t now;
    CTimer::rdtsc(now);
    if (now < m_pHeap[0]->m_llTimeStamp)
        return -1;

    CUDT* sock = m_pHeap[0]->m_pUDT;
    remove_(sock);

    // Skip sockets that died while queued.
    if (!sock->m_bConnected || sock->m_bBroken)
        return -1;

    // Pack one packet; packData() rewrites 'now' with the next send time.
    if (sock->packData(pkt, now) <= 0)
        return -1;

    addr = sock->m_pPeerAddr;

    // Reschedule the socket at its next processing time.
    if (now > 0)
        insert_(now, sock);

    return 1;
}
m_pHeap 是一个以节点时间为参考建立的最小堆。所有的插入与删除操作均为堆的操作,需要注意的是,孩子节点与父节点的序号对应关系。对于父节点 q 来说,左孩子序号为 2 * q + 1,右孩子为 2 * q + 2,这也是代码中 p 的取值。
首先看删除某节点的操作:
// Remove socket u's node from the min-heap (keyed by m_llTimeStamp): move
// the last node into the vacated slot, then sift it down until the heap
// property is restored. Marks the node as off-heap (m_iHeapLoc = -1) and
// interrupts the timer when the heap becomes empty.
void CSndUList::remove_(const CUDT* u)
{
CSNode* n = u->m_pSNode;
if (n->m_iHeapLoc >= 0)
{
// remove the node from heap: overwrite the deleted slot with the last node
m_pHeap[n->m_iHeapLoc] = m_pHeap[m_iLastEntry];
m_iLastEntry --;
m_pHeap[n->m_iHeapLoc]->m_iHeapLoc = n->m_iHeapLoc;
int q = n->m_iHeapLoc; //slot now holding the moved node
int p = q * 2 + 1; //index of its left child
while (p <= m_iLastEntry) // a left child exists
{
// if a right child exists with a smaller timestamp, sift towards it instead
if ((p + 1 <= m_iLastEntry) && (m_pHeap[p]->m_llTimeStamp > m_pHeap[p + 1]->m_llTimeStamp))
p ++;
// if the parent's timestamp exceeds the smaller child's, swap and continue downward
if (m_pHeap[q]->m_llTimeStamp > m_pHeap[p]->m_llTimeStamp)
{
CSNode* t = m_pHeap[p];
m_pHeap[p] = m_pHeap[q];
m_pHeap[p]->m_iHeapLoc = p;
m_pHeap[q] = t;
m_pHeap[q]->m_iHeapLoc = q;
q = p;
p = q * 2 + 1;
}
else
break;
}
n->m_iHeapLoc = -1;
}
// the only event has been deleted, wake up immediately
if (0 == m_iLastEntry)
m_pTimer->interrupt();
}
对于插入操作,只要记住节点序号关系,就很容易看明白了。父节点序号 p 为孩子节点序号 q 对应的 (q - 1) / 2。如果还不明白,可以复习一下堆这种数据结构的相关知识。
// Insert socket u into the min-heap with processing time ts: append the node
// at the end, then sift it up (parent index = (q - 1) / 2) until the heap
// property holds. Wakes the send worker when the new node becomes the heap
// root (an earlier event) or when the list transitions from empty.
void CSndUList::insert_(int64_t ts, const CUDT* u)
{
CSNode* n = u->m_pSNode;
// do not insert repeated node
if (n->m_iHeapLoc >= 0)
return;
//append as the last node of the heap
m_iLastEntry ++;
m_pHeap[m_iLastEntry] = n;
n->m_llTimeStamp = ts;
//sift the new node up
int q = m_iLastEntry;
int p = q;
while (p != 0)
{
p = (q - 1) >> 1; //parent index
if (m_pHeap[p]->m_llTimeStamp > m_pHeap[q]->m_llTimeStamp)
{
CSNode* t = m_pHeap[p];
m_pHeap[p] = m_pHeap[q];
m_pHeap[q] = t;
t->m_iHeapLoc = q;
q = p;
}
else
break;
}
n->m_iHeapLoc = q;
// an earlier event has been inserted, wake up sending worker
if (n->m_iHeapLoc == 0)
m_pTimer->interrupt();
// first entry, activate the sending queue
if (0 == m_iLastEntry)
{
#ifndef WIN32
pthread_mutex_lock(m_pWindowLock);
pthread_cond_signal(m_pWindowCond); //wake the send worker thread blocked on the condition
pthread_mutex_unlock(m_pWindowLock);
#else
SetEvent(*m_pWindowCond);
#endif
}
}
在发送线程中还有一个 packData 方法,处理了两类 packet 的读取,一是丢失的 packet,二是正常的顺序传输的包。处理过程:
-
获取 entertime, 更新 m_ullTimeDiff, 即记录当前发包对应目标时间的差值,会影响到下一次发包的目标时间。UDT 以此使得发包的时间间隔始终控制在算法之中。
在 UDT 中,在开始的时候会初始化一个发包时间间隔 m_ullInterval ,这个值表示期望的发送时间间隔。初始化如下所示:
m_ullInterval = (uint64_t)(m_pCC->m_dPktSndPeriod * m_ullCPUFrequency);
m_ullInterval 并不是一个固定的值,而是根据网络状态进行调整。比如在 processCtrl 中收到包类型为 4 时,就会改变。但是查找代码可以发现,当前 UDT 版本不再执行 sendCtrl(4),详见处理包类型为 6 时的代码,相关调用已经被注释。但是无用代码并未删除,如下所示。
// One way packet delay is increasing, so decrease the sending rate m_ullInterval = (uint64_t)ceil(m_ullInterval * 1.125);
在拥塞控制中 CCUpdate 改变 m_ullInterval 值:
m_ullInterval = (uint64_t) (m_pCC->m_dPktSndPeriod * m_ullCPUFrequency);
m_dCongestionWindow = m_pCC->m_dCWndSize;
if (m_llMaxBW <= 0)
return;
const double minSP = 1000000.0 / (double(m_llMaxBW) / m_iMSS) * m_ullCPUFrequency;
if (m_ullInterval < minSP)
m_ullInterval = minSP;
在 UDT 中,包发送会有一个随网络状况调整的发送周期,也就是 m_ullInterval 值。在每一次发送包时,都会根据 m_ullInterval 值计算下一次包发送的理想时间间隔,并修改 m_ullTargetTime 值。
-
检查是否丢包。
-
如果丢包,就将 packet.m_iSeqNo 赋值为丢包的序号值。然后计算 offset。m_iSndLastDataAck 是在接收到最后一个 ack 时更新的序号,之前的所有包都被确认。如果 offset < 0, 表示上次确认序号大于丢包序号,即有包未收到但是被确认,可能出现错误。读取数据如果失败,就会发送丢弃请求,并更新 m_iSndCurrSeqNo。
-
如果没有丢包,则发送一个新包。根据流窗口与拥塞窗口更新 cwnd 值。若发送包序号在窗口范围内,则 readData 并且更新本地和 ccc中 m_iSndCurrSeqNo,更新 m_iSeqNo,检查是否需要发送包对探测。
-
更新 packet 与 cc,更新 ts, m_ullTargetTime。包将在 worker 中被发送
-
// Pack one data packet for transmission. Retransmission of lost packets
// (from m_pSndLossList) always takes priority; new data is packed only
// within the flow/congestion window. On return, 'ts' holds the next
// scheduled processing time for this socket (0 = nothing to send), which
// the caller stores back into the send list. Returns the payload size, or
// 0 when no packet was packed.
int CUDT::packData(CPacket& packet, uint64_t& ts)
{
int payload = 0;
bool probe = false;
uint64_t entertime;
CTimer::rdtsc(entertime);
// accumulate how late we are versus the previous target send time; this
// debt shortens the following intervals so the average pacing rate holds
if ((0 != m_ullTargetTime) && (entertime > m_ullTargetTime))
m_ullTimeDiff += entertime - m_ullTargetTime;
// Loss retransmission always has higher priority.
if ((packet.m_iSeqNo = m_pSndLossList->getLostSeq()) >= 0) //a lost packet exists (timeout or NAK)
{
// protect m_iSndLastDataAck from updating by ACK processing
CGuard ackguard(m_AckLock);
int offset = CSeqNo::seqoff(m_iSndLastDataAck, packet.m_iSeqNo);
if (offset < 0)
return 0; //the "lost" sequence was already acknowledged: inconsistent state, drop it
int msglen;
//re-read the lost data from the send buffer
payload = m_pSndBuffer->readData(&(packet.m_pcData), offset, packet.m_iMsgNo, msglen);
if (-1 == payload)
{
// data no longer available in the buffer: ask the peer to drop the whole message
int32_t seqpair[2];
seqpair[0] = packet.m_iSeqNo;
seqpair[1] = CSeqNo::incseq(seqpair[0], msglen);
sendCtrl(7, &packet.m_iMsgNo, seqpair, 8);
// only one msg drop request is necessary
m_pSndLossList->remove(seqpair[1]);
// skip all dropped packets
if (CSeqNo::seqcmp(m_iSndCurrSeqNo, CSeqNo::incseq(seqpair[1])) < 0)
m_iSndCurrSeqNo = CSeqNo::incseq(seqpair[1]);
return 0;
}
else if (0 == payload)
return 0;
++ m_iTraceRetrans;
++ m_iRetransTotal;
}
else
{
// If no loss, pack a new packet.
// check congestion/flow window limit
int cwnd = (m_iFlowWindowSize < (int)m_dCongestionWindow) ? m_iFlowWindowSize : (int)m_dCongestionWindow;
if (cwnd >= CSeqNo::seqlen(m_iSndLastAck, CSeqNo::incseq(m_iSndCurrSeqNo)))
{
if (0 != (payload = m_pSndBuffer->readData(&(packet.m_pcData), packet.m_iMsgNo)))
{
m_iSndCurrSeqNo = CSeqNo::incseq(m_iSndCurrSeqNo);
m_pCC->setSndCurrSeqNo(m_iSndCurrSeqNo); //keep the CC's view of the current seq in sync
packet.m_iSeqNo = m_iSndCurrSeqNo;
// every 16 (0xF) packets, a packet pair is sent
if (0 == (packet.m_iSeqNo & 0xF))
probe = true;
}
else
{
// send buffer empty: clear pacing state, nothing to schedule
m_ullTargetTime = 0;
m_ullTimeDiff = 0;
ts = 0;
return 0;
}
}
else
{
// window full: clear pacing state and wait for ACKs
m_ullTargetTime = 0;
m_ullTimeDiff = 0;
ts = 0;
return 0;
}
}
packet.m_iTimeStamp = int(CTimer::getTime() - m_StartTime);
packet.m_iID = m_PeerID;
packet.setLength(payload);
m_pCC->onPktSent(&packet);
//m_pSndTimeWindow->onPktSent(packet.m_iTimeStamp);
++ m_llTraceSent;
++ m_llSentTotal;
if (probe)
{
// sends out probing packet pair: next packet is scheduled immediately (back-to-back)
ts = entertime;
probe = false;
}
else
{
#ifndef NO_BUSY_WAITING
ts = entertime + m_ullInterval;
#else
// repay the accumulated lateness (m_ullTimeDiff) by shortening the interval
if (m_ullTimeDiff >= m_ullInterval)
{
ts = entertime;
m_ullTimeDiff -= m_ullInterval;
}
else
{
ts = entertime + m_ullInterval - m_ullTimeDiff;
m_ullTimeDiff = 0;
}
#endif
}
m_ullTargetTime = ts;
return payload;
}
UDT 接收工作线程
接收工作线程的主要工作同样在 while 循环中完成。首先检查是否有新的 socket 到来,如果有,则不断加入 m_pRcvUList,同时添加到 m_pHash 中。然后在 m_UnitQueue 中查找是否存在可用的存储块,在此过程中如果发现可用存储块不足会自动扩容。随后不断地通过 recvfrom 接收包。
如果是连接请求, 将被送给 listening socket 或者 rendezvous sockets,对应将进入 listen 或者 connect 操作。否则, 根据 getFlag 判断,进入 processData 或者 processCtrl。这也是接收数据被处理的核心函数。最后将这个 UDT实例 放入 m_pRcvUList 最后。
// Receive worker thread: first moves newly registered sockets into
// m_pRcvUList / m_pHash, then loops on recvfrom(). Packets with ID 0 are
// connection requests (routed to the listener or rendezvous sockets); data
// packets go to processData() and control packets to processCtrl() of the
// socket matching the ID. Each pass ends with a timer check that runs
// checkTimers() on stale entries and removes broken sockets.
#ifndef WIN32
void* CRcvQueue::worker(void* param)
#else
DWORD WINAPI CRcvQueue::worker(LPVOID param)
#endif
{
CRcvQueue* self = (CRcvQueue*)param;
sockaddr* addr = (AF_INET == self->m_UnitQueue.m_iIPversion) ? (sockaddr*) new sockaddr_in : (sockaddr*) new sockaddr_in6;
CUDT* u = NULL;
int32_t id;
while (!self->m_bClosing)
{
#ifdef NO_BUSY_WAITING
self->m_pTimer->tick();
#endif
// check waiting list, if new socket, insert it to the list
while (self->ifNewEntry())
{
CUDT* ne = self->getNewEntry();
if (NULL != ne)
{
self->m_pRcvUList->insert(ne);
self->m_pHash->insert(ne->m_SocketID, ne);
}
}
// find next available slot for incoming packet
CUnit* unit = self->m_UnitQueue.getNextAvailUnit();
if (NULL == unit)
{
// no space, skip this packet: drain it into a throw-away buffer
CPacket temp;
temp.m_pcData = new char[self->m_iPayloadSize];
temp.setLength(self->m_iPayloadSize);
self->m_pChannel->recvfrom(addr, temp);
delete [] temp.m_pcData;
goto TIMER_CHECK;
}
unit->m_Packet.setLength(self->m_iPayloadSize);
// reading next incoming packet, recvfrom returns -1 if nothing has been received
if (self->m_pChannel->recvfrom(addr, unit->m_Packet) < 0)
goto TIMER_CHECK;
id = unit->m_Packet.m_iID;
// ID 0 is for connection request, which should be passed to the listening socket or rendezvous sockets
if (0 == id)
{
if (NULL != self->m_pListener)
self->m_pListener->listen(addr, unit->m_Packet);
else if (NULL != (u = self->m_pRendezvousQueue->retrieve(addr, id)))
{
// asynchronous connect: call connect here
// otherwise wait for the UDT socket to retrieve this packet
if (!u->m_bSynRecving)
u->connect(unit->m_Packet);
else
self->storePkt(id, unit->m_Packet.clone());
}
}
else if (id > 0)
{
if (NULL != (u = self->m_pHash->lookup(id)))
{
// verify the packet really comes from the connected peer's address
if (CIPAddress::ipcmp(addr, u->m_pPeerAddr, u->m_iIPversion))
{
if (u->m_bConnected && !u->m_bBroken && !u->m_bClosing)
{
if (0 == unit->m_Packet.getFlag())
u->processData(unit); //data packet
else
u->processCtrl(unit->m_Packet); //control packet
u->checkTimers();
self->m_pRcvUList->update(u); //move to the list tail: freshly heard from
}
}
}
else if (NULL != (u = self->m_pRendezvousQueue->retrieve(addr, id)))
{
if (!u->m_bSynRecving)
u->connect(unit->m_Packet);
else
self->storePkt(id, unit->m_Packet.clone());
}
}
TIMER_CHECK:
// take care of the timing event for all UDT sockets
uint64_t currtime;
CTimer::rdtsc(currtime);
CRNode* ul = self->m_pRcvUList->m_pUList;
uint64_t ctime = currtime - 100000 * CTimer::getCPUFrequency(); //entries not updated within the last 100000 us are checked
while ((NULL != ul) && (ul->m_llTimeStamp < ctime))
{
CUDT* u = ul->m_pUDT;
if (u->m_bConnected && !u->m_bBroken && !u->m_bClosing)
{
u->checkTimers();
self->m_pRcvUList->update(u);
}
else
{
// the socket must be removed from Hash table first, then RcvUList
self->m_pHash->remove(u->m_SocketID);
self->m_pRcvUList->remove(u);
u->m_pRNode->m_bOnList = false;
}
ul = self->m_pRcvUList->m_pUList;
}
// Check connection requests status for all sockets in the RendezvousQueue.
self->m_pRendezvousQueue->updateConnStatus();
}
if (AF_INET == self->m_UnitQueue.m_iIPversion)
delete (sockaddr_in*)addr;
else
delete (sockaddr_in6*)addr;
#ifndef WIN32
return NULL;
#else
SetEvent(self->m_ExitCond);
return 0;
#endif
}
checkTimers 会更新 cc 参数,并发送 ACK 包,检查连接是否中断。在代码中,NAK 定时器不再生效,仅仅依靠发送方的超时机制。只有检测到超过 16 次超时且总时间达到阈值,才会认为连接已断开。超时也会导致拥塞控制算法进行调整。
// Periodic timer processing, driven by the receive worker: refresh CC
// parameters, emit full or "light" ACKs, and run the sender's EXP (timeout)
// logic. The connection is declared broken only after more than 16
// expirations spanning more than 5 seconds; otherwise a timeout re-queues
// unacknowledged packets for retransmission or sends a keep-alive.
void CUDT::checkTimers()
{
// update CC parameters
CCUpdate(); //recompute the inter-packet send interval and congestion window
uint64_t currtime;
CTimer::rdtsc(currtime);
if ((currtime > m_ullNextACKTime) || ((m_pCC->m_iACKInterval > 0) && (m_pCC->m_iACKInterval <= m_iPktCount)))
{
// ACK timer expired or ACK interval is reached
sendCtrl(2); //full ACK
CTimer::rdtsc(currtime);
if (m_pCC->m_iACKPeriod > 0) //a CC-defined ACK period takes precedence for m_ullNextACKTime
m_ullNextACKTime = currtime + m_pCC->m_iACKPeriod * m_ullCPUFrequency;
else
m_ullNextACKTime = currtime + m_ullACKInt;
m_iPktCount = 0;
m_iLightACKCount = 1;
}
else if (m_iSelfClockInterval * m_iLightACKCount <= m_iPktCount)
{
//send a "light" ACK
sendCtrl(2, NULL, NULL, 4);
++ m_iLightACKCount;
}
// we are not sending back repeated NAK anymore and rely on the sender's EXP for retransmission
//if ((m_pRcvLossList->getLossLength() > 0) && (currtime > m_ullNextNAKTime))
//{
// // NAK timer expired, and there is loss to be reported.
// sendCtrl(3);
//
// CTimer::rdtsc(currtime);
// m_ullNextNAKTime = currtime + m_ullNAKInt;
//} //the NAK timer no longer fires; retransmission relies on the sender's timeout, presumably to reduce spurious loss reports
uint64_t next_exp_time;
if (m_pCC->m_bUserDefinedRTO)
next_exp_time = m_ullLastRspTime + m_pCC->m_iRTO * m_ullCPUFrequency;
else
{
// default RTO: EXPCount * (RTT + 4 * RTTVar) + SYN, floored at EXPCount * min interval
uint64_t exp_int = (m_iEXPCount * (m_iRTT + 4 * m_iRTTVar) + m_iSYNInterval) * m_ullCPUFrequency;
if (exp_int < m_iEXPCount * m_ullMinExpInt)
exp_int = m_iEXPCount * m_ullMinExpInt;
next_exp_time = m_ullLastRspTime + exp_int;
}
if (currtime > next_exp_time)
{
// Haven't receive any information from the peer, is it dead?!
// timeout: at least 16 expirations and more than 5 seconds (5000000 us) of silence
if ((m_iEXPCount > 16) && (currtime - m_ullLastRspTime > 5000000 * m_ullCPUFrequency))
{
// Connection is broken.
// UDT does not signal any information about this instead of to stop quietly.
// Application will detect this when it calls any UDT methods next time.
m_bClosing = true;
m_bBroken = true;
m_iBrokenCounter = 30;
// update snd U list to remove this socket
m_pSndQueue->m_pSndUList->update(this);
releaseSynch();
// app can call any UDT API to learn the connection_broken error
s_UDTUnited.m_EPoll.update_events(m_SocketID, m_sPollID, UDT_EPOLL_IN | UDT_EPOLL_OUT | UDT_EPOLL_ERR, true);
CTimer::triggerEvent();
return;
}
// sender: Insert all the packets sent after last received acknowledgement into the sender loss list.
// recver: Send out a keep-alive packet
if (m_pSndBuffer->getCurrBufSize() > 0)
{
if ((CSeqNo::incseq(m_iSndCurrSeqNo) != m_iSndLastAck) && (m_pSndLossList->getLossLength() == 0))
{
// resend all unacknowledged packets on timeout, but only if there is no packet in the loss list
int32_t csn = m_iSndCurrSeqNo;
int num = m_pSndLossList->insert(m_iSndLastAck, csn);
m_iTraceSndLoss += num;
m_iSndLossTotal += num;
}
m_pCC->onTimeout(); //let the congestion controller react to the timeout
CCUpdate();
// immediately restart transmission
m_pSndQueue->m_pSndUList->update(this);
}
else
{
sendCtrl(1); //keep-alive packet
}
++ m_iEXPCount; //reset to 0 on any response; crossing 16 (with the time bound above) breaks the connection
// Reset last response time since we just sent a heart-beat.
m_ullLastRspTime = currtime;
}
}
再回头看数据处理部分 processData 。
// Handle one incoming data packet: refresh liveness state, feed the CC and
// arrival-time windows, store the payload into the receive buffer, detect
// sequence gaps (triggering an immediate NAK), and either advance
// m_iRcvCurrSeqNo or clear a retransmitted sequence from the loss list.
int CUDT::processData(CUnit* unit)
{
CPacket& packet = unit->m_Packet;
// Just heard from the peer, reset the expiration count.
m_iEXPCount = 1; //data received: reset the EXP counter
uint64_t currtime;
CTimer::rdtsc(currtime);
m_ullLastRspTime = currtime; //refresh the last-response time
m_pCC->onPktReceived(&packet); //CC arrival callback (implementation not located in this analysis)
++ m_iPktCount;
// update time information: record this packet's arrival time and the previous one
m_pRcvTimeWindow->onPktArrival(); //used to compute the packet arrival rate, fed back to the sender via ACK
// check if it is probing packet pair: used to estimate link capacity, also fed back via ACK
if (0 == (packet.m_iSeqNo & 0xF)) //first packet of a probing pair
m_pRcvTimeWindow->probe1Arrival(); //record the arrival time of the first probe packet
else if (1 == (packet.m_iSeqNo & 0xF))
m_pRcvTimeWindow->probe2Arrival(); //record the interval between the probing pair
++ m_llTraceRecv;
++ m_llRecvTotal;
int32_t offset = CSeqNo::seqoff(m_iRcvLastAck, packet.m_iSeqNo);
if ((offset < 0) || (offset >= m_pRcvBuffer->getAvailBufSize()))
return -1; //already acknowledged, or beyond the available buffer space
if (m_pRcvBuffer->addData(unit, offset) < 0) //store the packet into m_pRcvBuffer at its offset
return -1;
// Loss detection.
if (CSeqNo::seqcmp(packet.m_iSeqNo, CSeqNo::incseq(m_iRcvCurrSeqNo)) > 0)
{
// If loss found, insert them to the receiver loss list
m_pRcvLossList->insert(CSeqNo::incseq(m_iRcvCurrSeqNo), CSeqNo::decseq(packet.m_iSeqNo));
// pack loss list for NAK (high bit marks the start of a range)
int32_t lossdata[2];
lossdata[0] = CSeqNo::incseq(m_iRcvCurrSeqNo) | 0x80000000;
lossdata[1] = CSeqNo::decseq(packet.m_iSeqNo);
// Generate loss report immediately.
sendCtrl(3, NULL, lossdata, (CSeqNo::incseq(m_iRcvCurrSeqNo) == CSeqNo::decseq(packet.m_iSeqNo)) ? 1 : 2);
int loss = CSeqNo::seqlen(m_iRcvCurrSeqNo, packet.m_iSeqNo) - 2;
m_iTraceRcvLoss += loss;
m_iRcvLossTotal += loss;
}
// This is not a regular fixed size packet...
//an irregular sized packet usually indicates the end of a message, so send an ACK immediately
if (packet.getLength() != m_iPayloadSize)
CTimer::rdtsc(m_ullNextACKTime);
// Update the current largest sequence number that has been received.
// Or it is a retransmitted packet, remove it from receiver loss list.
if (CSeqNo::seqcmp(packet.m_iSeqNo, m_iRcvCurrSeqNo) > 0)
m_iRcvCurrSeqNo = packet.m_iSeqNo;
else
m_pRcvLossList->remove(packet.m_iSeqNo);
return 0;
}
接下来看控制消息的处理。这部分的内容可以参考 UDT 最新协议分析.
- ACK 处理
- 如果是一个轻量级 ACK,更新 m_iFlowWindowSize 和 m_iSndLastAck, 终止处理。
- 否则:
- 使用相同的 ACK 序号返回一个 ACK2 作为确认的确认。更新 m_ullSndLastAck2Time, m_iFlowWindowSize, m_iSndLastDataAck 和 m_iSndLastAck。
- 更新发送丢失链表,移除已经被确认的所有包序号。
- 更新 RTT 与 RTTVar。更新 ACK 和 NAK 周期为 4 * RTT + RTTVar + SYN。
- 更新发送端缓冲,释放已经被确认的缓冲。
- 更新包到达速率为:A = (A * 7 + a) / 8,其中 a 为 ACK 中携带的相应值。更新链路容量估计值:B = (B * 7 + b) / 8,其中 b 为 ACK 中携带的相应值。
- 更新发包间隔 m_ullInterval。
// processCtrl, ACK branch (excerpt): validate the ACK, return an ACK2,
// advance the flow window and release acknowledged send-buffer data, and
// feed RTT / delivery-rate / bandwidth samples into the congestion
// controller, which in turn updates the packet send interval.
void CUDT::processCtrl(CPacket& ctrlpkt)
{
...
case 2: //010 - Acknowledgement
{
int32_t ack;
// process a lite ACK: 4-byte payload carrying only the sequence number
if (4 == ctrlpkt.getLength())
{
ack = *(int32_t *)ctrlpkt.m_pcData;
if (CSeqNo::seqcmp(ack, m_iSndLastAck) >= 0)
{
m_iFlowWindowSize -= CSeqNo::seqoff(m_iSndLastAck, ack); //shrink the flow window by the newly acked span
m_iSndLastAck = ack;
}
break;
}
// read ACK seq. no.
ack = ctrlpkt.getAckSeqNo();
// send ACK acknowledgement
// number of ACK2 can be much less than number of ACK
uint64_t now = CTimer::getTime();
// NOTE(review): 'currtime' is used below but is not declared in this
// excerpt, while 'now' computed above is only used for the assignment --
// verify against the upstream source that the two are the same clock value.
if ((currtime - m_ullSndLastAck2Time > (uint64_t)m_iSYNInterval) || (ack == m_iSndLastAck2))
{
sendCtrl(6, &ack); // ack of ack: second-level acknowledgement, used for RTT measurement
m_iSndLastAck2 = ack;
m_ullSndLastAck2Time = now;
}
// Got data ACK
ack = *(int32_t *)ctrlpkt.m_pcData;
// check the validation of the ack
if (CSeqNo::seqcmp(ack, CSeqNo::incseq(m_iSndCurrSeqNo)) > 0)
{
//this should not happen: attack or bug -- an ACK must not exceed the largest sent sequence number
m_bBroken = true;
m_iBrokenCounter = 0;
break;
}
if (CSeqNo::seqcmp(ack, m_iSndLastAck) >= 0) //ACK for new data
{
// Update Flow Window Size, must update before and together with m_iSndLastAck
m_iFlowWindowSize = *((int32_t *)ctrlpkt.m_pcData + 3);
m_iSndLastAck = ack;
}
// protect packet retransmission
CGuard::enterCS(m_AckLock);
int offset = CSeqNo::seqoff(m_iSndLastDataAck, ack);
if (offset <= 0)
{
// discard it if it is a repeated ACK
CGuard::leaveCS(m_AckLock);
break;
}
// acknowledge the sending buffer
m_pSndBuffer->ackData(offset); //only moves the m_pFirstBlock pointer and adjusts m_iCount
// record total time used for sending
m_llSndDuration += currtime - m_llSndDurationCounter;
m_llSndDurationTotal += currtime - m_llSndDurationCounter;
m_llSndDurationCounter = currtime;
// update sending variables
m_iSndLastDataAck = ack;
m_pSndLossList->remove(CSeqNo::decseq(m_iSndLastDataAck)); //key function, discussed later in this article
CGuard::leaveCS(m_AckLock);
#ifndef WIN32
pthread_mutex_lock(&m_SendBlockLock);
if (m_bSynSending)
pthread_cond_signal(&m_SendBlockCond); //wake a sender blocked in a synchronous send
pthread_mutex_unlock(&m_SendBlockLock);
#else
if (m_bSynSending)
SetEvent(m_SendBlockCond);
#endif
// acknowledge any waiting epolls to write
s_UDTUnited.m_EPoll.update_events(m_SocketID, m_sPollID, UDT_EPOLL_OUT, true);
// insert this socket to snd list if it is not on the list yet
m_pSndQueue->m_pSndUList->update(this, false);
// Update RTT with an EWMA of the sample carried in the ACK
//m_iRTT = *((int32_t *)ctrlpkt.m_pcData + 1);
//m_iRTTVar = *((int32_t *)ctrlpkt.m_pcData + 2);
int rtt = *((int32_t *)ctrlpkt.m_pcData + 1);
m_iRTTVar = (m_iRTTVar * 3 + abs(rtt - m_iRTT)) >> 2;
m_iRTT = (m_iRTT * 7 + rtt) >> 3;
m_pCC->setRTT(m_iRTT); //propagate the smoothed RTT to the CC
if (ctrlpkt.getLength() > 16)
{
// Update Estimated Bandwidth and packet delivery rate (EWMA: 7/8 old + 1/8 new)
if (*((int32_t *)ctrlpkt.m_pcData + 4) > 0)
m_iDeliveryRate = (m_iDeliveryRate * 7 + *((int32_t *)ctrlpkt.m_pcData + 4)) >> 3;
if (*((int32_t *)ctrlpkt.m_pcData + 5) > 0)
m_iBandwidth = (m_iBandwidth * 7 + *((int32_t *)ctrlpkt.m_pcData + 5)) >> 3;
m_pCC->setRcvRate(m_iDeliveryRate); //propagate the receive rate to the CC
m_pCC->setBandwidth(m_iBandwidth); //propagate the bandwidth estimate to the CC
}
m_pCC->onACK(ack); //updates m_dPktSndPeriod, which feeds the m_ullInterval computation
CCUpdate(); //recompute m_ullInterval
++ m_iRecvACK;
++ m_iRecvACKTotal;
break;
}
...
}
- ACK2 处理
- acknowledge 根据ACK2中的ACK序号,在ACK历史窗口中找到关联的ACK,根据ACK2到达时间和ACK离开时间,计算rtt。
- 计算新的 RTT = (RTT * 7 + rtt) / 8,更新RTTVar = (RTTVar * 3 + abs(RTT - rtt)) / 4,更新cc中 rtt。
- 更新被确认的最大ACK序号。
// processCtrl, ACK2 branch (excerpt): match the ACK2 against the ACK history
// window to obtain an RTT sample, update the smoothed RTT/RTTVar, and record
// the largest ACK sequence known to have reached the sender.
void CUDT::processCtrl(CPacket& ctrlpkt)
{
case 6: //110 - Acknowledgement of Acknowledgement
{
int32_t ack;
int rtt = -1;
// update RTT: acknowledge() finds the matching ACK and returns the elapsed time
rtt = m_pACKWindow->acknowledge(ctrlpkt.getAckSeqNo(), ack);
if (rtt <= 0)
break;
//if increasing delay detected...
// sendCtrl(4);
// RTT EWMA: RTTVar = (3*RTTVar + |rtt - RTT|) / 4, RTT = (7*RTT + rtt) / 8
m_iRTTVar = (m_iRTTVar * 3 + abs(rtt - m_iRTT)) >> 2;
m_iRTT = (m_iRTT * 7 + rtt) >> 3;
m_pCC->setRTT(m_iRTT); //propagate the smoothed RTT to the CC
// update last ACK that has been received by the sender
if (CSeqNo::seqcmp(ack, m_iRcvLastAckAck) > 0)
m_iRcvLastAckAck = ack;
break;
}
}
- NAK 处理
- 将 NAK 中携带的所有序号添加到发送丢失链表中。通过码率控制更新 SND 周期。重置 EXP 时间变量。更新 m_pSndUList,等待重传。
// processCtrl, NAK branch (excerpt): decode the loss list (an entry with the
// high bit set starts a two-entry range), insert the lost sequences into the
// sender loss list, let the CC react to the loss, and reschedule the socket
// for immediate retransmission. Malformed ranges mark the connection broken.
void CUDT::processCtrl(CPacket& ctrlpkt)
{
case 3: //011 - Loss Report
{
int32_t* losslist = (int32_t *)(ctrlpkt.m_pcData);
m_pCC->onLoss(losslist, ctrlpkt.getLength() / 4); //CC loss handling, e.g. leave slow start, adjust parameters
CCUpdate();
bool secure = true;
// decode loss list message and insert loss into the sender loss list
for (int i = 0, n = (int)(ctrlpkt.getLength() / 4); i < n; ++ i)
{
if (0 != (losslist[i] & 0x80000000)) //range entry: [losslist[i] & 0x7FFFFFFF, losslist[i + 1]]
{
if ((CSeqNo::seqcmp(losslist[i] & 0x7FFFFFFF, losslist[i + 1]) > 0) || (CSeqNo::seqcmp(losslist[i + 1], m_iSndCurrSeqNo) > 0))
{
// seq_a must not be greater than seq_b; seq_b must not be greater than the most recent sent seq
secure = false;
break;
}
int num = 0;
if (CSeqNo::seqcmp(losslist[i] & 0x7FFFFFFF, m_iSndLastAck) >= 0)
num = m_pSndLossList->insert(losslist[i] & 0x7FFFFFFF, losslist[i + 1]);
else if (CSeqNo::seqcmp(losslist[i + 1], m_iSndLastAck) >= 0)
num = m_pSndLossList->insert(m_iSndLastAck, losslist[i + 1]); //clip the range start to the last acked seq
m_iTraceSndLoss += num;
m_iSndLossTotal += num;
++ i; //the range consumed two list entries
}
else if (CSeqNo::seqcmp(losslist[i], m_iSndLastAck) >= 0) //single-sequence entry
{
if (CSeqNo::seqcmp(losslist[i], m_iSndCurrSeqNo) > 0)
{
//seq_a must not be greater than the most recent sent seq
secure = false;
break;
}
int num = m_pSndLossList->insert(losslist[i], losslist[i]);
m_iTraceSndLoss += num;
m_iSndLossTotal += num;
}
}
if (!secure)
{
//this should not happen: attack or bug
m_bBroken = true;
m_iBrokenCounter = 0;
break;
}
// the lost packet (retransmission) should be sent out immediately
m_pSndQueue->m_pSndUList->update(this);
++ m_iRecvNAK;
++ m_iRecvNAKTotal;
break;
}
}
- Handshake 处理
// processCtrl, handshake branch (excerpt): the peer keeps querying because it
// has not yet received our handshake response, so rebuild and resend it.
case 0: //000 - Handshake
{
CHandShake req;
req.deserialize(ctrlpkt.m_pcData, ctrlpkt.getLength());
if ((req.m_iReqType > 0) || (m_bRendezvous && (req.m_iReqType != -2)))
{
// The peer side has not received the handshake message, so it keeps querying
// resend the handshake packet
CHandShake initdata;
initdata.m_iISN = m_iISN;
initdata.m_iMSS = m_iMSS;
initdata.m_iFlightFlagSize = m_iFlightFlagSize;
initdata.m_iReqType = (!m_bRendezvous) ? -1 : -2; //-1: normal response, -2: rendezvous response
initdata.m_iID = m_SocketID;
char* hs = new char [m_iPayloadSize];
int hs_size = m_iPayloadSize;
initdata.serialize(hs, hs_size);
sendCtrl(0, NULL, hs, hs_size);
delete [] hs;
}
break;
}
}
- Msg drop request 处理
- 在接收缓冲中标记所有属于同一个消息的包,使得不再可读。 在接收丢失链表中移除所有对应的包。
// processCtrl, message-drop branch (excerpt): mark the whole message as
// unreadable in the receive buffer, purge its sequence range from the
// receiver loss list, and advance m_iRcvCurrSeqNo past the dropped range
// when that range covers the current receive position.
case 7: //111 - Msg drop request
m_pRcvBuffer->dropMsg(ctrlpkt.getMsgSeq());
m_pRcvLossList->remove(*(int32_t*)ctrlpkt.m_pcData, *(int32_t*)(ctrlpkt.m_pcData + 4));
// move forward with current recv seq no.
if ((CSeqNo::seqcmp(*(int32_t*)ctrlpkt.m_pcData, CSeqNo::incseq(m_iRcvCurrSeqNo)) <= 0)
&& (CSeqNo::seqcmp(*(int32_t*)(ctrlpkt.m_pcData + 4), m_iRcvCurrSeqNo) > 0))
{
m_iRcvCurrSeqNo = *(int32_t*)(ctrlpkt.m_pcData + 4);
}
break;
}