ncclSocket系NCCL网络通信的基础模块,bootstrap阶段以及多机网络数据通信时会使用该模块,因此需要分析ncclSocket模块代码,理解它的设计。
ncclSocket结构体
ncclSocketAddress是一个联合体(union),其成员可以是sockaddr,sockaddr_in或sockaddr_in6,分别对应通用Socket地址,IPV4 Socket地址和IPV6 Socket地址(注意:Unix Domain Socket地址对应的是sockaddr_un,并不包含在该联合体中)。这意味着ncclSocket希望为IPV4和IPV6这些不同类型的Socket提供统一接口。
ncclSocketType可以是ncclSocketTypeBootstrap,ncclSocketTypeProxy,ncclSocketTypeNetSocket以及ncclSocketTypeNetIb(另有默认值ncclSocketTypeUnknown),涵盖Bootstrap控制面通信,以及以太网和IB的数据面通信。这意味着ncclSocket在硬件层面可以支持普通网卡和IB网卡。
/* Common socket address storage structure for IPv4/IPv6.
 * All members share the same storage; which view is valid is determined by
 * the address family of the stored address. */
union ncclSocketAddress {
struct sockaddr sa;       /* generic, family-agnostic view of the address */
struct sockaddr_in sin;   /* IPv4 view */
struct sockaddr_in6 sin6; /* IPv6 view */
};
/* Tags a socket with the kind of NCCL traffic it carries.
 * NOTE(review): the per-value descriptions below are derived from the
 * enumerator names; confirm against the users of each type. */
enum ncclSocketType {
ncclSocketTypeUnknown = 0,   /* default used by ncclSocketInit when no type is given */
ncclSocketTypeBootstrap = 1, /* bootstrap (control-plane) socket */
ncclSocketTypeProxy = 2,     /* proxy socket */
ncclSocketTypeNetSocket = 3, /* data-plane socket for the socket (Ethernet) network transport */
ncclSocketTypeNetIb = 4      /* socket used by the InfiniBand network transport */
};
/* Per-socket state shared by all ncclSocket* functions.
 * Fields marked NOTE(review) are described from their names only, because the
 * code that reads and writes them is not visible here. */
struct ncclSocket {
int fd;                         /* socket file descriptor; set by a successful listen/connect/accept */
int acceptFd;                   /* NOTE(review): presumably the listening fd used while accepting — confirm */
int timedOutRetries;            /* NOTE(review): presumably counts retries after a timed-out connect — confirm */
int refusedRetries;             /* NOTE(review): presumably counts retries after a refused connect — confirm */
union ncclSocketAddress addr;   /* address to listen on / connect to, or the remote address after accept */
volatile uint32_t* abortFlag;   /* optional (may be NULL); NOTE(review): presumably polled to abort pending operations — confirm */
int asyncFlag;                  /* 0 by default; NOTE(review): presumably selects asynchronous (non-blocking) operation — confirm */
enum ncclSocketState state;     /* current state, see enum ncclSocketState */
int salen;                      /* NOTE(review): presumably the byte length of the address stored in addr — confirm */
uint64_t magic;                 /* magic value supplied to ncclSocketInit (defaults to NCCL_SOCKET_MAGIC) */
enum ncclSocketType type;       /* kind of traffic this socket carries, see enum ncclSocketType */
};
ncclSocket接口
ncclSocket接口可分为3类:第一类是IP字符串解析,网络设备搜索等网络相关的工具类接口;第二类是负责Socket构建,监听,连接,销毁等Socket状态转换的接口;第三类是发送数据和接收数据的接口。不同种类的接口已用空行隔开,并用注释加以说明,参见以下代码片段。
// Convert a ncclSocketAddress to a string. `buf` is a caller-supplied output buffer.
// NOTE(review): `numericHostForm` (default 1) presumably selects a numeric host
// representation instead of a resolved host name — confirm against the implementation.
const char *ncclSocketToString(union ncclSocketAddress *addr, char *buf, const int numericHostForm = 1);
// Parse the string `ip_port_pair` (an IP/port pair) into the ncclSocketAddress `ua`.
ncclResult_t ncclSocketGetAddrFromString(union ncclSocketAddress* ua, const char* ip_port_pair);
// Find network device(s) located in the same subnet as remoteAddr and initialize localAddrs.
// `ifNames` receives interface names (at most `ifNameMaxSize` bytes each); at most `maxIfs` entries are filled.
// NOTE(review): the int return value is presumably the number of interfaces found — confirm.
int ncclFindInterfaceMatchSubnet(char* ifNames, union ncclSocketAddress* localAddrs, union ncclSocketAddress* remoteAddr, int ifNameMaxSize, int maxIfs);
// Find network devices according to a fixed rule, e.g. IB first, then local network, etc.
// NOTE(review): the int return value is presumably the number of interfaces found — confirm.
int ncclFindInterfaces(char* ifNames, union ncclSocketAddress *ifAddrs, int ifNameMaxSize, int maxIfs);
// Initialize a socket. Every argument except `sock` is optional: no address, the default
// NCCL_SOCKET_MAGIC, type ncclSocketTypeUnknown, no abort flag and asyncFlag = 0.
ncclResult_t ncclSocketInit(struct ncclSocket* sock, union ncclSocketAddress* addr = NULL, uint64_t magic = NCCL_SOCKET_MAGIC, enum ncclSocketType type = ncclSocketTypeUnknown, volatile uint32_t* abortFlag = NULL, int asyncFlag = 0);
// Create a listening socket. sock->addr can be pre-filled with IP & port info. sock->fd is set after a successful call
ncclResult_t ncclSocketListen(struct ncclSocket* sock);
// Extract the ncclSocketAddress of sock into *addr
ncclResult_t ncclSocketGetAddr(struct ncclSocket* sock, union ncclSocketAddress* addr);
// Connect to sock->addr. sock->fd is set after a successful call.
ncclResult_t ncclSocketConnect(struct ncclSocket* sock);
// Return socket connection state through *running.
ncclResult_t ncclSocketReady(struct ncclSocket* sock, int *running);
// Accept an incoming connection from ulistenSock->fd and keep the file descriptor in sock->fd, with the remote side IP/port in sock->addr.
ncclResult_t ncclSocketAccept(struct ncclSocket* sock, struct ncclSocket* ulistenSock);
// Retrieve the file descriptor of sock through *fd.
ncclResult_t ncclSocketGetFd(struct ncclSocket* sock, int* fd);
// Associate the file descriptor fd with sock.
// NOTE(review): any other state changes made to sock are not visible here — confirm.
ncclResult_t ncclSocketSetFd(int fd, struct ncclSocket* sock);
// Close sock.
ncclResult_t ncclSocketClose(struct ncclSocket* sock);
// Operation codes; presumably the values accepted by the `op` argument of
// ncclSocketProgress / ncclSocketWait — confirm against the implementation.
#define NCCL_SOCKET_SEND 0
#define NCCL_SOCKET_RECV 1
// Send or receive (selected by `op`) up to `size` bytes at `ptr` on `sock`; `*offset` tracks how far the transfer has got.
// NOTE(review): presumably makes partial progress and returns without waiting for completion — confirm.
ncclResult_t ncclSocketProgress(int op, struct ncclSocket* sock, void* ptr, int size, int* offset);
// Same arguments as ncclSocketProgress.
// NOTE(review): presumably keeps going until `*offset` reaches `size` — confirm.
ncclResult_t ncclSocketWait(int op, struct ncclSocket* sock, void* ptr, int size, int* offset);
// Send `size` bytes starting at `ptr` on `sock`.
ncclResult_t ncclSocketSend(struct ncclSocket* sock, void* ptr, int size);
// Receive `size` bytes into `ptr` from `sock`.
ncclResult_t ncclSocketRecv(struct ncclSocket* sock, void* ptr, int size);
// Send `sendSize` bytes from `sendPtr` on `sendSock` and receive `recvSize` bytes into `recvPtr` from `recvSock`.
// NOTE(review): presumably the two transfers are progressed together rather than one after the other — confirm.
ncclResult_t ncclSocketSendRecv(struct ncclSocket* sendSock, void* sendPtr, int sendSize, struct ncclSocket* recvSock, void* recvPtr, int recvSize);
// Try to receive `size` bytes into `ptr` from `sock`.
// NOTE(review): `*closed` presumably reports that the peer closed the connection, and
// `blocking` presumably selects whether the call waits for data — confirm.
ncclResult_t ncclSocketTryRecv(struct ncclSocket* sock, void* ptr, int size, int* closed, bool blocking);
ncclSocket状态转换
ncclSocket定义了10种状态(取值0到9;ncclSocketStateNum只表示状态的数量,本身不是一种状态),如ncclSocketState定义所示。
/* States of a ncclSocket (values 0..9).
 * NOTE(review): the per-state descriptions are derived from the enumerator
 * names; the exact transitions are implemented elsewhere — confirm there. */
enum ncclSocketState {
ncclSocketStateNone = 0,           /* zero value; no state assigned yet */
ncclSocketStateInitialized = 1,    /* socket has been initialized */
ncclSocketStateAccepting = 2,      /* accept in progress */
ncclSocketStateAccepted = 3,       /* a connection has been accepted */
ncclSocketStateConnecting = 4,     /* connect in progress */
ncclSocketStateConnectPolling = 5, /* polling for the connect to complete */
ncclSocketStateConnected = 6,      /* connection established */
ncclSocketStateReady = 7,          /* ready for use */
ncclSocketStateClosed = 8,         /* socket has been closed */
ncclSocketStateError = 9,          /* socket is in an error state */
ncclSocketStateNum = 10            /* number of states above; not a state itself */
};
以下是ncclSocket状态转换图。当ncclSocket处于ncclSocketStateReady状态时,即可执行新的任务。