在上一篇《建立RDMA侦听》中,我们讲了RDMA通信时server端需要进行的操作,这一篇我们来说一下客户端的流程。
## client端流程
RDMA编程时的client端和socket编程也是大体类似的,只是有一些跟设备相关的细节,包括地址解析,queue pair, memory region 等要自己显式处理。
- rdma_getaddrinfo
  获取服务器的地址信息
- rdma_create_event_channel
  create channel to receive events
- rdma_create_id
  allocate an rdma_cm_id, this is conceptually similar to a socket
- rdma_resolve_addr
  obtain a local RDMA device to reach the remote address
- rdma_get_cm_event
  wait for RDMA_CM_EVENT_ADDR_RESOLVED event
- rdma_ack_cm_event
  ack event
- rdma_create_qp
  allocate a QP for the communication
- rdma_resolve_route
  determine the route to the remote address
- rdma_get_cm_event
  wait for RDMA_CM_EVENT_ROUTE_RESOLVED event
- rdma_ack_cm_event
  ack event
- rdma_connect
  connect to the remote server
- rdma_get_cm_event
  wait for RDMA_CM_EVENT_ESTABLISHED event
- rdma_ack_cm_event
  ack event
- ibv_post_send
  Perform data transfers over connection
- rdma_disconnect
  tear-down connection
- rdma_get_cm_event
  wait for RDMA_CM_EVENT_DISCONNECTED event
- rdma_ack_cm_event
  ack event
- rdma_destroy_qp
  destroy the QP
- rdma_destroy_id
  release the rdma_cm_id
- rdma_destroy_event_channel
  release the event channel
下面是一个完整的client代码,这个代码可以配合建立RDMA侦听 里的server端代码进行测试。
测试时需要注意,不要使用localhost/127.0.0.1这样的回环地址去连接server端,回环地址会绕过RDMA网卡,因此无法进行RDMA通信。
//compile : gcc rdma-client.cpp -libverbs -lrdmacm -o rdma-client
#define _ISOC11_SOURCE
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <infiniband/verbs.h>
#include <rdma/rdma_cma.h>
/* File-scope state shared between main() and on_route_resolved(). */

/* CM event channel; created in main() with rdma_create_event_channel(). */
struct rdma_event_channel* ec;
/* Connection identifier; created in main() with rdma_create_id(). */
struct rdma_cm_id* cm_id;
/* Protection domain; allocated in on_route_resolved(). */
struct ibv_pd* pd;
/* NOTE(review): not referenced by any code visible in this file. */
struct ibv_context* rdma_cm_context;
/*
 * Registered data buffer; allocated in on_route_resolved(). main() also
 * writes the outgoing message into it before posting the send.
 */
void* recv_buf;
/* Size in bytes of recv_buf (also used as its alignment at allocation). */
#define RECV_BUF_SIZE 4096
/* Memory region covering recv_buf; registered in on_route_resolved(). */
struct ibv_mr* recv_mr;
/*
 * Handle RDMA_CM_EVENT_ROUTE_RESOLVED for `id`.
 *
 * Allocates the verbs resources needed for the connection on the device
 * bound to `id` (protection domain, registered buffer, completion channel,
 * completion queue), creates an RC queue pair on `id`, and issues
 * rdma_connect().
 *
 * On success the file-scope globals `pd`, `recv_buf` and `recv_mr` refer to
 * the allocated resources; the completion queue is reachable through
 * id->qp->send_cq (the same CQ is used for send and receive).
 *
 * Returns 0 on success and a non-zero value on failure. On failure every
 * resource acquired by this call is released again and the globals are
 * reset to NULL.
 */
int on_route_resolved(struct rdma_cm_id* id)
{
    int rc = -1;
    int qp_created = 0;
    struct rdma_conn_param cm_params;
    struct ibv_qp_init_attr qp_attr;
    struct ibv_context* rdma_context = id->verbs;
    struct ibv_comp_channel* comp_channel = NULL;
    struct ibv_cq* cq = NULL;

    pd = ibv_alloc_pd(rdma_context);
    if (pd == NULL)
    {
        perror("ibv_alloc_pd failed");
        goto fail;
    }

    recv_buf = aligned_alloc(4096, RECV_BUF_SIZE);
    if (recv_buf == NULL)
    {
        perror("aligned_alloc failed");
        goto fail;
    }

    recv_mr = ibv_reg_mr(pd, recv_buf, RECV_BUF_SIZE,
                         IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
    if (recv_mr == NULL)
    {
        perror("ibv_reg_mr failed");
        goto fail;
    }

    comp_channel = ibv_create_comp_channel(rdma_context);
    if (comp_channel == NULL)
    {
        perror("ibv_create_comp_channel failed");
        goto fail;
    }

    cq = ibv_create_cq(rdma_context, 512, NULL, comp_channel, 0);
    if (cq == NULL)
    {
        perror("ibv_create_cq failed");
        goto fail;
    }

    /* ibv_req_notify_cq() reports the error as its return value. */
    rc = ibv_req_notify_cq(cq, 0);
    if (rc)
    {
        fprintf(stderr, "ibv_req_notify_cq failed: %s\n", strerror(rc));
        goto fail;
    }

    memset(&qp_attr, 0, sizeof(qp_attr));
    qp_attr.send_cq = cq;
    qp_attr.recv_cq = cq;
    qp_attr.qp_type = IBV_QPT_RC;
    qp_attr.cap.max_send_wr = 512;
    qp_attr.cap.max_recv_wr = 512;
    qp_attr.cap.max_send_sge = 1;
    qp_attr.cap.max_recv_sge = 1;
    rc = rdma_create_qp(id, pd, &qp_attr);
    if (rc)
    {
        perror("rdma_create_qp failed");
        goto fail;
    }
    qp_created = 1;

    memset(&cm_params, 0, sizeof(cm_params));
    cm_params.private_data = NULL;
    cm_params.private_data_len = 0;
    cm_params.responder_resources = (uint8_t)8;
    cm_params.initiator_depth = (uint8_t)8;
    cm_params.retry_count = 7;
    cm_params.rnr_retry_count = 7;
    rc = rdma_connect(id, &cm_params);
    if (rc)
    {
        perror("rdma_connect failed");
        goto fail;
    }
    return 0;

fail:
    /* Release in reverse order of acquisition; the QP must go before its CQ. */
    if (qp_created)
        rdma_destroy_qp(id);
    if (cq != NULL)
        ibv_destroy_cq(cq);
    if (comp_channel != NULL)
        ibv_destroy_comp_channel(comp_channel);
    if (recv_mr != NULL)
    {
        ibv_dereg_mr(recv_mr);
        recv_mr = NULL;
    }
    free(recv_buf);
    recv_buf = NULL;
    if (pd != NULL)
    {
        ibv_dealloc_pd(pd);
        pd = NULL;
    }
    return rc ? rc : -1;
}
/*
 * Copy the greeting into the registered buffer, post it as a signaled SEND
 * on id's queue pair and wait for its work completion.
 *
 * Returns 0 when the send completed successfully, non-zero otherwise.
 */
static int send_hello(struct rdma_cm_id* id)
{
    static const char msg[] = "HelloWorld";
    struct ibv_send_wr wr;
    struct ibv_send_wr* bad_wr = NULL;
    struct ibv_sge sge;
    struct ibv_wc wc;
    int polled = 0;
    int rc = 0;

    if (id->qp == NULL || recv_buf == NULL || recv_mr == NULL)
    {
        fprintf(stderr, "connection resources are not initialized\n");
        return -1;
    }

    memcpy(recv_buf, msg, sizeof(msg));

    memset(&sge, 0, sizeof(sge));
    sge.addr = (uint64_t)(uintptr_t)recv_buf;
    sge.length = (uint32_t)strlen(msg);
    sge.lkey = recv_mr->lkey;

    memset(&wr, 0, sizeof(wr));
    wr.wr_id = (uint64_t)1;
    wr.next = NULL;
    wr.sg_list = &sge;
    wr.num_sge = 1;
    wr.opcode = IBV_WR_SEND;
    wr.send_flags = IBV_SEND_SIGNALED;

    printf("send data to server\n");
    rc = ibv_post_send(id->qp, &wr, &bad_wr);
    if (rc)
    {
        fprintf(stderr, "ibv_post_send failed, rc:%d\n", rc);
        return rc;
    }

    /*
     * ibv_post_send() only queues the request. Poll the send CQ until the
     * completion shows up so the process does not tear the QP down while
     * the message is still in flight.
     * NOTE(review): this spins until a completion (success or error) is
     * reported; it relies on the peer having posted a receive.
     */
    do
    {
        polled = ibv_poll_cq(id->qp->send_cq, 1, &wc);
    } while (polled == 0);

    if (polled < 0)
    {
        fprintf(stderr, "ibv_poll_cq failed\n");
        return -1;
    }
    if (wc.status != IBV_WC_SUCCESS)
    {
        fprintf(stderr, "send completed with error: %s\n",
                ibv_wc_status_str(wc.status));
        return -1;
    }
    return 0;
}

/*
 * Release everything the client may have acquired: QP, CQ and completion
 * channel (reached through the QP), memory region, buffer, protection
 * domain, cm_id, event channel and the resolved address list. Every step is
 * skipped when the corresponding resource was never created.
 */
static void release_resources(struct addrinfo* addr)
{
    if (cm_id != NULL && cm_id->qp != NULL)
    {
        /* Remember the CQ before the QP disappears; send and recv share it. */
        struct ibv_cq* cq = cm_id->qp->send_cq;
        struct ibv_comp_channel* channel = (cq != NULL) ? cq->channel : NULL;

        rdma_destroy_qp(cm_id);
        if (cq != NULL)
            ibv_destroy_cq(cq);
        if (channel != NULL)
            ibv_destroy_comp_channel(channel);
    }
    if (recv_mr != NULL)
    {
        ibv_dereg_mr(recv_mr);
        recv_mr = NULL;
    }
    free(recv_buf);
    recv_buf = NULL;
    if (pd != NULL)
    {
        ibv_dealloc_pd(pd);
        pd = NULL;
    }
    if (cm_id != NULL)
    {
        rdma_destroy_id(cm_id);
        cm_id = NULL;
    }
    if (ec != NULL)
    {
        rdma_destroy_event_channel(ec);
        ec = NULL;
    }
    if (addr != NULL)
        freeaddrinfo(addr);
}

/*
 * RDMA client entry point.
 *
 * Usage: rdma-client <server_ip>
 *
 * Resolves the server address (port 10121), then drives the connection from
 * the CM event loop: address resolved -> route resolved -> connect ->
 * established. Once established it sends one "HelloWorld" message, waits for
 * the send completion, disconnects, waits for the DISCONNECTED event and
 * releases all resources.
 *
 * Returns EXIT_SUCCESS when the message was sent and the connection was torn
 * down cleanly, 1 on a usage error, and EXIT_FAILURE on any other error.
 */
int main(int argc, char** argv)
{
    int port = 10121;
    int rc = 0;
    int done = 0;
    int failed = 0;
    int sent = 0;
    char port_str[16];
    char* ip_str = NULL;
    struct addrinfo hints;
    struct addrinfo* addr = NULL;
    struct rdma_cm_event* event = NULL;

    if (argc != 2)
    {
        fprintf(stderr, "Usage: rdma-client <server_ip>\n");
        return 1;
    }
    ip_str = argv[1];

    /*
     * Let getaddrinfo() fill in the port so no cast to a specific sockaddr
     * type is needed, whichever address family is returned.
     */
    snprintf(port_str, sizeof(port_str), "%d", port);
    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    rc = getaddrinfo(ip_str, port_str, &hints, &addr);
    if (rc)
    {
        /* getaddrinfo() returns an EAI_* code; errno is not meaningful. */
        fprintf(stderr, "getaddrinfo failed, %s\n", gai_strerror(rc));
        return EXIT_FAILURE;
    }

    printf("connecting to server ip:%s...\n", ip_str);
    ec = rdma_create_event_channel();
    if (ec == NULL)
    {
        perror("rdma_create_event_channel failed");
        release_resources(addr);
        return EXIT_FAILURE;
    }
    rc = rdma_create_id(ec, &cm_id, NULL, RDMA_PS_TCP);
    if (rc)
    {
        perror("rdma_create_id failed");
        release_resources(addr);
        return EXIT_FAILURE;
    }
    rc = rdma_resolve_addr(cm_id, NULL, addr->ai_addr, 500);
    if (rc)
    {
        perror("rdma_resolve_addr failed");
        release_resources(addr);
        return EXIT_FAILURE;
    }

    printf("Begin process cm event...\n");
    while (!done)
    {
        enum rdma_cm_event_type ev_type;
        struct rdma_cm_id* ev_id = NULL;
        int ev_status = 0;

        if (rdma_get_cm_event(ec, &event) != 0)
        {
            perror("rdma_get_cm_event failed");
            failed = 1;
            break;
        }
        /* Copy what is needed, then ack so the event can be released. */
        ev_type = event->event;
        ev_id = event->id;
        ev_status = event->status;
        rdma_ack_cm_event(event);

        switch (ev_type)
        {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
            printf("get event RDMA_CM_EVENT_ADDR_RESOLVED\n");
            rc = rdma_resolve_route(ev_id, 2000);
            if (rc)
            {
                perror("rdma_resolve_route failed");
                failed = 1;
                done = 1;
            }
            break;
        case RDMA_CM_EVENT_ROUTE_RESOLVED:
            printf("get event RDMA_CM_EVENT_ROUTE_RESOLVED\n");
            rc = on_route_resolved(ev_id);
            if (rc)
            {
                failed = 1;
                done = 1;
            }
            break;
        case RDMA_CM_EVENT_ESTABLISHED:
            printf("get event RDMA_CM_EVENT_ESTABLISHED\n");
            rc = send_hello(ev_id);
            if (rc)
            {
                failed = 1;
                done = 1;
                break;
            }
            sent = 1;
            /* Keep looping until RDMA_CM_EVENT_DISCONNECTED arrives. */
            rc = rdma_disconnect(ev_id);
            if (rc)
            {
                perror("rdma_disconnect failed");
                failed = 1;
                done = 1;
            }
            break;
        case RDMA_CM_EVENT_DISCONNECTED:
            printf("get event RDMA_CM_EVENT_DISCONNECTED\n");
            done = 1;
            break;
        case RDMA_CM_EVENT_ADDR_ERROR:
        case RDMA_CM_EVENT_ROUTE_ERROR:
        case RDMA_CM_EVENT_CONNECT_ERROR:
        case RDMA_CM_EVENT_UNREACHABLE:
        case RDMA_CM_EVENT_REJECTED:
            /* Without this the loop would block forever on a failed connect. */
            fprintf(stderr, "get error event %s, status:%d\n",
                    rdma_event_str(ev_type), ev_status);
            failed = 1;
            done = 1;
            break;
        default:
            break;
        }
    }

    release_resources(addr);
    return (sent && !failed) ? EXIT_SUCCESS : EXIT_FAILURE;
}
通过这两篇文章,读者应该可以快速地建立一个自己的RDMA通信程序了。在上面的例子里面,server端使用了IBV_RECEIVE操作,client端使用了IBV_SEND操作。这个意义上还是和socket编程很像的。虽然这样操作也避免了类似TCP协议栈在CPU上的开销,但这远不是RDMA的精髓。RDMA总共提供了4个操作,分成两类:
- 双边API (two-sided), 这类API的操作需要server, client双方配合,需要一边先post_recv, 然后另外一边post_send,两个操作完成一次传输。这类操作就是我们例子里用到的
- IBV_SEND
- IBV_RECEIVE
- 单边API (one-sided), 这类API只需要server或者client一边调用API,就可以从另一边的内存里面读写数据。包括
- IBV_READ
- IBV_WRITE
要想用好RDMA, 就要将这两类API配合使用好。往往需要结合业务设计出最合适的协议模型。也欢迎大家关注我的开源存储项目PureFlash,这是一个RDMA在实际项目中应用的具体例子。