RDMA编程实践-SEND-RECEIVE原语应用

RDMA编程实践

本文描述了RDMA编程过程中的SEND-RECEIVE双边原语的代码实现。包含多个版本,1、client向server发送消息,server回复client收到消息(ACK),然后两边断开连接。2、server端循环等待客户端建立连接,client发送一次消息后,双方断开连接。3、server端循环等待客户端建立连接,一旦建立,client端可以一直向server端发送消息,直到发送消息为disconnect,server和client断开链接,但是server此时仍然可以等待别的client发送消息。
代码基于send-receive样例实现。关于代码注释,可以参考代码解释。
Makefile文件如下,它会编译当前目录下的所有.c文件:

# Builds every .c file in the current directory into an executable of the
# same name (foo.c -> foo).
.PHONY: all clean

CC := gcc
CFLAGS := -Wall -g
# Libraries the programs are linked against: librdmacm, libibverbs, pthreads.
LDLIBS := -lrdmacm -libverbs -lpthread -g

# SRCS is every .c file found here; APPS is the same list without the suffix.
SRCS := $(wildcard *.c)
APPS := $(SRCS:.c=)

all: $(APPS)

# Pattern rule: compile and link one source file directly into its executable.
%: %.c
	$(CC) $(CFLAGS) $< -o $@ $(LDLIBS)

# Remove the built executables.
clean:
	rm -f $(APPS)

version1 客户端-服务端消息一次传递

在这个阶段,我们希望能实现下面这样一个场景。client与server端相连接,client端能够发送一条消息给server,server收到该条消息之后回复一条消息给client端,表示已经确认收到。之后两者断开连接。

代码:

// client1.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <netdb.h>
#include <errno.h>
#include <getopt.h>
#include <rdma/rdma_cma.h>
#include <rdma/rdma_verbs.h>

/* Address and port handed to rdma_getaddrinfo() to locate the server. */
static const char *server = "10.10.10.1";
static const char *port = "7471";

/* Endpoint created by rdma_create_ep() in run(). */
static struct rdma_cm_id *id;
/* mr registers recv_msg; send_mr registers send_msg and is only created when
 * inline sends are not used. */
static struct ibv_mr *mr, *send_mr;
/* Flags passed to rdma_post_send(); IBV_SEND_INLINE when the QP allows it. */
static int send_flags;
/* Fixed-size outgoing and incoming message buffers. */
static uint8_t send_msg[16];
static uint8_t recv_msg[16];

/*
 * Perform one request/reply exchange with the server.
 *
 * Resolves the server address, creates the endpoint, registers the message
 * buffers, posts a receive for the reply, connects, sends send_msg and then
 * waits for the send completion and for the reply. Resources are released in
 * reverse order of acquisition through the cleanup labels at the end.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int run(void)
{
	struct rdma_addrinfo hints, *res;
	struct ibv_qp_init_attr attr;
	struct ibv_wc wc;
	int ret;

	memset(&hints, 0, sizeof hints);
	hints.ai_port_space = RDMA_PS_TCP;
	ret = rdma_getaddrinfo(server, port, &hints, &res);
	if (ret) {
		printf("rdma_getaddrinfo: %s\n", gai_strerror(ret));
		goto out;
	}

	memset(&attr, 0, sizeof attr);
	attr.cap.max_send_wr = attr.cap.max_recv_wr = 1;
	attr.cap.max_send_sge = attr.cap.max_recv_sge = 1;
	attr.cap.max_inline_data = sizeof send_msg;
	attr.qp_context = id;
	attr.sq_sig_all = 1;
	ret = rdma_create_ep(&id, res, NULL, &attr);
	if (ret) {
		perror("rdma_create_ep");
		goto out_free_addrinfo;
	}

	/*
	 * Only look at attr.cap once the endpoint exists: it tells us whether
	 * the message fits into an inline send or needs a registered buffer.
	 */
	if (attr.cap.max_inline_data >= sizeof send_msg)
		send_flags = IBV_SEND_INLINE;
	else
		printf("rdma_client: device doesn't support IBV_SEND_INLINE, "
		       "using sge sends\n");

	mr = rdma_reg_msgs(id, recv_msg, sizeof recv_msg);
	if (!mr) {
		perror("rdma_reg_msgs for recv_msg");
		ret = -1;
		goto out_destroy_ep;
	}
	if ((send_flags & IBV_SEND_INLINE) == 0) {
		send_mr = rdma_reg_msgs(id, send_msg, sizeof send_msg);
		if (!send_mr) {
			perror("rdma_reg_msgs for send_msg");
			ret = -1;
			goto out_dereg_recv;
		}
	}

	/* The receive for the reply is posted before connecting. */
	ret = rdma_post_recv(id, NULL, recv_msg, sizeof recv_msg, mr);
	if (ret) {
		perror("rdma_post_recv");
		goto out_dereg_send;
	}

	ret = rdma_connect(id, NULL);
	if (ret) {
		perror("rdma_connect");
		goto out_dereg_send;
	}

	/* The buffers are fixed-size and not guaranteed to be NUL-terminated. */
	printf("client send: %.*s\n", (int)sizeof send_msg, (char *)send_msg);
	ret = rdma_post_send(id, NULL, send_msg, sizeof send_msg, send_mr,
			     send_flags);
	if (ret) {
		perror("rdma_post_send");
		goto out_disconnect;
	}

	while ((ret = rdma_get_send_comp(id, &wc)) == 0)
		;
	if (ret < 0) {
		perror("rdma_get_send_comp");
		goto out_disconnect;
	}
	/* A completion was returned, but the work request itself may have failed. */
	if (wc.status != IBV_WC_SUCCESS) {
		fprintf(stderr, "send completion failed: %s\n",
			ibv_wc_status_str(wc.status));
		ret = -1;
		goto out_disconnect;
	}

	while ((ret = rdma_get_recv_comp(id, &wc)) == 0)
		;
	if (ret < 0) {
		perror("rdma_get_recv_comp");
		goto out_disconnect;
	}
	if (wc.status != IBV_WC_SUCCESS) {
		fprintf(stderr, "recv completion failed: %s\n",
			ibv_wc_status_str(wc.status));
		ret = -1;
		goto out_disconnect;
	}

	/* Only reached when a reply really arrived. */
	printf("client received: %.*s\n", (int)sizeof recv_msg,
	       (char *)recv_msg);
	ret = 0;

out_disconnect:
	rdma_disconnect(id);
out_dereg_send:
	if ((send_flags & IBV_SEND_INLINE) == 0)
		rdma_dereg_mr(send_mr);
out_dereg_recv:
	rdma_dereg_mr(mr);
out_destroy_ep:
	rdma_destroy_ep(id);
out_free_addrinfo:
	rdma_freeaddrinfo(res);
out:
	return ret;
}

/*
 * Entry point of the client: place the greeting into the outgoing buffer,
 * run the exchange and report its result as the process exit status.
 */
int main(int argc, char **argv)
{
	static const char greeting[] = "hello world";
	int status;

	(void)argc;
	(void)argv;

	/* Copy the text only, without its terminating NUL. */
	memcpy(send_msg, greeting, sizeof greeting - 1);

	printf("rdma_client: start\n");
	status = run();
	printf("rdma_client: end %d\n", status);

	return status;
}

server端代码

// server1.c
/*
 * Copyright (c) 2005-2009 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <getopt.h>
#include <netdb.h>
#include <rdma/rdma_cma.h>
#include <rdma/rdma_verbs.h>

/* Address and port handed to rdma_getaddrinfo() for the passive (listening) side. */
static const char *server = "0.0.0.0";
static const char *port = "7471";

/* listen_id is the listening endpoint; id is the endpoint returned by
 * rdma_get_request() for the incoming connection. */
static struct rdma_cm_id *listen_id, *id;
/* mr registers recv_msg; send_mr registers send_msg and is only created when
 * inline sends are not used. */
static struct ibv_mr *mr, *send_mr;
/* Flags passed to rdma_post_send(); IBV_SEND_INLINE when the QP allows it. */
static int send_flags;
/* Fixed-size outgoing and incoming message buffers. */
static uint8_t send_msg[16];
static uint8_t recv_msg[16];

/*
 * Accept a single client, receive one message and answer it with "ACK".
 *
 * Creates the listening endpoint, waits for one connection request, prepares
 * the buffers, posts the receive before accepting, then waits for the
 * message, sends the acknowledgement and waits for its send completion.
 * Resources are released in reverse order of acquisition through the cleanup
 * labels at the end.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int run(void)
{
	static const char ack[] = "ACK";
	struct rdma_addrinfo hints, *res;
	struct ibv_qp_init_attr init_attr;
	struct ibv_qp_attr qp_attr;
	struct ibv_wc wc;
	int ret;

	memset(&hints, 0, sizeof hints);
	hints.ai_flags = RAI_PASSIVE;
	hints.ai_port_space = RDMA_PS_TCP;
	ret = rdma_getaddrinfo(server, port, &hints, &res);
	if (ret) {
		printf("rdma_getaddrinfo: %s\n", gai_strerror(ret));
		return ret;
	}

	memset(&init_attr, 0, sizeof init_attr);
	init_attr.cap.max_send_wr = init_attr.cap.max_recv_wr = 1;
	init_attr.cap.max_send_sge = init_attr.cap.max_recv_sge = 1;
	init_attr.cap.max_inline_data = sizeof send_msg;
	init_attr.sq_sig_all = 1;
	ret = rdma_create_ep(&listen_id, res, NULL, &init_attr);
	if (ret) {
		perror("rdma_create_ep");
		goto out_free_addrinfo;
	}

	ret = rdma_listen(listen_id, 0);
	if (ret) {
		perror("rdma_listen");
		goto out_destroy_listen_ep;
	}

	ret = rdma_get_request(listen_id, &id);
	if (ret) {
		perror("rdma_get_request");
		goto out_destroy_listen_ep;
	}

	/* Ask the accepted QP how much inline data it really supports. */
	memset(&qp_attr, 0, sizeof qp_attr);
	memset(&init_attr, 0, sizeof init_attr);
	ret = ibv_query_qp(id->qp, &qp_attr, IBV_QP_CAP, &init_attr);
	if (ret) {
		perror("ibv_query_qp");
		goto out_destroy_accept_ep;
	}
	if (init_attr.cap.max_inline_data >= sizeof send_msg)
		send_flags = IBV_SEND_INLINE;
	else
		printf("rdma_server: device doesn't support IBV_SEND_INLINE, "
		       "using sge sends\n");

	mr = rdma_reg_msgs(id, recv_msg, sizeof recv_msg);
	if (!mr) {
		ret = -1;
		perror("rdma_reg_msgs for recv_msg");
		goto out_destroy_accept_ep;
	}
	if ((send_flags & IBV_SEND_INLINE) == 0) {
		send_mr = rdma_reg_msgs(id, send_msg, sizeof send_msg);
		if (!send_mr) {
			ret = -1;
			perror("rdma_reg_msgs for send_msg");
			goto out_dereg_recv;
		}
	}

	/* The receive is posted before accepting the connection. */
	ret = rdma_post_recv(id, NULL, recv_msg, sizeof recv_msg, mr);
	if (ret) {
		perror("rdma_post_recv");
		goto out_dereg_send;
	}

	ret = rdma_accept(id, NULL);
	if (ret) {
		perror("rdma_accept");
		goto out_dereg_send;
	}

	while ((ret = rdma_get_recv_comp(id, &wc)) == 0)
		;
	if (ret < 0) {
		perror("rdma_get_recv_comp");
		goto out_disconnect;
	}
	/* A completion was returned, but the work request itself may have failed. */
	if (wc.status != IBV_WC_SUCCESS) {
		fprintf(stderr, "recv completion failed: %s\n",
			ibv_wc_status_str(wc.status));
		ret = -1;
		goto out_disconnect;
	}

	/* The buffers are fixed-size and not guaranteed to be NUL-terminated. */
	printf("server received: %.*s\n", (int)sizeof recv_msg,
	       (char *)recv_msg);

	memcpy(send_msg, ack, sizeof ack);
	printf("server send: %.*s\n", (int)sizeof send_msg, (char *)send_msg);

	ret = rdma_post_send(id, NULL, send_msg, sizeof send_msg, send_mr,
			     send_flags);
	if (ret) {
		perror("rdma_post_send");
		goto out_disconnect;
	}

	while ((ret = rdma_get_send_comp(id, &wc)) == 0)
		;
	if (ret < 0) {
		perror("rdma_get_send_comp");
	} else if (wc.status != IBV_WC_SUCCESS) {
		fprintf(stderr, "send completion failed: %s\n",
			ibv_wc_status_str(wc.status));
		ret = -1;
	} else {
		ret = 0;
	}

out_disconnect:
	rdma_disconnect(id);
out_dereg_send:
	if ((send_flags & IBV_SEND_INLINE) == 0)
		rdma_dereg_mr(send_mr);
out_dereg_recv:
	rdma_dereg_mr(mr);
out_destroy_accept_ep:
	rdma_destroy_ep(id);
out_destroy_listen_ep:
	rdma_destroy_ep(listen_id);
out_free_addrinfo:
	rdma_freeaddrinfo(res);
	return ret;
}

/*
 * Entry point of the server: run the single exchange and report its result
 * as the process exit status.
 */
int main(int argc, char **argv)
{
	int status;

	(void)argc;
	(void)argv;

	printf("rdma_server: start\n");
	status = run();
	printf("rdma_server: end %d\n", status);

	return status;
}

首先make编译完之后,在server端执行 ./server1,然后在客户端执行./client1
运行结果:
在这里插入图片描述
在这里插入图片描述
可以看到 client向server发送了hello world,server收到之后打印出来并回复给client端ACK消息,client收到之后并打印。最后双方断开连接,完成!

version2-客户端发送一次,服务端循环等待

client2的代码跟上面一样,server2代码不一样。
server2的逻辑:在run函数进来之后记录一个connect点,当远程客户端发送完信息后,释放连接的资源,跳转到connect阶段准备让下一个client连接。

/*
 * Copyright (c) 2005-2009 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <getopt.h>
#include <netdb.h>
#include <rdma/rdma_cma.h>
#include <rdma/rdma_verbs.h>


/* N: requested depth of the send and receive work-request queues. */
#define N 100
/* MAX_CAP: size in bytes of each message buffer and the requested inline limit. */
#define MAX_CAP 32

/* Address and port handed to rdma_getaddrinfo() for the passive (listening) side. */
static const char *server = "0.0.0.0";
static const char *port = "7471";

/* listen_id is the listening endpoint; id is the endpoint returned by
 * rdma_get_request() for the incoming connection. */
static struct rdma_cm_id *listen_id, *id;
/* mr registers recv_msg; send_mr registers send_msg and is only created when
 * inline sends are not used. */
static struct ibv_mr *mr, *send_mr;
/* Flags passed to rdma_post_send(); IBV_SEND_INLINE when the QP allows it. */
static int send_flags;
/* Fixed-size outgoing and incoming message buffers. */
static uint8_t send_msg[MAX_CAP];
static uint8_t recv_msg[MAX_CAP];

/*
 * Serve exactly one client: listen, accept one connection, receive one
 * message, echo it back and release every resource acquired on the way.
 *
 * Returns 0 when the client was served (a reply that could not be delivered
 * is reported but does not count as fatal), and a non-zero value when the
 * server cannot continue.
 */
static int serve_one_client(void)
{
	struct rdma_addrinfo hints, *res;
	struct ibv_qp_init_attr init_attr;
	struct ibv_qp_attr qp_attr;
	struct ibv_wc wc;
	size_t reply_len;
	int ret;

	/* Re-evaluated for every connection below. */
	send_flags = 0;

	memset(&hints, 0, sizeof hints);
	hints.ai_flags = RAI_PASSIVE;
	hints.ai_port_space = RDMA_PS_TCP;
	ret = rdma_getaddrinfo(server, port, &hints, &res);
	if (ret) {
		printf("rdma_getaddrinfo: %s\n", gai_strerror(ret));
		return ret;
	}

	memset(&init_attr, 0, sizeof init_attr);
	init_attr.cap.max_send_wr = init_attr.cap.max_recv_wr = N;
	init_attr.cap.max_send_sge = init_attr.cap.max_recv_sge = 1;
	init_attr.cap.max_inline_data = MAX_CAP;
	init_attr.sq_sig_all = 1;

	ret = rdma_create_ep(&listen_id, res, NULL, &init_attr);
	if (ret) {
		perror("rdma_create_ep");
		goto out_free_addrinfo;
	}

	ret = rdma_listen(listen_id, 0);
	if (ret) {
		perror("rdma_listen");
		goto out_destroy_listen_ep;
	}

	ret = rdma_get_request(listen_id, &id);
	if (ret) {
		perror("rdma_get_request");
		goto out_destroy_listen_ep;
	}

	/* Ask the accepted QP how much inline data it really supports. */
	memset(&qp_attr, 0, sizeof qp_attr);
	memset(&init_attr, 0, sizeof init_attr);
	ret = ibv_query_qp(id->qp, &qp_attr, IBV_QP_CAP, &init_attr);
	if (ret) {
		perror("ibv_query_qp");
		goto out_destroy_accept_ep;
	}
	if (init_attr.cap.max_inline_data >= MAX_CAP)
		send_flags = IBV_SEND_INLINE;
	else
		printf("rdma_server: device doesn't support IBV_SEND_INLINE, "
		       "using sge sends\n");

	/* Register exactly the buffer, not more: recv_msg is MAX_CAP bytes. */
	mr = rdma_reg_msgs(id, recv_msg, sizeof recv_msg);
	if (!mr) {
		ret = -1;
		perror("rdma_reg_msgs for recv_msg");
		goto out_destroy_accept_ep;
	}
	if ((send_flags & IBV_SEND_INLINE) == 0) {
		send_mr = rdma_reg_msgs(id, send_msg, sizeof send_msg);
		if (!send_mr) {
			ret = -1;
			perror("rdma_reg_msgs for send_msg");
			goto out_dereg_recv;
		}
	}

	/*
	 * Post the receive before accepting, so a buffer is already waiting
	 * when the client sends right after its connect completes.
	 */
	memset(recv_msg, 0, sizeof recv_msg);
	memset(send_msg, 0, sizeof send_msg);
	ret = rdma_post_recv(id, NULL, recv_msg, sizeof recv_msg, mr);
	if (ret) {
		perror("rdma_post_recv");
		goto out_dereg_send;
	}

	ret = rdma_accept(id, NULL);
	if (ret) {
		perror("rdma_accept");
		goto out_dereg_send;
	}

	while ((ret = rdma_get_recv_comp(id, &wc)) == 0)
		;
	if (ret < 0) {
		perror("rdma_get_recv_comp");
		goto out_disconnect;
	}
	/* A completion was returned, but the work request itself may have failed. */
	if (wc.status != IBV_WC_SUCCESS) {
		fprintf(stderr, "recv completion failed: %s\n",
			ibv_wc_status_str(wc.status));
		ret = -1;
		goto out_disconnect;
	}

	/* The buffer is fixed-size and not guaranteed to be NUL-terminated. */
	printf("server received: %.*s\n", (int)sizeof recv_msg,
	       (char *)recv_msg);

	/*
	 * Echo back exactly the bytes that arrived, so the reply also fits a
	 * client whose receive buffer is smaller than MAX_CAP.
	 */
	reply_len = wc.byte_len;
	if (reply_len > sizeof send_msg)
		reply_len = sizeof send_msg;
	memcpy(send_msg, recv_msg, reply_len);

	ret = rdma_post_send(id, NULL, send_msg, reply_len, send_mr,
			     send_flags);
	if (ret) {
		perror("rdma_post_send");
		goto out_disconnect;
	}

	/* Wait until the send work request has completed. */
	while ((ret = rdma_get_send_comp(id, &wc)) == 0)
		;
	if (ret < 0)
		perror("rdma_get_send_comp");
	else if (wc.status != IBV_WC_SUCCESS)
		fprintf(stderr, "send completion failed: %s\n",
			ibv_wc_status_str(wc.status));
	/* An undelivered reply only affects this client; keep serving. */
	ret = 0;

out_disconnect:
	rdma_disconnect(id);
out_dereg_send:
	if ((send_flags & IBV_SEND_INLINE) == 0)
		rdma_dereg_mr(send_mr);
out_dereg_recv:
	rdma_dereg_mr(mr);
out_destroy_accept_ep:
	rdma_destroy_ep(id);
out_destroy_listen_ep:
	rdma_destroy_ep(listen_id);
out_free_addrinfo:
	rdma_freeaddrinfo(res);
	return ret;
}

/*
 * Serve clients one after another until a fatal error occurs.
 *
 * Returns the non-zero error value that stopped the server.
 */
static int run(void)
{
	int ret;

	do {
		ret = serve_one_client();
	} while (ret == 0);

	return ret;
}

/*
 * Entry point of the looping server: run it and report the value it stopped
 * with as the process exit status.
 */
int main(int argc, char **argv)
{
	int status;

	(void)argc;
	(void)argv;

	printf("rdma_server: start\n");
	status = run();
	printf("rdma_server: end %d\n", status);

	return status;
}

运行结果:
在这里插入图片描述

在这里插入图片描述

可以看到客户端发送一次消息之后便结束了,服务端却一直等待连接,直到按下ctrl+c。

version3-客户端循环发送,服务端循环等待,一次连接

和上述版本2不同的是,这里client和server只连接一次,然后可以多次发送消息,直到client发送的消息为disconnect。

// client3.c
/*
 * Copyright (c) 2010 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <netdb.h>
#include <errno.h>
#include <getopt.h>
#include <rdma/rdma_cma.h>
#include <rdma/rdma_verbs.h>

/* N: NOTE(review): appears unused by the client code shown here. */
#define N 100
/* MAX_CAP: size in bytes of each message buffer and the requested inline limit. */
#define MAX_CAP 32

/* Address and port handed to rdma_getaddrinfo() to locate the server. */
static const char *server = "10.10.10.1";
static const char *port = "7471";

/* Endpoint created by rdma_create_ep() in run(). */
static struct rdma_cm_id *id;
/* mr registers recv_msg; send_mr registers send_msg and is only created when
 * inline sends are not used. */
static struct ibv_mr *mr, *send_mr;
/* Flags passed to rdma_post_send(); IBV_SEND_INLINE when the QP allows it. */
static int send_flags;
/* Fixed-size outgoing and incoming message buffers. */
static uint8_t send_msg[MAX_CAP];
static uint8_t recv_msg[MAX_CAP];

/*
 * Interactive client: connect once, then repeatedly read a word from stdin,
 * send it to the server and print the server's reply.
 *
 * The loop ends after the word "disconnect" has been sent and answered, when
 * stdin reaches end-of-file, or on the first error. Resources are released in
 * reverse order of acquisition through the cleanup labels at the end.
 *
 * Returns 0 on a clean shutdown and a non-zero value on failure.
 */
static int run(void)
{
	struct rdma_addrinfo hints, *res;
	struct ibv_qp_init_attr attr;
	struct ibv_wc wc;
	char word_fmt[16];
	int ret;

	/* "%<MAX_CAP-1>s": never read more than the buffer can hold plus NUL. */
	snprintf(word_fmt, sizeof word_fmt, "%%%ds", MAX_CAP - 1);

	memset(&hints, 0, sizeof hints);
	hints.ai_port_space = RDMA_PS_TCP;
	ret = rdma_getaddrinfo(server, port, &hints, &res);
	if (ret) {
		printf("rdma_getaddrinfo: %s\n", gai_strerror(ret));
		goto out;
	}

	memset(&attr, 0, sizeof attr);
	attr.cap.max_send_wr = attr.cap.max_recv_wr = 5;
	attr.cap.max_send_sge = attr.cap.max_recv_sge = 1;
	attr.cap.max_inline_data = MAX_CAP;
	attr.qp_context = id;
	attr.sq_sig_all = 1;
	ret = rdma_create_ep(&id, res, NULL, &attr);
	if (ret) {
		perror("rdma_create_ep");
		goto out_free_addrinfo;
	}

	/*
	 * Only look at attr.cap once the endpoint exists: it tells us whether
	 * a message fits into an inline send or needs a registered buffer.
	 */
	if (attr.cap.max_inline_data >= MAX_CAP)
		send_flags = IBV_SEND_INLINE;
	else
		printf("rdma_client: device doesn't support IBV_SEND_INLINE, "
		       "using sge sends\n");

	mr = rdma_reg_msgs(id, recv_msg, sizeof recv_msg);
	if (!mr) {
		perror("rdma_reg_msgs for recv_msg");
		ret = -1;
		goto out_destroy_ep;
	}
	if ((send_flags & IBV_SEND_INLINE) == 0) {
		send_mr = rdma_reg_msgs(id, send_msg, sizeof send_msg);
		if (!send_mr) {
			perror("rdma_reg_msgs for send_msg");
			ret = -1;
			goto out_dereg_recv;
		}
	}

	ret = rdma_connect(id, NULL);
	if (ret) {
		perror("rdma_connect");
		goto out_dereg_send;
	}

	for (;;) {
		memset(recv_msg, 0, sizeof recv_msg);
		memset(send_msg, 0, sizeof send_msg);
		printf("input send message: ");
		fflush(stdout);

		/*
		 * "%s" skips leading whitespace itself, so the newline left by
		 * the previous word needs no separate getchar().
		 */
		if (scanf(word_fmt, (char *)send_msg) != 1) {
			/* End of input (or a read error): nothing left to send. */
			ret = 0;
			break;
		}

		/* Post the receive for the reply before sending the request. */
		ret = rdma_post_recv(id, NULL, recv_msg, sizeof recv_msg, mr);
		if (ret) {
			perror("rdma_post_recv");
			break;
		}

		ret = rdma_post_send(id, NULL, send_msg, sizeof send_msg,
				     send_mr, send_flags);
		if (ret) {
			perror("rdma_post_send");
			break;
		}

		while ((ret = rdma_get_send_comp(id, &wc)) == 0)
			;
		if (ret < 0) {
			perror("rdma_get_send_comp");
			break;
		}
		/* A completion was returned, but the request itself may have failed. */
		if (wc.status != IBV_WC_SUCCESS) {
			fprintf(stderr, "send completion failed: %s\n",
				ibv_wc_status_str(wc.status));
			ret = -1;
			break;
		}

		while ((ret = rdma_get_recv_comp(id, &wc)) == 0)
			;
		if (ret < 0) {
			perror("rdma_get_recv_comp");
			break;
		}
		if (wc.status != IBV_WC_SUCCESS) {
			fprintf(stderr, "recv completion failed: %s\n",
				ibv_wc_status_str(wc.status));
			ret = -1;
			break;
		}
		ret = 0;

		if (strcmp((char *)send_msg, "disconnect") == 0) {
			printf("disconnect\n");
			break;
		}

		/* The buffer is fixed-size and not guaranteed to be NUL-terminated. */
		printf("%.*s\n", (int)sizeof recv_msg, (char *)recv_msg);
	}

	/* Every exit from the loop happens while connected. */
	rdma_disconnect(id);
out_dereg_send:
	if ((send_flags & IBV_SEND_INLINE) == 0)
		rdma_dereg_mr(send_mr);
out_dereg_recv:
	rdma_dereg_mr(mr);
out_destroy_ep:
	rdma_destroy_ep(id);
out_free_addrinfo:
	rdma_freeaddrinfo(res);
out:
	return ret;
}



int main(int argc, char **argv)
{
	int ret;
    //memcpy(send_msg, argv[1], 50);
	// while ((op = getopt(argc, argv, "s:p:")) != -1) {
	// 	switch (op) {
	// 	case 's':
	// 		server = optarg;
	// 		break;
	// 	case 'p':
	// 		port = optarg;
	// 		break;
	// 	default:
	// 		printf("usage: %s\n", argv[0]);
	// 		printf("\t[-s server_address]\n");
	// 		printf("\t[-p port_number]\n");
	// 		exit(1);
	// 	}
	// }

	printf("rdma_client: start\n");
	ret = run();
	printf("rdma_client: end %d\n", ret);
    
	return ret;
}
// server3.c
/*
 * Copyright (c) 2005-2009 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <getopt.h>
#include <netdb.h>
#include <rdma/rdma_cma.h>
#include <rdma/rdma_verbs.h>


/* N: requested depth of the send and receive work-request queues. */
#define N 100
/* MAX_CAP: size in bytes of each message buffer and the requested inline limit. */
#define MAX_CAP 32

/* Address and port handed to rdma_getaddrinfo() for the passive (listening)
 * side; main() lets -s and -p override them. */
static const char *server = "0.0.0.0";
static const char *port = "7471";

/* listen_id is the listening endpoint; id is the endpoint returned by
 * rdma_get_request() for the incoming connection. */
static struct rdma_cm_id *listen_id, *id;
/* mr registers recv_msg; send_mr registers send_msg and is only created when
 * inline sends are not used. */
static struct ibv_mr *mr, *send_mr;
/* Flags passed to rdma_post_send(); IBV_SEND_INLINE when the QP allows it. */
static int send_flags;
/* Fixed-size outgoing and incoming message buffers. */
static uint8_t send_msg[MAX_CAP];
static uint8_t recv_msg[MAX_CAP];

/*
 * Serve one client for the lifetime of its connection.
 *
 * Listens, accepts one connection and then answers every incoming message
 * with "server get <message>" (truncated to fit the MAX_CAP-byte buffer)
 * until the client sends "disconnect". Every resource acquired on the way is
 * released before returning.
 *
 * Returns 0 when the client disconnected cleanly and a non-zero value on
 * failure.
 */
static int serve_one_client(void)
{
	static const char prefix[] = "server get ";
	struct rdma_addrinfo hints, *res;
	struct ibv_qp_init_attr init_attr;
	struct ibv_qp_attr qp_attr;
	struct ibv_wc wc;
	char text[MAX_CAP + 1];
	int last;
	int ret;

	/* Re-evaluated for every connection below. */
	send_flags = 0;

	memset(&hints, 0, sizeof hints);
	hints.ai_flags = RAI_PASSIVE;
	hints.ai_port_space = RDMA_PS_TCP;
	ret = rdma_getaddrinfo(server, port, &hints, &res);
	if (ret) {
		printf("rdma_getaddrinfo: %s\n", gai_strerror(ret));
		return ret;
	}

	memset(&init_attr, 0, sizeof init_attr);
	init_attr.cap.max_send_wr = init_attr.cap.max_recv_wr = N;
	init_attr.cap.max_send_sge = init_attr.cap.max_recv_sge = 1;
	init_attr.cap.max_inline_data = MAX_CAP;
	init_attr.sq_sig_all = 1;

	ret = rdma_create_ep(&listen_id, res, NULL, &init_attr);
	if (ret) {
		perror("rdma_create_ep");
		goto out_free_addrinfo;
	}

	ret = rdma_listen(listen_id, 0);
	if (ret) {
		perror("rdma_listen");
		goto out_destroy_listen_ep;
	}

	ret = rdma_get_request(listen_id, &id);
	if (ret) {
		perror("rdma_get_request");
		goto out_destroy_listen_ep;
	}

	/* Ask the accepted QP how much inline data it really supports. */
	memset(&qp_attr, 0, sizeof qp_attr);
	memset(&init_attr, 0, sizeof init_attr);
	ret = ibv_query_qp(id->qp, &qp_attr, IBV_QP_CAP, &init_attr);
	if (ret) {
		perror("ibv_query_qp");
		goto out_destroy_accept_ep;
	}
	if (init_attr.cap.max_inline_data >= MAX_CAP)
		send_flags = IBV_SEND_INLINE;
	else
		printf("rdma_server: device doesn't support IBV_SEND_INLINE, "
		       "using sge sends\n");

	/* Register exactly the buffer, not more: recv_msg is MAX_CAP bytes. */
	mr = rdma_reg_msgs(id, recv_msg, sizeof recv_msg);
	if (!mr) {
		ret = -1;
		perror("rdma_reg_msgs for recv_msg");
		goto out_destroy_accept_ep;
	}
	if ((send_flags & IBV_SEND_INLINE) == 0) {
		send_mr = rdma_reg_msgs(id, send_msg, sizeof send_msg);
		if (!send_mr) {
			ret = -1;
			perror("rdma_reg_msgs for send_msg");
			goto out_dereg_recv;
		}
	}

	/* Post the first receive before accepting the connection. */
	memset(recv_msg, 0, sizeof recv_msg);
	ret = rdma_post_recv(id, NULL, recv_msg, sizeof recv_msg, mr);
	if (ret) {
		perror("rdma_post_recv");
		goto out_dereg_send;
	}

	ret = rdma_accept(id, NULL);
	if (ret) {
		perror("rdma_accept");
		goto out_dereg_send;
	}

	for (;;) {
		while ((ret = rdma_get_recv_comp(id, &wc)) == 0)
			;
		if (ret < 0) {
			perror("rdma_get_recv_comp");
			break;
		}
		/* A completion was returned, but the request itself may have failed. */
		if (wc.status != IBV_WC_SUCCESS) {
			fprintf(stderr, "recv completion failed: %s\n",
				ibv_wc_status_str(wc.status));
			ret = -1;
			break;
		}

		/*
		 * Work on a NUL-terminated private copy: the wire buffer may be
		 * completely filled, and it is about to be reused.
		 */
		memcpy(text, recv_msg, sizeof recv_msg);
		text[sizeof recv_msg] = '\0';
		last = strcmp(text, "disconnect") == 0;

		/* Build the reply in place, truncating the echo so it always fits. */
		memset(send_msg, 0, sizeof send_msg);
		snprintf((char *)send_msg, sizeof send_msg, "%s%.*s", prefix,
			 (int)(sizeof send_msg - sizeof prefix), text);

		/*
		 * Post the next receive BEFORE replying: the client only sends
		 * again after it has seen the reply, so a buffer is guaranteed
		 * to be waiting for its next message.
		 */
		if (!last) {
			memset(recv_msg, 0, sizeof recv_msg);
			ret = rdma_post_recv(id, NULL, recv_msg,
					     sizeof recv_msg, mr);
			if (ret) {
				perror("rdma_post_recv");
				break;
			}
		}

		ret = rdma_post_send(id, NULL, send_msg, sizeof send_msg,
				     send_mr, send_flags);
		if (ret) {
			perror("rdma_post_send");
			break;
		}

		/*
		 * Every send is signaled (sq_sig_all), so its completion must
		 * be reaped; otherwise the send queue and its completion queue
		 * fill up after about N messages. It also guarantees the reply
		 * has left send_msg before the buffer is rewritten or the
		 * connection is torn down.
		 */
		while ((ret = rdma_get_send_comp(id, &wc)) == 0)
			;
		if (ret < 0) {
			perror("rdma_get_send_comp");
			break;
		}
		if (wc.status != IBV_WC_SUCCESS) {
			fprintf(stderr, "send completion failed: %s\n",
				ibv_wc_status_str(wc.status));
			ret = -1;
			break;
		}
		ret = 0;

		if (last) {
			printf("client disconnect\n");
			break;
		}
		printf("%s\n", text);
	}

	/* Every exit from the loop happens while connected. */
	rdma_disconnect(id);
out_dereg_send:
	if ((send_flags & IBV_SEND_INLINE) == 0)
		rdma_dereg_mr(send_mr);
out_dereg_recv:
	rdma_dereg_mr(mr);
out_destroy_accept_ep:
	rdma_destroy_ep(id);
out_destroy_listen_ep:
	rdma_destroy_ep(listen_id);
out_free_addrinfo:
	rdma_freeaddrinfo(res);
	return ret;
}

/*
 * Serve clients one after another: when a client disconnects cleanly, wait
 * for the next one; stop on the first failure.
 *
 * Returns the non-zero error value that stopped the server.
 */
static int run(void)
{
	int ret;

	do {
		ret = serve_one_client();
	} while (ret == 0);

	return ret;
}

/*
 * Entry point of the server.
 *
 * Options (both optional; the compiled-in defaults are used otherwise):
 *   -s server_address   address to listen on
 *   -p port_number      port to listen on
 *
 * Returns the value produced by run(); exits with status 1 on an unknown
 * option.
 */
int main(int argc, char **argv)
{
	int opt;
	int status;

	for (;;) {
		opt = getopt(argc, argv, "s:p:");
		if (opt == -1)
			break;

		if (opt == 's') {
			server = optarg;
		} else if (opt == 'p') {
			port = optarg;
		} else {
			/* Unknown option or missing argument: show usage and quit. */
			printf("usage: %s\n", argv[0]);
			printf("\t[-s server_address]\n");
			printf("\t[-p port_number]\n");
			exit(1);
		}
	}

	printf("rdma_server: start\n");
	status = run();
	printf("rdma_server: end %d\n", status);

	return status;
}

运行结果:

在这里插入图片描述
在这里插入图片描述

总结:

本文实现了rdma中send-receive双边原语的三种需求版本,从单次发送到两者都能多次发送。理解其中的代码逻辑,想要发送消息之前对端得创建一个recv队列用来接收消息。发送完了有一个发送完成队列,接收完了也有一个接收完成队列。最后双方断开连接需要一起断开,不能某一方执行disconnect另一方不执行。本次实验有一个关键点:

while ((ret = rdma_get_send_comp(id, &wc)) == 0)

这一行代码是等待发送完成:发送完成之后,说明对方已经给出一个隐式确认,表示消息已经收到。这里耗费的时间比较长一点。在版本3中,如果不注释掉这一行,而server端的receive队列又还没有准备好,就会出现client发送了消息、server却还没有收到的情况,双方陷入死循环。需要注意的是,注释掉之后发送完成事件不会被回收,长时间运行时发送队列和完成队列可能被占满;更稳妥的做法是在回复之前预先投递下一个receive,并照常等待发送完成。

第1章 RDMA背景简介 ............................................. 5 第2章 哪些网络协议支持RDMA ..................................... 8 2.1 InfiniBand(IB)........................................... 8 2.2 RDMA过融合以太网(RoCE)................................... 8 2.3 互联网广域RDMA协议(iWARP)................................ 8 第3章 RDMA技术优势 ............................................. 9 第4章 RDMA有哪些不同实现 ...................................... 10 第5章 RDMA有哪些标准组织 ...................................... 14 第6章 应用和RNIC传输接口层 .................................... 18 6.1 内存Verbs(Memory Verbs)............................... 19 6.2 消息Verbs(Messaging Verbs)............................ 20 第7章 RDMA传输分类方式 ........................................ 20 7.1 RDMA原语................................................ 21 7.2 RDMA 队列对(QP)....................................... 23 7.3 RDMA完成事件............................................ 23 7.4 RDMA传输类型............................................ 24 7.5 RDMA双边操作解析........................................ 26 7.6 RDMA单边操作解析........................................ 27 7.7 RDMA技术简单总结........................................ 27 第8章 InfiniBand技术和协议架构分析 ............................ 29 8.1 InfiniBand技术的发展.................................... 29 8.2 InfiniBand技术的优势.................................... 30 8.3 InfiniBand基本概念...................................... 32 8.4 InfiniBand协议简介...................................... 33 8.4.1 物理层 ............................................ 34 8.4.2 链路层 ............................................ 34 8.4.3 网络层 ............................................ 34 8.4.4 传输层 ............................................ 35 8.4.5 上层协议 .......................................... 35 8.5 IB应用场景.............................................. 36 第9章 InfiniBand主流厂商和产品分析 ............................ 37 9.1 InfiniBand网络和拓扑.................................... 38 9.2 软件协议栈OFED.......................................... 42 9.3 InfiniBand网络管理...................................... 
43 9.4 并行计算集群能力........................................ 44 9.5 基于socket网络应用能力.................................. 45 9.6 存储支持能力............................................ 45 9.7 Mellanox产品介绍........................................ 46 9.8 Infiniband交换机........................................ 48 9.9 InfiniBand适配器........................................ 51 9.10 Infiniband路由器和网关设备............................. 52 9.11 Infiniband线缆和收发器................................. 53 9.12 InfiniBand主要构件总结................................. 54 9.13 InfiniBand对现有应用的支持和ULPs支持................... 55 第10章 RDMA over TCP(iWARP)协议和工作原理 ..................... 56 10.1 RDMA相关简介........................................... 57 10.2 RDMA工作原理........................................... 59 10.3 RDMA 操作类型.......................................... 61 10.4 RDMA over TCP详解...................................... 61 10.5 RDMA标准组织............................................ 7 第11章 RoCE(RDMA over Converged Ethernet)原理 ............... 65 第12章 不同RDMA技术的比较 ..................................... 67 12.1 IB和TCP、Ethernet比较.................................. 69 12.2 RoCE和InfiniBand比较................................... 70 12.3 RoCE和IB协议的技术区别................................. 71 12.4 RoCE和iWARP的区别...................................... 71 第13章 Intel Omni-Path和InfiniBand对比分析 .................... 72 13.1 Intel True Scale Fabric介绍............................ 73 13.2 Intel True Scale InfiniBand产品........................ 74 13.3 Intel Omni-Path产品.................................... 76 第14章 RDMA关键技术延伸 ....................................... 80 14.1 RDMA指令的选择......................................... 80 14.2 慎用atomic类指令....................................... 81 14.3 减少交互次数........................................... 82 14.3.1 Wr 聚合 .......................................... 82 14.3.2 SGE 聚合 ......................................... 82 14.3.3 使用imm数据 ...................................... 
83 14.3.4 使用inline数据 ................................... 83 14.3.5 CQE中使用inline数据 .............................. 83 14.3.6 WC聚合 ........................................... 84 14.4 运行模式选择........................................... 84 14.4.1 连接的模式 ....................................... 84 14.4.2 运行模式 ......................................... 85 14.5 性能与并发............................................. 86 14.6 避免CPU缓存抖动........................................ 87 14.7 避免芯片内部的缓存Miss................................. 87 14.8 时延的隐藏............................................. 88 14.8.1 利用Prefetch预取指令 ............................. 88 14.8.2 异步交互操作优先 ................................. 88 14.9 RDMA性能分析........................................... 89
### 回答1: RDMA编程用户手册-官方中文版,是一个介绍RDMA(Remote Direct Memory Access,远程直接内存访问)编程的指南,全文共分为六章,内容详细而清晰。该手册引导读者从熟悉RDMA的基本概念开始,到理解和使用RDMA编程模型,最后为读者提供了一些高级主题,如优化数据传输、多资源管理等。 第一章介绍了RDMA及其相关概念,如IB(InfiniBand,无穷带宽)和RoCE(RDMA over Converged Ethernet,以太网上的RDMA),使读者对RDMA有了初步了解。 第二章讲述了RDMA编程模型及其基本特性,如零拷贝、CPU减轻、低延迟、高吞吐量等。该章还介绍了RDMA的三种通信方式:发送/接收(send/receive)、发送/写(send/write)和原子操作(atomic operations)。 第三章详细介绍了RDMA编程中的一些重要概念,如信号量、内存区域、端点和队列,为读者进一步了解RDMA编程模型打下基础。 第四章详细介绍了RDMA编程接口(APIs),包括IB Verbs(IB词汇)和UCP(Unified Communication Platform,统一通信平台),并提供了相关示例代码和解释。 第五章介绍了RDMA应用的一些高级话题,如数据传输优化、内存区域与队列管理、事件处理等,提供了进一步优化RDMA应用的方法和技巧。 最后一章通过具体的案例分享了RDMA编程的示例,从简单的ping-pong测试到复杂的数据传输、内存区域管理、事件处理等,为读者提供了实际应用经验和使用技巧。 总之,这个手册是一个非常有用的资源,不仅对初学者具有参考价值,也为专业RDMA编程人员提供了实用信息和技巧。 ### 回答2: RDMA 编程用户手册 (官方中文版) 是一份非常详细的技术文档,主要面向使用 RDMA 开发网络应用程序的开发人员。该手册包括了 RDMA 的基本介绍、RDMA 技术的优点、RDMA 编程的基本原理和方法、RDMA 常见编程模式、RDMA 应用编程界面、RDMA 编程工具等内容。通过这份手册,读者可以了解 RDMA 技术的基础知识、掌握 RDMA 编程方法和技巧,更好地开发和优化基于 RDMA 的网络应用程序。 手册首先介绍了 RDMA 的基础概念、优点和实现原理,为读者提供了深入理解 RDMA 技术的基础知识。接着,手册详细介绍了 RDMA 技术的编程方法和基本模式,包括点对点 RDMA、远程读写、原子操作等,给读者提供了开发 RDMA 应用程序的基本指南。同时,手册还介绍了 RDMA API,为读者提供了详细的接口说明和使用方法。此外,手册还介绍了 RDMA 编程工具和调试技巧,方便读者对 RDMA 应用程序进行优化和调试。 总之,RDMA 编程用户手册 (官方中文版) 是一份非常有用的技术文档,对于需要开发和优化基于 RDMA 的网络应用程序的开发人员来说,是一份必备的工具和参考资料。 ### 回答3: RDMA(Remote Direct Memory Access)是一种异步、零拷贝数据传输技术,它允许网络主机直接访问远端主机的内存。RDMA编程用户手册是RDMA编程的权威指南,对于RDMA编程掌握和实践意义重大。 该手册被分为三个主要部分,分别是RDMA概述、RDMA编程RDMA应用,对RDMA编程的基础知识、源代码实现以及应用领域进行了详细描述和讲解。其中,RDMA概述主要介绍RDMA的发展历程、基本原理、硬件支持和软件实现等,为读者提供了深入了解RDMA的基础知识。RDMA编程部分则主要介绍了RDMA编程的基本模型、原子操作、数据类型和粘包处理等,同时提供了丰富的代码实现和案例分析,以方便读者进行实践活动。RDMA应用部分主要讲解RDMA在各种场景中的应用,包括存储系统、网络加速、云计算和高性能计算等,帮助读者了解RDMA技术在各种实际应用领域中的表现和优势。 总体而言,RDMA编程用户手册-官方中文版是一本介绍RDMA编程的权威指南,对于打算了解和掌握RDMA编程技术的人员具有重要意义。该手册不仅提供了丰富的知识资源和代码实现支持,而且分析了RDMA技术在各种场景中的应用场景和优点,为读者掌握RDMA编程技术和加速应用提供了有力支持和指导。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值