基于RDMA的nfs服务

背景

ib网卡+nfs服务实现简单的存储共享,暂时顶替还未上线的存储设备,同时也解决 单纯的使用scp rsync等不支持rdma协议拷贝无法正确使用ib网络

说明

前提是系统上已配置安装好ib网卡驱动,且ib网络正常使用,配置参考
https://blog.csdn.net/qq_43652666/article/details/141422514

大部分步骤网上都有,但是如何将RDMA-NFS模块注入到内核中——在很多教程中却没有提及。若缺少这一步,整个方案就无法实现。
这也是比较坑的一个点网上的文章都是直接load kernel 模块即可,但实际上该模块并没有在mellanox的驱动安装程序中被默认安装

RoCE (RDMA over Converged Ethernet)。RoCE 是一种通过以太网实现 RDMA 的技术,它允许RDMA通信在以太网上运行,而不是仅限于InfiniBand技术。

实施

服务端

apt install nfs-kernel-server nfs-common -y
 
# 更改nfsd服务守护进程
root@host:/opt# egrep "^RPCNFSDCOUNT" /etc/default/nfs-kernel-server
RPCNFSDCOUNT=16
# 重启服务
systemctl restart nfs-server.service
 
# 配置映射出去的路径
root@host:/data1# grep data /etc/exports
/data1 *(rw,async,crossmnt,insecure,fsid=0,no_auth_nlm,no_subtree_check,no_root_squash,no_all_squash)
 
# 生效
root@host:~# exportfs -arv
exporting *:/data1
 
# 验证
root@host:/data1# showmount -e
Export list for bj4090-19:
/data1 *

rdma-nfs模块

服务端和客户端都需要配置rdma nfs模块

在mellanox的网卡驱动解压缩包中有对应deb包,安装即可

如果包不存在,可以重新编译一次源码包,生成对应的包

root@host:~# cd /opt/MLNX_OFED_LINUX-24.04-0.7.0.0-ubuntu22.04-x86_64/DEBS/
root@host:/opt/MLNX_OFED_LINUX-24.04-0.7.0.0-ubuntu22.04-x86_64/DEBS# ls |grep nfs
mlnx-nfsrdma-dkms_24.04.OFED.24.04.0.7.0.1-1_all.deb
 
root@host:/opt/MLNX_OFED_LINUX-24.04-0.7.0.0-ubuntu22.04-x86_64/DEBS# dpkg -i mlnx-nfsrdma-dkms_24.04.OFED.24.04.0.7.0.1-1_all.deb
Selecting previously unselected package mlnx-nfsrdma-dkms.
(Reading database ... 128797 files and directories currently installed.)
Preparing to unpack mlnx-nfsrdma-dkms_24.04.OFED.24.04.0.7.0.1-1_all.deb ...
Unpacking mlnx-nfsrdma-dkms (24.04.OFED.24.04.0.7.0.1-1) ...
Setting up mlnx-nfsrdma-dkms (24.04.OFED.24.04.0.7.0.1-1) ...
Loading new mlnx-nfsrdma-24.04.OFED.24.04.0.7.0.1 DKMS files...
First Installation: checking all kernels...
Building only for 5.15.0-117-generic
Building for architecture x86_64
Building initial module for 5.15.0-117-generic
Secure Boot not enabled on this system.
Done.
Forcing installation of mlnx-nfsrdma
 
rpcrdma.ko:
Running module version sanity check.
 - Original module
 - Installation
   - Installing to /lib/modules/5.15.0-117-generic/updates/dkms/
 
svcrdma.ko:
Running module version sanity check.
 - Original module
 - Installation
   - Installing to /lib/modules/5.15.0-117-generic/updates/dkms/
 
xprtrdma.ko:
Running module version sanity check.
 - Original module
 - Installation
   - Installing to /lib/modules/5.15.0-117-generic/updates/dkms/
 
depmod...
 
# load kernel module
root@host:/opt/MLNX_OFED_LINUX-24.04-0.7.0.0-ubuntu22.04-x86_64/DEBS# modprobe rpcrdma
root@host:/opt/MLNX_OFED_LINUX-24.04-0.7.0.0-ubuntu22.04-x86_64/DEBS# modprobe xprtrdma
root@host:/opt/MLNX_OFED_LINUX-24.04-0.7.0.0-ubuntu22.04-x86_64/DEBS# modprobe svcrdma
root@host:/opt/MLNX_OFED_LINUX-24.04-0.7.0.0-ubuntu22.04-x86_64/DEBS# lsmod |grep rdma
svcrdma                16384  0
xprtrdma               16384  0
rpcrdma                81920  0
sunrpc                585728  18 nfsd,rpcrdma,auth_rpcgss,lockd,nfs_acl
rdma_ucm               28672  0
rdma_cm               122880  2 rpcrdma,rdma_ucm
iw_cm                  49152  1 rdma_cm
ib_cm                 131072  2 rdma_cm,ib_ipoib
ib_uverbs             135168  26 rdma_ucm,mlx5_ib
ib_core               434176  9 rdma_cm,ib_ipoib,rpcrdma,iw_cm,ib_umad,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm
mlx_compat             69632  14 rdma_cm,ib_ipoib,mlxdevm,rpcrdma,xprtrdma,iw_cm,svcrdma,ib_umad,ib_core,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,mlx5_core
 
 
# 配置端口
root@host:/opt/MLNX_OFED_LINUX-24.04-0.7.0.0-ubuntu22.04-x86_64/DEBS# echo "rdma 20049" >> /proc/fs/nfsd/portlist
root@host:/opt/MLNX_OFED_LINUX-24.04-0.7.0.0-ubuntu22.04-x86_64/DEBS# cat /proc/fs/nfsd/portlist
rdma 20049
rdma 20049
tcp 2049
tcp 2049

配置启动nfs-server服务时参数实现自动配置端口及加载模块

root@xxx:~# cat /lib/systemd/system/nfs-server.service
[Unit]
Description=NFS server and services
DefaultDependencies=no
Requires=network.target proc-fs-nfsd.mount
Requires=nfs-mountd.service
Wants=rpcbind.socket network-online.target
Wants=rpc-statd.service nfs-idmapd.service
Wants=rpc-statd-notify.service
Wants=nfsdcld.service
 
After=network-online.target local-fs.target
After=proc-fs-nfsd.mount rpcbind.socket nfs-mountd.service
After=nfs-idmapd.service rpc-statd.service
After=nfsdcld.service
Before=rpc-statd-notify.service
 
# GSS services dependencies and ordering
Wants=auth-rpcgss-module.service
After=rpc-gssd.service gssproxy.service rpc-svcgssd.service
 
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStartPre=-/usr/sbin/exportfs -r
 
ExecStartPre=/sbin/modprobe xprtrdma
ExecStartPre=/sbin/modprobe svcrdma
 
ExecStart=/usr/sbin/rpc.nfsd
 
ExecStartPost=/bin/bash -c "sleep 3 && echo 'rdma 20049' | tee /proc/fs/nfsd/portlist"
 
ExecStop=/usr/sbin/rpc.nfsd 0
ExecStopPost=/usr/sbin/exportfs -au
ExecStopPost=/usr/sbin/exportfs -f
 
ExecReload=-/usr/sbin/exportfs -r
 
[Install]
WantedBy=multi-user.target
 
# 生效
systemctl daemon-reload
# 测试
root@xxx:~# cat /proc/fs/nfsd/portlist
rdma 20049
rdma 20049
tcp 2049
tcp 2049
root@xxx:~# lsmod |grep xprtrdma
xprtrdma               16384  0
mlx_compat             69632  14 rdma_cm,ib_ipoib,mlxdevm,rpcrdma,xprtrdma,iw_cm,svcrdma,ib_umad,ib_core,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,mlx5_core
root@xxx:~# lsmod |grep svc
svcrdma                16384  0
mlx_compat             69632  14 rdma_cm,ib_ipoib,mlxdevm,rpcrdma,xprtrdma,iw_cm,svcrdma,ib_umad,ib_core,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,mlx5_core
root@xxx:~# showmount -e
Export list for bj4090-19:
/data1 *

客户端

装包,load module 挂载

apt install nfs-common -y
root@client:/opt/MLNX_OFED_LINUX-24.04-0.7.0.0-ubuntu22.04-x86_64/DEBS# dpkg -i mlnx-nfsrdma-dkms_24.04.OFED.24.04.0.7.0.1-1_all.deb
root@client:~# modprobe xprtrdma
 
root@client:~# lsmod |grep rdma
rpcrdma                77824  0
xprtrdma               16384  0
sunrpc                585728  3 rpcrdma,lockd,nfs
rdma_ucm               28672  0
rdma_cm               122880  2 rpcrdma,rdma_ucm
iw_cm                  49152  1 rdma_cm
ib_cm                 131072  2 rdma_cm,ib_ipoib
ib_uverbs             135168  26 rdma_ucm,mlx5_ib
ib_core               434176  9 rdma_cm,ib_ipoib,rpcrdma,iw_cm,ib_umad,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm
mlx_compat             69632  13 rdma_cm,ib_ipoib,mlxdevm,rpcrdma,xprtrdma,iw_cm,ib_umad,ib_core,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,mlx5_core
 
root@client:~# mount -o rdma,port=20049 10.255.252.19:/data1 /data2
Created symlink /run/systemd/system/remote-fs.target.wants/rpc-statd.service → /usr/lib/systemd/system/rpc-statd.service.
# proto字段显示为rdma
root@client:~# mount |grep nfs
10.255.252.19:/data1 on /data2 type nfs (rw,relatime,vers=3,rsize=1048576,wsize=1048576,namlen=255,hard,proto=rdma,port=20049,timeo=600,retrans=2,sec=sys,mountaddr=10.255.252.19,mountvers=3,mountproto=tcp,local_lock=none,addr=10.255.252.19)
 
root@client:~# ls /data2/

客户端配置启动时自动load rdma nfs模块

# 服务中添加这行
ExecStartPre=/sbin/modprobe xprtrdma
 
 
root@client:/data1# cat /lib/systemd/system/rpcbind.service
[Unit]
Description=RPC bind portmap service
Documentation=man:rpcbind(8)
DefaultDependencies=no
RequiresMountsFor=/run/rpcbind
 
# Make sure we use the IP addresses listed for
# rpcbind.socket, no matter how this unit is started.
Requires=rpcbind.socket
After=systemd-tmpfiles-setup.service
Wants=remote-fs-pre.target rpcbind.target
Before=remote-fs-pre.target rpcbind.target
 
[Service]
Environment="OPTIONS=-w"
ExecStartPre=/sbin/modprobe xprtrdma
ExecStart=/sbin/rpcbind -f $OPTIONS
EnvironmentFile=-/etc/rpcbind.conf
EnvironmentFile=-/etc/default/rpcbind
Type=notify
KillMode=process
Restart=on-failure
 
[Install]
Also=rpcbind.socket
WantedBy=multi-user.target
 
systemctl daemon-reload && systemctl restart rpcbind.service

测试

测试时在服务端使用iftop等网卡类top命令无法看到ib网卡的传输速率,反过来推出数据传输走了rdma协议,未经由系统内核空间
fio测试

root@client:~# apt install fio -y
 
root@client:~# fio -filename=/data2/fio_test -direct=1 -iodepth 1 -thread -rw=read -ioengine=psync -bs=10240k -size=10G -numjobs=60 -runtime=60 -group_reporting -name=mytest --output=./10240k-read-p60-1.json
root@client:~# [R(60)][100.0%][r=17.6GiB/s][r=1806 IOPS][eta 00m:00s]

refer

导入内核问题解决方案
https://forums.developer.nvidia.com/t/how-to-use-nfs-over-rdma-with-mlnx-ofed-solution/207263

其他的参考内容网上都差不多
整体实现参考
https://vqiu.cn/nfs-rdma/

  • 2
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值