概述
RoCE性能测试主要测试两个BF3之间RDMA的性能,bond模式下只有用SF才可以支持RoCE,因此测试的主要是SF口的RoCE性能。
监测网口RoCE报文统计
通过如下脚本监测网口RoCE报文统计:
root@qyyc01-tron-bf3-240024216:~# cat rdma_stats.sh
#!/bin/bash
# Print per-port receive/transmit bit rates derived from ethtool counters.
#
# Usage:
#   ./rdma_stats.sh                          # monitor the default ports p0, p1
#   ./rdma_stats.sh <vendor_id> <device_id>  # monitor every matching NIC
#   CX7: ./rdma_stats.sh 0x15b3 0x1021
IFG_PLUS_PREAMBLE=20 # IFG 12B + Preamble 8B
NETIF=(p0 p1)
c_vendor_id=0x15b3
# NOTE(review): sysfs normally reports lowercase hex, so this mixed-case
# default looks like it never matches and the default ports above stay in
# effect unless IDs are given on the command line - confirm this is intended.
c_device_id=0xa2FF
if [ $# -eq 2 ]; then
  c_vendor_id=$1
  c_device_id=$2
fi

#######################################
# Find network interfaces whose PCI vendor/device IDs match the given pair.
# Globals:
#   NETIF (written): replaced by the matching interface names when at least
#                    one interface matches; left untouched otherwise, so no
#                    stale default entry survives a partial match.
# Arguments:
#   $1 - directory holding one sub-directory per interface (/sys/class/net)
#   $2 - vendor ID exactly as printed by <iface>/device/vendor
#   $3 - device ID exactly as printed by <iface>/device/device
# Outputs:
#   One "nic : <name>" line on stdout per matching interface.
#######################################
discover_nics() {
  local sysfs_dir=$1 want_vendor=$2 want_device=$3
  local path dev vendor_id device_id
  local -a found=()

  for path in "$sysfs_dir"/*; do
    # Virtual interfaces (lo, bridges, ...) have no PCI identity files.
    [[ -f "$path/device/vendor" && -f "$path/device/device" ]] || continue
    dev=${path##*/}
    vendor_id=$(cat "$path/device/vendor")
    device_id=$(cat "$path/device/device")
    if [[ "$vendor_id" == "$want_vendor" && "$device_id" == "$want_device" ]]; then
      printf "nic : %10s \n" "$dev"
      found+=("$dev")
    fi
  done

  if ((${#found[@]} > 0)); then
    NETIF=("${found[@]}")
  fi
}

discover_nics /sys/class/net "$c_vendor_id" "$c_device_id"

length=${#NETIF[@]}
if [ "$length" -lt 1 ]; then
  echo "no nic" >&2
  exit 1
fi
# ispfc = number of lines in the `mlnx_qos -i eth0x` output that contain both
# "enabled" and "1"; update_stats switches to the prio5 counters when it is 1.
# NOTE(review): presumably this detects an enabled PFC priority, and "eth0x"
# looks like a placeholder interface name - confirm both against the target host.
ispfc=$(mlnx_qos -i eth0x | grep enabled | grep -c 1)
echo "$ispfc"
#######################################
# Take a fresh counter sample for one interface, keeping the previous one.
# Globals:
#   ispfc (read): 1 selects the *_prio5_* counters, anything else *_prio0_*
#   TS, R_PKT, R_BYTE, T_PKT, T_BYTE (written): current sample at slot $2
#   TS_LAST, R_PKT_LAST, R_BYTE_LAST, T_PKT_LAST, T_BYTE_LAST (written):
#     the sample that was current before this call
#   ETHTOOL (written): rx_bytes rx_packets tx_bytes tx_packets, in that order
# Arguments:
#   $1 - interface name handed to `ethtool -S`
#   $2 - slot index into the sample arrays
#######################################
update_stats () { # $name $index
  local prio=prio0

  # Shift the current sample into the "previous" slot before overwriting it.
  TS_LAST[$2]=${TS[$2]}
  R_PKT_LAST[$2]=${R_PKT[$2]}
  R_BYTE_LAST[$2]=${R_BYTE[$2]}
  T_PKT_LAST[$2]=${T_PKT[$2]}
  T_BYTE_LAST[$2]=${T_BYTE[$2]}

  if [ "${ispfc:-0}" -eq 1 ]; then
    prio=prio5
  fi

  # Select counters by exact name instead of by position in the ethtool
  # output, and default each to 0 so a missing counter or a failed ethtool
  # call cannot feed empty strings into the callers' arithmetic.
  read -r -a ETHTOOL < <(ethtool -S "$1" | awk -v p="$prio" '
    BEGIN { rx_bytes = rx_pkts = tx_bytes = tx_pkts = 0 }
    { name = $1; sub(/:$/, "", name) }
    name == ("rx_" p "_bytes")   { rx_bytes = $2 }
    name == ("rx_" p "_packets") { rx_pkts = $2 }
    name == ("tx_" p "_bytes")   { tx_bytes = $2 }
    name == ("tx_" p "_packets") { tx_pkts = $2 }
    END { print rx_bytes, rx_pkts, tx_bytes, tx_pkts }')

  TS[$2]=$(date +%s%6N) # in usec
  R_BYTE[$2]=${ETHTOOL[0]}
  R_PKT[$2]=${ETHTOOL[1]}
  T_BYTE[$2]=${ETHTOOL[2]}
  T_PKT[$2]=${ETHTOOL[3]}
}
# Print one header line per monitored interface.
for ((i = 0; i < length; i++)); do
  printf "[%'9s Rx]: \n" "${NETIF[$i]}"
done

# set initial value
# The slot index passed to update_stats is the interface's position in NETIF.
for ((i = 0; i < length; i++)); do
  update_stats "${NETIF[$i]}" "$i"
done

# Remember the first sample of every interface as the baseline from which
# the running totals in the sampling loop are measured.
for ((i = 0; i < length; i++)); do
  R_PKT_INIT[$i]=${R_PKT[$i]}
  T_PKT_INIT[$i]=${T_PKT[$i]}
  R_BYTE_INIT[$i]=${R_BYTE[$i]}
  T_BYTE_INIT[$i]=${T_BYTE[$i]}
done
sleep 1
# Sampling loop: about once per second, take a new sample of every interface
# and print its receive and transmit bit rates. Runs until interrupted.
while :; do
  index=0
  i=0
  while (( i < $length )); do
    update_stats ${NETIF[$i]} $index

    # Time between the previous and the current sample, in microseconds.
    TS_DIFF=$(( ${TS[$index]} - ${TS_LAST[$index]} ))

    # Receive side: packets/s and bits/s. IFG_PLUS_PREAMBLE bytes are added
    # per packet before converting bytes to bits.
    R_PKT_DELTA=$(( ${R_PKT[$index]} - ${R_PKT_LAST[$index]} ))
    R_PKT_RATE=$(( R_PKT_DELTA * 1000000 / TS_DIFF ))
    R_BIT_DELTA=$(( (${R_BYTE[$index]} - ${R_BYTE_LAST[$index]} + ${IFG_PLUS_PREAMBLE} * R_PKT_DELTA) * 8 ))
    R_BIT_RATE=$(( R_BIT_DELTA * 1000000 / TS_DIFF ))

    # Transmit side, computed the same way.
    T_PKT_DELTA=$(( ${T_PKT[$index]} - ${T_PKT_LAST[$index]} ))
    T_PKT_RATE=$(( T_PKT_DELTA * 1000000 / TS_DIFF ))
    T_BIT_DELTA=$(( (${T_BYTE[$index]} - ${T_BYTE_LAST[$index]} + ${IFG_PLUS_PREAMBLE} * T_PKT_DELTA) * 8 ))
    T_BIT_RATE=$(( T_BIT_DELTA * 1000000 / TS_DIFF ))

    # Totals relative to the *_INIT baseline; only the disabled detailed
    # printf lines below consume them.
    R_PKT_TOTAL=$(( ${R_PKT[$index]} - ${R_PKT_INIT[$index]} ))
    T_PKT_TOTAL=$(( ${T_PKT[$index]} - ${T_PKT_INIT[$index]} ))
    R_BYTE_TOTAL=$(( ${R_BYTE[$index]} - ${R_BYTE_INIT[$index]} ))
    T_BYTE_TOTAL=$(( ${T_BYTE[$index]} - ${T_BYTE_INIT[$index]} ))

    #printf "[%'9s Rx]: %'16d pkts %'16d pps | %'20d bytes %'16d bps \n" ${NETIF[$i]} $R_PKT_TOTAL $R_PKT_RATE $R_BYTE_TOTAL $R_BIT_RATE
    #printf "[%'9s Tx]: %'16d pkts %'16d pps | %'20d bytes %'16d bps \n" ${NETIF[$i]} $T_PKT_TOTAL $T_PKT_RATE $T_BYTE_TOTAL $T_BIT_RATE
    printf "[%'9s]: %'16d %'16d \n" ${NETIF[$i]} "$R_BIT_RATE" "$T_BIT_RATE"

    index=$(( index + 1 ))
    i=$(( i + 1 ))
  done
  printf "\n"
  sleep 1
done
output:
[ p0]: 1404 0
[ p1]: 0 2847
第一列为网口名称,第二列为接收速率(bit/s),第三列为发送速率(bit/s)。该速率按每个报文额外加上20字节(帧间隙12字节 + 前导码8字节)计算,即物理链路上实际传输的全部bit。
单口性能测试
服务端BF3运行:
[root@qyyc01-tron-bf3-240024220 aaa]# ib_write_bw --run_infinitely --duration 1 -d mlx5_0 -F
客户端BF3运行:
root@qyyc01-tron-bf3-240024216:~# ib_write_bw --run_infinitely --duration 1 -d mlx5_0 -F 10.240.24.220
RDMA_Write BW Test
Dual-port : OFF Device : mlx5_0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
PCIe relax order: ON
ibv_wr* API : ON
TX depth : 128
CQ Moderation : 1
Mtu : 1024[B]
Link type : Ethernet
GID index : 1
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
local address: LID 0000 QPN 0x0097 PSN 0x1d19e6 RKey 0x014605 VAddr 0x00ffffb35ed000
GID: 00:00:00:00:00:00:00:00:00:00:255:255:10:240:24:216
remote address: LID 0000 QPN 0x8b8e PSN 0xd39efd RKey 0x014e6a VAddr 0x00fffff7ad9000
GID: 00:00:00:00:00:00:00:00:00:00:255:255:10:240:24:220
#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
65536 176622 0.00 11037.77 0.176604
65536 176641 0.00 11038.06 0.176609
65536 176622 0.00 11037.90 0.176606
客户端BF3查看网口统计值:
root@qyyc01-tron-bf3-240024216:~# ./rdma_stats.sh
0
[ p0 Rx]:
[ p1 Rx]:
[ p0]: 715578916 1555
[ p1]: 706 100014538227
[ p0]: 713382431 5403
[ p1]: 2120 100028736877
[ p0]: 716075307 5403
[ p1]: 2120 100037928448
可以看到单口带宽打到了100G。
双口性能测试
服务端BF3运行:
[root@qyyc01-tron-bf3-240024220 aaa]# ib_write_bw --run_infinitely --duration 1 -d mlx5_0 -q 2 -F
客户端BF3运行:
root@qyyc01-tron-bf3-240024216:~# ib_write_bw --run_infinitely --duration 1 -d mlx5_0 -q 2 -F 10.240.24.220
RDMA_Write BW Test
Dual-port : OFF Device : mlx5_0
Number of qps : 2 Transport type : IB
Connection type : RC Using SRQ : OFF
PCIe relax order: ON
ibv_wr* API : ON
TX depth : 128
CQ Moderation : 1
Mtu : 1024[B]
Link type : Ethernet
GID index : 1
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
local address: LID 0000 QPN 0x00ab PSN 0xdd6dce RKey 0x014700 VAddr 0x00ffff9b44d000
GID: 00:00:00:00:00:00:00:00:00:00:255:255:10:240:24:216
local address: LID 0000 QPN 0x00ac PSN 0x9e3ea0 RKey 0x014700 VAddr 0x00ffff9b45d000
GID: 00:00:00:00:00:00:00:00:00:00:255:255:10:240:24:216
remote address: LID 0000 QPN 0x8bb2 PSN 0xe611b RKey 0x014c00 VAddr 0x00fffff7ac9000
GID: 00:00:00:00:00:00:00:00:00:00:255:255:10:240:24:220
remote address: LID 0000 QPN 0x8bb3 PSN 0x90469 RKey 0x014c00 VAddr 0x00fffff7ad9000
GID: 00:00:00:00:00:00:00:00:00:00:255:255:10:240:24:220
#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
65536 176709 0.00 11043.38 0.176694
65536 176706 0.00 11043.54 0.176697
客户端BF3查看网口统计值:
root@qyyc01-tron-bf3-240024216:~# ./rdma_stats.sh
[ p0]: 997941975 50061865547
[ p1]: 0 50055305972
可以看到流量被分摊到了p0和p1两个口上,每个口的发送带宽约为50G,双口合计带宽打到了100G。
宿主机上跑fio测试性能:
root@ub
可。