硬件环境如下(Intel X722 PCIe网卡的两个10GBASE-T网口):
# lspci | grep Ethernet
19:00.0 Ethernet controller: Intel Corporation Ethernet Connection X722 for 10GBASE-T (rev 09)
19:00.1 Ethernet controller: Intel Corporation Ethernet Connection X722 for 10GBASE-T (rev 09)
查看PCIe设备的资源属性
# cat /sys/class/net/eno2/device/resource
0x00000000c2000000 0x00000000c2ffffff 0x000000000014220c
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x00000000c5000000 0x00000000c5007fff 0x000000000014220c
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x00000000c5d00000 0x00000000c5d7ffff 0x000000000004e200
0x00000000c4000000 0x00000000c47fffff 0x000000000014220c
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x00000000c5010000 0x00000000c510ffff 0x000000000014220c
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
# lspci -s 0000:19:00.0 -vvv
19:00.0 Ethernet controller: Intel Corporation Ethernet Connection X722 for 10GBASE-T (rev 09)
Subsystem: Super Micro Computer Inc Device 37d2
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
Latency: 0, Cache Line Size: 32 bytes
Interrupt: pin A routed to IRQ 30
NUMA node: 0
Region 0: Memory at c3000000 (64-bit, prefetchable) [size=16M]
Region 3: Memory at c5008000 (64-bit, prefetchable) [size=32K]
Expansion ROM at c5d80000 [disabled] [size=512K]
将两块网卡绑定为DPDK设备
# modprobe uio
# insmod build/kmod/igb_uio.ko
# ./usertools/dpdk-devbind.py -b igb_uio 0000:19:00.0
# ./usertools/dpdk-devbind.py -b igb_uio 0000:19:00.1
# ./usertools/dpdk-devbind.py --status
Network devices using DPDK-compatible driver
============================================
0000:19:00.0 'Ethernet Connection X722 for 10GBASE-T 37d2' drv=igb_uio unused=i40e
0000:19:00.1 'Ethernet Connection X722 for 10GBASE-T 37d2' drv=igb_uio unused=i40e
No 'Crypto' devices detected
============================
No 'Eventdev' devices detected
==============================
No 'Mempool' devices detected
=============================
No 'Compress' devices detected
==============================
可通过uio的属性文件查看到绑定的网卡信息
# cat /sys/class/uio/uio0/dev
238:0
# cat /sys/class/uio/uio0/version
0.1
# cat /sys/class/uio/uio0/name
igb_uio
# cat /sys/class/uio/uio0/device/dma_mask_bits
64
# cat /sys/class/uio/uio0/device/device
0x37d2
# cat /sys/class/uio/uio0/device/resource
0x00000000c2000000 0x00000000c2ffffff 0x000000000014220c
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x00000000c5000000 0x00000000c5007fff 0x000000000014220c
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x00000000c5d00000 0x00000000c5d7ffff 0x000000000004e200
0x00000000c4000000 0x00000000c47fffff 0x000000000014220c
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x00000000c5010000 0x00000000c510ffff 0x000000000014220c
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
运行l2fwd示例程序
# ./l2fwd -c 1 -n 4 -- -q 4 -p 0x3
EAL: Detected 20 lcore(s)
EAL: Detected 1 NUMA nodes
EAL: Multi-process socket /var/run/dpdk/rte/mp_socket
EAL: No free hugepages reported in hugepages-1048576kB
EAL: Probing VFIO support...
EAL: PCI device 0000:19:00.0 on NUMA socket 0
EAL: probe driver: 8086:37d2 net_i40e
EAL: PCI device 0000:19:00.1 on NUMA socket 0
EAL: probe driver: 8086:37d2 net_i40e
MAC updating enabled
Lcore 0: RX port 0
Lcore 0: RX port 1
Initializing port 0... done:
Port 0, MAC address: 3C:EC:EF:0C:3A:C2
Initializing port 1... done:
Port 1, MAC address: 3C:EC:EF:0C:3A:C3
Checking link status..........................................................................................done
Port0 Link Up. Speed 1000 Mbps - full-duplex
Port 1 Link Down
L2FWD: entering main loop on lcore 0
L2FWD: -- lcoreid=0 portid=0
L2FWD: -- lcoreid=0 portid=1
Port statistics ====================================
Statistics for port 0 ------------------------------
Packets sent: 0
Packets received: 0
Packets dropped: 0
Statistics for port 1 ------------------------------
Packets sent: 0
Packets received: 0
Packets dropped: 0
Aggregate statistics ===============================
Total packets sent: 0
Total packets received: 0
Total packets dropped: 0
====================================================
Port statistics ====================================
Statistics for port 0 ------------------------------
Packets sent: 0
Packets received: 218
Packets dropped: 0
Statistics for port 1 ------------------------------
Packets sent: 218
Packets received: 0
Packets dropped: 0
Aggregate statistics ===============================
Total packets sent: 218
Total packets received: 218
Total packets dropped: 0
====================================================
Port statistics ====================================
Statistics for port 0 ------------------------------
Packets sent: 64
Packets received: 362
Packets dropped: 0
Statistics for port 1 ------------------------------
Packets sent: 362
Packets received: 64
Packets dropped: 0
Aggregate statistics ===============================
Total packets sent: 426
Total packets received: 426
Total packets dropped: 0
====================================================
从“Packets sent”和“Packets received”可看出端口之间有数据包转发。
DPDK l2fwd示例程序代码解析
收发包核心逻辑代码如下:
/* main processing loop */
static void
l2fwd_main_loop(void)
{
struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
struct rte_mbuf *m;
int sent;
unsigned lcore_id;
uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc;
unsigned i, j, portid, nb_rx;
struct lcore_queue_conf *qconf;
const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S *
BURST_TX_DRAIN_US;
struct rte_eth_dev_tx_buffer *buffer;
prev_tsc = 0;
timer_tsc = 0;
lcore_id = rte_lcore_id();
qconf = &lcore_queue_conf[lcore_id];
if (qconf->n_rx_port == 0) {
RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id);
return;
}
RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id);
for (i = 0; i < qconf->n_rx_port; i++) {
portid = qconf->rx_port_list[i];
RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id,
portid);
}
while (!force_quit) {
cur_tsc = rte_rdtsc();
/*
* TX burst queue drain
*/
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc)) {
for (i = 0; i < qconf->n_rx_port; i++) {
portid = l2fwd_dst_ports[qconf->rx_port_list[i]];
buffer = tx_buffer[portid];
sent = rte_eth_tx_buffer_flush(portid, 0, buffer);
if (sent)
port_statistics[portid].tx += sent;
}
/* if timer is enabled */
if (timer_period > 0) {
/* advance the timer */
timer_tsc += diff_tsc;
/* if timer has reached its timeout */
if (unlikely(timer_tsc >= timer_period)) {
/* do this only on master core */
if (lcore_id == rte_get_master_lcore()) {
print_stats();
/* reset the timer */
timer_tsc = 0;
}
}
}
prev_tsc = cur_tsc;
}
/*
* Read packet from RX queues
*/
for (i = 0; i < qconf->n_rx_port; i++) {
portid = qconf->rx_port_list[i];
nb_rx = rte_eth_rx_burst(portid, 0,
pkts_burst, MAX_PKT_BURST);
port_statistics[portid].rx += nb_rx;
for (j = 0; j < nb_rx; j++) {
m = pkts_burst[j];
rte_prefetch0(rte_pktmbuf_mtod(m, void *));
l2fwd_simple_forward(m, portid);
}
}
}
}
其中的核心逻辑是:通过rte_eth_rx_burst收到数据包后,对每个数据包调用l2fwd_simple_forward函数(其内部调用rte_eth_tx_buffer),将数据包再转发出去。
/*
 * Read packets from RX queues.
 *
 * Excerpt from l2fwd_main_loop(): for every RX port listed in this
 * lcore's queue configuration, request up to MAX_PKT_BURST packets from
 * queue 0, add the number received to that port's rx counter, then hand
 * each packet to l2fwd_simple_forward() together with the port it
 * arrived on.
 */
for (i = 0; i < qconf->n_rx_port; i++) {
portid = qconf->rx_port_list[i];
nb_rx = rte_eth_rx_burst(portid, 0,
pkts_burst, MAX_PKT_BURST);
port_statistics[portid].rx += nb_rx;
for (j = 0; j < nb_rx; j++) {
m = pkts_burst[j];
/* Prefetch the packet data before forwarding it. */
rte_prefetch0(rte_pktmbuf_mtod(m, void *));
l2fwd_simple_forward(m, portid);
}
}
参考链接: