DPDK BPF

DPDK BPF

DPDK 自版本 18.05 已集成了 librte_bpf, 主要利用rte_eth_rx_burst/rte_eth_tx_burst 回调函数机制, 执行eBPF字节码. 当前支持以下特性:

  • base eBPF ISA (except tail-pointer)
  • JIT (x86_64 and arm64 only)
  • eBPF code verifier
  • user-defined helper functions (64-bit only)
  • RX/TX filter (加载 eBPF prog 作为 DPDK RX/TX 回调函数处理数据包, 单独跟每个RX/TX队列绑定)
  • rte_mbuf access (64-bit only)

不支持的功能特性:

  • cBPF
  • eBPF MAP
  • tail-pointer calls
  • external function calls for 32-bit platforms

DPDK BPF 执行流程

在这里插入图片描述

Fedora

sudo dnf install -y git gcc ncurses-devel elfutils-libelf-devel bc \
  openssl-devel libcap-devel clang llvm graphviz bison flex glibc-static

generate bpf prog

examples/bpf/t1.c 提供了一个处理原始数据报文的例子: 目的IP地址为1.2.3.4且UDP目的端口为5000的报文返回1(放行), 其余报文返回0(作为RX/TX过滤器时被丢弃):

/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

/*
 * eBPF program sample.
 * Accepts pointer to first segment packet data as an input parameter.
 * analog of tcpdump -s 1 -d 'dst 1.2.3.4 && udp && dst port 5000'
 * (000) ldh      [12]
 * (001) jeq      #0x800           jt 2    jf 12
 * (002) ld       [30]
 * (003) jeq      #0x1020304       jt 4    jf 12
 * (004) ldb      [23]
 * (005) jeq      #0x11            jt 6    jf 12
 * (006) ldh      [20]
 * (007) jset     #0x1fff          jt 12   jf 8
 * (008) ldxb     4*([14]&0xf)
 * (009) ldh      [x + 16]
 * (010) jeq      #0x1388          jt 11   jf 12
 * (011) ret      #1
 * (012) ret      #0
 *
 * To compile on x86:
 * clang -O2 -U __GNUC__ -target bpf -c t1.c
 *
 * To compile on ARM:
 * clang -O2 -I/usr/include/aarch64-linux-gnu/ -target bpf -c t1.c
 */

#include <stdint.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <arpa/inet.h>

/*
 * Raw-packet filter entry point.
 *
 * pkt points at the first byte of the Ethernet header of the first
 * packet segment.
 *
 * Returns 1 when the frame is IPv4, carries UDP, has a zero fragment
 * offset, is addressed to 1.2.3.4 and has UDP destination port 5000;
 * returns 0 for everything else.
 */
uint64_t
entry(void *pkt)
{
	struct ether_header *ether_header = (void *)pkt;

	/* Only IPv4 frames are of interest. */
	if (ether_header->ether_type != htons(0x0800))
		return 0;

	struct iphdr *iphdr = (void *)(ether_header + 1);

	/*
	 * frag_off is kept in network byte order and also carries the
	 * DF/MF flag bits. Test only the 13-bit fragment offset (0x1fff,
	 * as in the tcpdump filter), otherwise every packet with the
	 * "don't fragment" flag set would be rejected.
	 */
	if (iphdr->protocol != 17 ||
			(iphdr->frag_off & htons(0x1fff)) != 0 ||
			iphdr->daddr != htonl(0x1020304))
		return 0;

	/* ihl counts 32-bit words; the UDP header follows the IP header. */
	int hlen = iphdr->ihl * 4;
	struct udphdr *udphdr = (void *)((uint8_t *)iphdr + hlen);

	if (udphdr->dest != htons(5000))
		return 0;

	return 1;
}

编译bpf字节码:

# clang -O2 -U __GNUC__ -I${RTE_SDK}/${RTE_TARGET}/include -target bpf  -Wno-int-to-void-pointer-cast -c t1.c
# llvm-objdump  --arch=bpf -S t1.o

t1.o:	file format elf64-bpf


Disassembly of section .text:

0000000000000000 <entry>:
       0:	b7 00 00 00 00 00 00 00	r0 = 0
       1:	69 12 0c 00 00 00 00 00	r2 = *(u16 *)(r1 + 12)
       2:	55 02 0f 00 08 00 00 00	if r2 != 8 goto +15 <LBB0_6>
       3:	71 12 17 00 00 00 00 00	r2 = *(u8 *)(r1 + 23)
       4:	55 02 0d 00 11 00 00 00	if r2 != 17 goto +13 <LBB0_6>
       5:	69 12 14 00 00 00 00 00	r2 = *(u16 *)(r1 + 20)
       6:	55 02 0b 00 00 00 00 00	if r2 != 0 goto +11 <LBB0_6>
       7:	61 12 1e 00 00 00 00 00	r2 = *(u32 *)(r1 + 30)
       8:	55 02 09 00 01 02 03 04	if r2 != 67305985 goto +9 <LBB0_6>
       9:	07 01 00 00 0e 00 00 00	r1 += 14
      10:	71 12 00 00 00 00 00 00	r2 = *(u8 *)(r1 + 0)
      11:	67 02 00 00 02 00 00 00	r2 <<= 2
      12:	57 02 00 00 3c 00 00 00	r2 &= 60
      13:	0f 21 00 00 00 00 00 00	r1 += r2
      14:	69 11 02 00 00 00 00 00	r1 = *(u16 *)(r1 + 2)
      15:	b7 00 00 00 01 00 00 00	r0 = 1
      16:	15 01 01 00 13 88 00 00	if r1 == 34835 goto +1 <LBB0_6>
      17:	b7 00 00 00 00 00 00 00	r0 = 0

0000000000000090 <LBB0_6>:
      18:	95 00 00 00 00 00 00 00	exit

load/unload bpf prog

testpmd 提供了一组bpf命令用于验证bpf功能:

testpmd> bpf-load rx|tx <portid> <queueid> <load-flags> <filename>
testpmd> bpf-unload rx|tx <portid> <queueid>

bpf with rte_mbuf*

bpf入参为 rte_mbuf *

bpf-load rx 0 0 M <path>/t3.o
...
bpf-load rx 0 n M <path>/t3.o

bpf with raw packet

bpf入参为原始报文数据

bpf-load rx 0 0 J <path>/t4.o
...
bpf-load rx 0 n J <path>/t4.o

bpf with vm

bpf入参为原始报文数据, 使用 bpf vm 执行字节码:

bpf-load rx 0 0 - <path>/t5.o
...
bpf-load rx 0 n - <path>/t5.o

unload bpf

bpf-unload rx 0 0
...
bpf-unload rx 0 n

Performance

硬件

CPU: Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz
Mellanox Technologies MT2892 Family [ConnectX-6 Dx]

dpdk 21.05 testpmd:

#!/bin/sh

#EAL_ARGS+=" --log-level="lib.eal":8 --log-level=pmd:8 --log-level="pmd.net.mlx5":3 "
NR_Q=18

APP=./dpdk-testpmd-21.05
$APP -l 24-47 --socket-mem=4096,4096 -n 4  -w '54:00.1,dv_flow_en=1,mprq_en=1,rxqs_min_mprq=1,rx_vec_en=1' ${EAL_ARGS}  -- \
	-i  --rxq=${NR_Q} --txq=${NR_Q} --nb-cores=23 --forward-mode icmpecho --no-numa --enable-rx-cksum --auto-start --rxd=2048 --txd=2048 --burst=64

bpf prog

bpf 丢弃UDP目的端口为5000的所有数据报文: t1.c 简化版, 移除了IP地址判断:

#include <stdint.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <arpa/inet.h>

/*
 * Raw-packet filter entry point (simplified variant without the
 * destination address check).
 *
 * pkt points at the first byte of the Ethernet header.
 *
 * Returns 0 only for IPv4/UDP packets whose destination port is 5000;
 * returns 1 for every other packet.
 */
uint64_t
entry(void *pkt)
{
	struct ether_header *ether_header = (void *)pkt;

	/* Non-IPv4 traffic is not examined further. */
	if (ether_header->ether_type != htons(0x0800))
		return 1;

	struct iphdr *iphdr = (void *)(ether_header + 1);
	if (iphdr->protocol != 17)
		return 1;

	/* ihl counts 32-bit words; the UDP header follows the IP header. */
	int hlen = iphdr->ihl * 4;
	struct udphdr *udphdr = (void *)((uint8_t *)iphdr + hlen);

	/*
	 * Previously both outcomes of this comparison returned 0, so the
	 * port check had no effect and every UDP packet got the same result.
	 */
	if (udphdr->dest == htons(5000))
		return 0;

	return 1;
}

编译:

clang -O2 -U __GNUC__ -I${RTE_SDK}/${RTE_TARGET}/include -target bpf  -Wno-int-to-void-pointer-cast -c t4.c

Load:

bpf-load rx 0 0 J <path>/t4.o
bpf-load rx 0 1 J <path>/t4.o
bpf-load rx 0 2 J <path>/t4.o
bpf-load rx 0 3 J <path>/t4.o
bpf-load rx 0 4 J <path>/t4.o
bpf-load rx 0 5 J <path>/t4.o
bpf-load rx 0 6 J <path>/t4.o
bpf-load rx 0 7 J <path>/t4.o
bpf-load rx 0 8 J <path>/t4.o
bpf-load rx 0 9 J <path>/t4.o
bpf-load rx 0 10 J <path>/t4.o
bpf-load rx 0 11 J <path>/t4.o
bpf-load rx 0 12 J <path>/t4.o
bpf-load rx 0 13 J <path>/t4.o
bpf-load rx 0 14 J <path>/t4.o
bpf-load rx 0 15 J <path>/t4.o
bpf-load rx 0 16 J <path>/t4.o
bpf-load rx 0 17 J <path>/t4.o

result

在当前测试硬件环境下, 使用 icmpecho 转发模式 (报文接收后即丢弃, 相当于 RX-DROP), 执行bpf字节码只做简单丢弃, 这种方式对性能几乎无影响, 可考虑用于插件处理数据包:

testpmd> show port stats all

  ######################## NIC statistics for port 0  ########################
  RX-packets: 81360790320 RX-missed: 8141       RX-bytes:  4881647419320
  RX-errors: 0
  RX-nombuf:  0         
  TX-packets: 4          TX-errors: 0          TX-bytes:  360

  Throughput (since last show)
  Rx-pps:    149155140          Rx-bps:  71594467312
  Tx-pps:            0          Tx-bps:            0
  ############################################################################
 
bpf-unload rx 0 0
bpf-unload rx 0 1
bpf-unload rx 0 2
bpf-unload rx 0 3
bpf-unload rx 0 4
bpf-unload rx 0 5
bpf-unload rx 0 6
bpf-unload rx 0 7
bpf-unload rx 0 8
bpf-unload rx 0 9
bpf-unload rx 0 10
bpf-unload rx 0 11
bpf-unload rx 0 12
bpf-unload rx 0 13
bpf-unload rx 0 14
bpf-unload rx 0 15
bpf-unload rx 0 16
bpf-unload rx 0 17
testpmd> show port stats all

  ######################## NIC statistics for port 0  ########################
  RX-packets: 60151600493 RX-missed: 8141       RX-bytes:  3609096029700
  RX-errors: 0
  RX-nombuf:  0         
  TX-packets: 4          TX-errors: 0          TX-bytes:  360

  Throughput (since last show)
  Rx-pps:    149159900          Rx-bps:  71596752112
  Tx-pps:            0          Tx-bps:            0
  ############################################################################
testpmd> 

Reference

eBPF spec
DPDK- Berkeley Packet Filter Library
Awesome eBPF
Cilium - BPF and XDP Reference Guide

  • 3
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值