DPDK(八):l2fwd代码分析

L2fwd即二层转发,通过MAC地址进行发包。主线上的代码为输入偶数个端口,每两个端口为互发端口,这样两个端口均被DPDK接管,TCPDUMP之类的工具没法使用,所以我在实验中只接管一个端口,并将报文转发回源地址,实验过程见DPDK(7):l2fwd测试过程

下面分析代码实现:

1、运行参数:-c 1 -n 2 -- -q 1  -p 1

-c:指定分配给DPDK使用的逻辑核掩码(十六进制位图),这里为1,即只使用0号逻辑核

-n:每个CPU的内存通道数

--:分隔符,之前是EAL参数,之后是应用程序(l2fwd)自身的参数

-q:每个逻辑核负责的接收端口(队列)数,这里设置为1

-p:要使用的端口的十六进制掩码,这里为1,即只使用第一个端口(端口0)

从上面可以看出,运行DPDK的程序需要设置参数,而且得提前绑定端口,在可维护性方面很差,这应该算开源软件的缺点,所以在前东家把这部分代码都重新进行了抽象,添加了维护性代码,API也设计的简单清晰。


2、代码运行过程:

(1)、首先看一下看不到的PMD注册过程:

/*
 * Declare and define a function named devinitfn_<d> that registers driver
 * object `d` by calling rte_eal_driver_register(&d).
 *
 * The function carries GCC's `constructor` attribute, so it is invoked by
 * the C runtime start-up code before main() is entered; `used` keeps the
 * function from being discarded even though nothing calls it explicitly.
 */
#define PMD_REGISTER_DRIVER(d)\
void devinitfn_ ##d(void);\
void __attribute__((constructor, used)) devinitfn_ ##d(void)\
{\
	rte_eal_driver_register(&d);\
}

void __attribute__((constructor, used)) 这部分是GCC提供的构造函数属性,被修饰的函数会在MAIN开始前被调用,所以在新版本的DPDK中看不到显式的驱动注册过程,
请看下面的代码调用,LIBC在启动MAIN之前调用了注册函数,注册了VMXNET3驱动:

Breakpoint 1, rte_eal_driver_register (driver=0x82462c0 <rte_vmxnet3_driver>) at /opt/code/dpdk-1.8.0/lib/librte_eal/common/eal_common_dev.c:54
54		TAILQ_INSERT_TAIL(&dev_driver_list, driver, next);
(gdb) bt
#0  rte_eal_driver_register (driver=0x82462c0 <rte_vmxnet3_driver>) at /opt/code/dpdk-1.8.0/lib/librte_eal/common/eal_common_dev.c:54
#1  0x081265ba in devinitfn_rte_vmxnet3_driver () at /opt/code/dpdk-1.8.0/lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c:781
#2  0x081f2eb2 in __libc_csu_init ()
#3  0xb7dbda1a in __libc_start_main (main=0x8061dc0 <main>, argc=10, argv=0xbffff6e4, init=0x81f2e60 <__libc_csu_init>, fini=0x81f2ed0 <__libc_csu_fini>, rtld_fini=0xb7fed180 <_dl_fini>, 
    stack_end=0xbffff6dc) at libc-start.c:246
#4  0x0805eeb1 in _start ()

(2)初始化了EAL

在这个里面做了很多重要的事情,可以单独作为几篇展开,对我们后面的使用很重要的是大页内存的构建,以及端口与驱动的绑定。

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
	argc -= ret;
	argv += ret;


在这个地方将找到驱动和端口的关系,因为我的环境使用的网卡为e1000,找到对应驱动为rte_em_pmd:

(gdb) bt
#0  eth_em_dev_init (eth_drv=0x8247de0 <rte_em_pmd>, eth_dev=0x8316d80 <rte_eth_devices>) at /opt/code/dpdk-1.8.0/lib/librte_pmd_e1000/em_ethdev.c:225
#1  0x080e414b in rte_eth_dev_init (pci_drv=0x8247de0 <rte_em_pmd>, pci_dev=0x8324728) at /opt/code/dpdk-1.8.0/lib/librte_ether/rte_ethdev.c:276
#2  0x080f3076 in rte_eal_pci_probe_one_driver (dr=0x8247de0 <rte_em_pmd>, dev=0x8324728) at /opt/code/dpdk-1.8.0/lib/librte_eal/linuxapp/eal/eal_pci.c:589
#3  0x080fe5fa in pci_probe_all_drivers (dev=0x8324728) at /opt/code/dpdk-1.8.0/lib/librte_eal/common/eal_common_pci.c:114
#4  0x080fe69f in rte_eal_pci_probe () at /opt/code/dpdk-1.8.0/lib/librte_eal/common/eal_common_pci.c:151
#5  0x080ed451 in rte_eal_init (argc=10, argv=0xbffff6e4) at /opt/code/dpdk-1.8.0/lib/librte_eal/linuxapp/eal/eal.c:839
#6  0x08061de9 in main ()
(gdb)


在这个初始化函数里面将给设备设置好接收包函数、设备操作函数:
static int
eth_em_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
		struct rte_eth_dev *eth_dev)
{
	struct rte_pci_device *pci_dev;
	struct e1000_hw *hw =
		E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
	struct e1000_vfta * shadow_vfta =
		E1000_DEV_PRIVATE_TO_VFTA(eth_dev->data->dev_private);

	pci_dev = eth_dev->pci_dev;
	eth_dev->dev_ops = &eth_em_ops;
	eth_dev->rx_pkt_burst = (eth_rx_burst_t)&eth_em_recv_pkts;
	eth_dev->tx_pkt_burst = (eth_tx_burst_t)&eth_em_xmit_pkts;

(3)、解析运行参数:

	/* parse application arguments (after the EAL ones) */
	ret = l2fwd_parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n");

(4)、创建MBUF内存池,用于收发包:

/* create the mbuf pool */
l2fwd_pktmbuf_pool =
rte_mempool_create("mbuf_pool", NB_MBUF,
  MBUF_SIZE, 32,
  sizeof(struct rte_pktmbuf_pool_private),
  rte_pktmbuf_pool_init, NULL,
  rte_pktmbuf_init, NULL,
  rte_socket_id(), 0);


(5)、下面这段是找到需要使用的端口以及CPU和端口的关系,端口收发两两一对:

nb_ports = rte_eth_dev_count();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");

	if (nb_ports > RTE_MAX_ETHPORTS)
		nb_ports = RTE_MAX_ETHPORTS;

	/* reset l2fwd_dst_ports */
	for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
		l2fwd_dst_ports[portid] = 0;
	last_port = 0;

	/*
	 * Each logical core is assigned a dedicated TX queue on each port.
	 */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
			continue;

		if (nb_ports_in_mask % 2) {
			l2fwd_dst_ports[portid] = last_port;
			l2fwd_dst_ports[last_port] = portid;
		}
		else
			last_port = portid;

		nb_ports_in_mask++;

		rte_eth_dev_info_get(portid, &dev_info);
	}
	if (nb_ports_in_mask % 2) {
		printf("Notice: odd number of ports in portmask.\n");
		l2fwd_dst_ports[last_port] = last_port;
	}

	rx_lcore_id = 0;
	qconf = NULL;

	/* Initialize the port/queue configuration of each logical core */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
			continue;

		/* get the lcore_id for this port */
		while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
		       lcore_queue_conf[rx_lcore_id].n_rx_port ==
		       l2fwd_rx_queue_per_lcore) {
			rx_lcore_id++;
			if (rx_lcore_id >= RTE_MAX_LCORE)
				rte_exit(EXIT_FAILURE, "Not enough cores\n");
		}

		if (qconf != &lcore_queue_conf[rx_lcore_id])
			/* Assigned a new logical core in the loop above. */
			qconf = &lcore_queue_conf[rx_lcore_id];

		qconf->rx_port_list[qconf->n_rx_port] = portid;
		qconf->n_rx_port++;
		printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid);
	}

(6)、设置端口收发队列、然后开启端口工作、开启混杂模式

		ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
				  ret, (unsigned) portid);

		rte_eth_macaddr_get(portid,&l2fwd_ports_eth_addr[portid]);

		/* init one RX queue */
		fflush(stdout);
		ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
					     rte_eth_dev_socket_id(portid),
					     NULL,
					     l2fwd_pktmbuf_pool);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
				  ret, (unsigned) portid);

		/* init one TX queue on each port */   
		fflush(stdout);
		ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
				rte_eth_dev_socket_id(portid),
				NULL);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
				ret, (unsigned) portid);

		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
				  ret, (unsigned) portid);

(7)、设置主循环函数:

	/* launch per-lcore init on every lcore */
	rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

(8)、看看循环函数里面做了什么

/*
 * Forwarding loop executed by one lcore.
 *
 * Looks up this lcore's entry in lcore_queue_conf and returns immediately
 * if no RX port is assigned to it. Otherwise it logs the assigned ports and
 * then loops forever, on each iteration:
 *   1. flushing every non-empty per-port TX buffer once more than drain_tsc
 *      TSC cycles have passed since the previous flush, and (when
 *      timer_period > 0) printing statistics on the master lcore;
 *   2. reading a burst from RX queue 0 of each assigned port and handing
 *      every received mbuf to l2fwd_simple_forward().
 *
 * Once the loop is entered the function does not return.
 */
static void
l2fwd_main_loop(void)
{
	struct rte_mbuf *rx_burst[MAX_PKT_BURST];
	struct rte_mbuf *pkt;
	struct lcore_queue_conf *qconf;
	unsigned lcore_id, portid, nb_rx;
	unsigned rx_idx, pkt_idx;
	uint64_t last_drain_tsc, stats_tsc, now_tsc, elapsed_tsc;
	/* TSC cycles per microsecond (rounded up) times BURST_TX_DRAIN_US. */
	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;

	last_drain_tsc = 0;
	stats_tsc = 0;

	lcore_id = rte_lcore_id();
	qconf = &lcore_queue_conf[lcore_id];

	/* Nothing was assigned to this lcore: leave without polling. */
	if (qconf->n_rx_port == 0) {
		RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id);
		return;
	}

	RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id);

	/* Report which ports this lcore polls. */
	for (rx_idx = 0; rx_idx < qconf->n_rx_port; rx_idx++) {
		portid = qconf->rx_port_list[rx_idx];
		RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id,
			portid);
	}

	for (;;) {
		now_tsc = rte_rdtsc();
		elapsed_tsc = now_tsc - last_drain_tsc;

		/* Periodic work: send out whatever is buffered for TX. */
		if (unlikely(elapsed_tsc > drain_tsc)) {

			for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
				if (qconf->tx_mbufs[portid].len != 0) {
					/* qconf is this lcore's own entry in lcore_queue_conf. */
					l2fwd_send_burst(qconf,
							 qconf->tx_mbufs[portid].len,
							 (uint8_t) portid);
					qconf->tx_mbufs[portid].len = 0;
				}
			}

			/* Statistics timer, only when it is enabled. */
			if (timer_period > 0) {
				stats_tsc += elapsed_tsc;

				/*
				 * Print (and reset the timer) only on the master
				 * lcore; on other lcores the counter keeps growing.
				 */
				if (unlikely(stats_tsc >= (uint64_t) timer_period) &&
				    lcore_id == rte_get_master_lcore()) {
					print_stats();
					stats_tsc = 0;
				}
			}

			last_drain_tsc = now_tsc;
		}

		/* Receive a burst from each assigned port and forward it. */
		for (rx_idx = 0; rx_idx < qconf->n_rx_port; rx_idx++) {
			portid = qconf->rx_port_list[rx_idx];
			nb_rx = rte_eth_rx_burst((uint8_t) portid, 0,
						 rx_burst, MAX_PKT_BURST);

			port_statistics[portid].rx += nb_rx;

			for (pkt_idx = 0; pkt_idx < nb_rx; pkt_idx++) {
				pkt = rx_burst[pkt_idx];
				rte_prefetch0(rte_pktmbuf_mtod(pkt, void *));
				l2fwd_simple_forward(pkt, portid);
			}
		}
	}
}


(9)、l2fwd_simple_forward,进行了修改

static void
l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
{
    struct ether_hdr *eth;
    struct ether_addr tmp_addr;
    struct ipv4_hdr *ipv4_hdr;
    uint32_t tmp_ip_addr;

    /* 交换MAC地址 */
    eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
    ether_addr_copy(&tmp_addr, ð->s_addr);
    ether_addr_copy(ð->s_addr, ð->d_addr);
    ether_addr_copy(ð->d_addr, &tmp_addr);

    /* 添加三层IP地址交换查看效果*/
    ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) +
			sizeof(struct ether_hdr));
    tmp_ip_addr = ipv4_hdr->dst_addr;
    ipv4_hdr->dst_addr = ipv4_hdr->src_addr;
    ipv4_hdr->src_addr = tmp_ip_addr; 
    
    /* 从接收端口发送出去 */
    l2fwd_send_packet(m, (uint8_t) portid);
}


(10)、看一下真正调用到的收发包函数:

收报文:

(gdb) bt
#0  eth_em_recv_pkts (rx_queue=0xaf8fa000, rx_pkts=0xbffff4c0, nb_pkts=32) at /opt/code/dpdk-1.8.0/lib/librte_pmd_e1000/em_rxtx.c:684
#1  0x08060fc5 in rte_eth_rx_burst ()
#2  0x08061853 in l2fwd_main_loop ()
#3  0x08061933 in l2fwd_launch_one_lcore ()
#4  0x080fe4dc in rte_eal_mp_remote_launch (f=0x8061928 <l2fwd_launch_one_lcore>, arg=0x0, call_master=CALL_MASTER) at /opt/code/dpdk-1.8.0/lib/librte_eal/common/eal_common_launch.c:91
#5  0x0806248c in main ()
(gdb) 


发报文:

(gdb) bt
#0  eth_em_xmit_pkts (tx_queue=0xaf8f9d40, tx_pkts=0x828ec48 <lcore_queue_conf+72>, nb_pkts=32) at /opt/code/dpdk-1.8.0/lib/librte_pmd_e1000/em_rxtx.c:415
#1  0x08061023 in rte_eth_tx_burst ()
#2  0x0806124b in l2fwd_send_burst ()
#3  0x0806138c in l2fwd_send_packet ()
#4  0x08061467 in l2fwd_simple_forward ()
#5  0x080618ea in l2fwd_main_loop ()
#6  0x08061933 in l2fwd_launch_one_lcore ()
#7  0x080fe4dc in rte_eal_mp_remote_launch (f=0x8061928 <l2fwd_launch_one_lcore>, arg=0x0, call_master=CALL_MASTER) at /opt/code/dpdk-1.8.0/lib/librte_eal/common/eal_common_launch.c:91
#8  0x0806248c in main ()
(gdb) 




©️2020 CSDN 皮肤主题: 深蓝海洋 设计师: CSDN官方博客 返回首页
实付0元
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、C币套餐、付费专栏及课程。

余额充值