DPDK总结(网卡初始化)

网卡初始化过程

图片来源于http://blog.chinaunix.net/uid-28541347-id-5785122.html
(原文此处有一张配图,未随文本保留)

函数及其功能:
rte_eth_dev_count():获取网卡数
rte_eth_dev_configure():配置网卡
rte_eth_rx_queue_setup() / rte_eth_tx_queue_setup():为网卡分配接收/发送队列
rte_eth_dev_start():启动网卡
rte_eth_rx_burst() / rte_eth_tx_burst():基于指定网卡指定队列的收/发包函数

rte_eth_dev / rte_eth_dev_data

DPDK定义了一个rte_eth_devices数组,数组元素类型为struct rte_eth_dev,一个数组元素表示一块网卡。struct rte_eth_dev有四个重要的成员:rx_pkt_burst、tx_pkt_burst、dev_ops和data。其中rx_pkt_burst和tx_pkt_burst分别是网卡的burst收包函数和发包函数;dev_ops是网卡驱动注册的函数表,类型为struct eth_dev_ops;data包含了网卡的主要信息,类型为struct rte_eth_dev_data。

/**
 * Per-port Ethernet device descriptor.
 *
 * Holds the burst RX/TX entry points, the driver's operation table and a
 * pointer to the port's rte_eth_dev_data.
 */
struct rte_eth_dev {
	/* rx/tx_pkt_burst are registered during rte_bus_probe() */
	eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
	eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
	eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */
	struct rte_eth_dev_data *data;  /**< Pointer to device data */
	/* dev_ops is registered during rte_bus_probe() */
	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
	struct rte_device *device; /**< Backing device */
	struct rte_intr_handle *intr_handle; /**< Device interrupt handle */
	/** User application callbacks for NIC interrupts */
	struct rte_eth_dev_cb_list link_intr_cbs;
	/**
	 * User-supplied functions called from rx_burst to post-process
	 * received packets before passing them to the user
	 */
	struct rte_eth_rxtx_callback *post_rx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
	/**
	 * User-supplied functions called from tx_burst to pre-process
	 * packets before passing them to the driver for transmission.
	 */
	struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
	enum rte_eth_dev_state state; /**< Flag indicating the port state */
} __rte_cache_aligned;

/**
 * Main per-port state of an Ethernet device: queue pointer arrays and their
 * lengths, the applied configuration, link and MAC information, and the
 * started/stopped flags of the port and of each queue.
 */
struct rte_eth_dev_data {
	char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */

	/* Array of RX queues */
	void **rx_queues; /**< Array of pointers to RX queues. */
	/* Array of TX queues */
	void **tx_queues; /**< Array of pointers to TX queues. */
	/* Length of the rx_queues array */
	uint16_t nb_rx_queues; /**< Number of RX queues. */
	/* Length of the tx_queues array */
	uint16_t nb_tx_queues; /**< Number of TX queues. */

	struct rte_eth_dev_sriov sriov;    /**< SRIOV data */

	void *dev_private;              /**< PMD-specific private data */

	struct rte_eth_link dev_link;
	/**< Link-level information & status */

	struct rte_eth_conf dev_conf;   /**< Configuration applied to device. */
	uint16_t mtu;                   /**< Maximum Transmission Unit. */

	uint32_t min_rx_buf_size;
	/**< Common rx buffer size handled by all queues */

	uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */
	struct ether_addr* mac_addrs;/**< Device Ethernet Link address. */
	uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR];
	/** bitmap array of associating Ethernet MAC addresses to pools */
	struct ether_addr* hash_mac_addrs;
	/** Device Ethernet MAC addresses of hash filtering. */
	uint8_t port_id;           /**< Device [external] port identifier. */
	__extension__
	uint8_t promiscuous   : 1, /**< RX promiscuous mode ON(1) / OFF(0). */
		scattered_rx : 1,  /**< RX of scattered packets is ON(1) / OFF(0) */
		all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */
		dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0). */
		lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
	uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
	/** Queues state: STARTED(1) / STOPPED(0) */
	uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
	/** Queues state: STARTED(1) / STOPPED(0) */
	uint32_t dev_flags; /**< Capabilities */
	enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough */
	int numa_node;  /**< NUMA node connection */
	struct rte_vlan_filter_conf vlan_filter_conf;
	/**< VLAN filter configuration. */
};

/* Table of Ethernet devices; the ethdev functions index it by port_id. */
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];

/*
 * NOTE(review): presumably the storage that each device's ->data pointer
 * refers to; its allocation is not shown in this excerpt - confirm.
 */
static struct rte_eth_dev_data *rte_eth_dev_data;

rte_eth_dev_count()

uint8_t
rte_eth_dev_count(void)
{
	uint8_t p;
	uint8_t count;

	count = 0;

	RTE_ETH_FOREACH_DEV(p)
		count++;

	return count;
}

/*
 * Loop header that walks p over the port indexes returned by
 * rte_eth_find_next(): the search starts at index 0, continues from p + 1
 * after each iteration, and the loop ends once the returned index is no
 * longer below RTE_MAX_ETHPORTS.
 */
#define RTE_ETH_FOREACH_DEV(p)					\
	for (p = rte_eth_find_next(0);				\
	     (unsigned int)p < (unsigned int)RTE_MAX_ETHPORTS;	\
	     p = rte_eth_find_next(p + 1))

/**
 * Scan rte_eth_devices[] from port_id upwards and return the index of the
 * first device whose state is RTE_ETH_DEV_ATTACHED.
 *
 * @param port_id
 *   Index at which the scan starts (inclusive).
 * @return
 *   Index of the first attached device at or after port_id, or
 *   RTE_MAX_ETHPORTS when there is none.
 */
uint8_t
rte_eth_find_next(uint8_t port_id)
{
	for (; port_id < RTE_MAX_ETHPORTS; port_id++) {
		if (rte_eth_devices[port_id].state == RTE_ETH_DEV_ATTACHED)
			return port_id;
	}

	/* Ran off the end of the table without finding an attached device. */
	return RTE_MAX_ETHPORTS;
}

rte_eth_dev_configure()

rte_eth_dev_configure()的主要工作是分配接收/发送队列数组。数组元素类型是void *,每个数组元素指向一个接收/发送队列。

/**
 * Configure an Ethernet device: validate the request, store the
 * configuration in dev->data->dev_conf, (re)allocate the RX/TX queue pointer
 * arrays and finally call the driver's dev_configure callback.
 *
 * @param port_id
 *   Port identifier of the device to configure.
 * @param nb_rx_q
 *   Number of receive queues to set up.
 * @param nb_tx_q
 *   Number of transmit queues to set up.
 * @param dev_conf
 *   Configuration to apply; must not be NULL.
 * @return
 *   0 on success;
 *   -EINVAL for an invalid port id, a NULL dev_conf, queue counts that are
 *   both zero or above the library/device limits, an unsupported interrupt
 *   request or an out-of-range jumbo frame length;
 *   -ENOTSUP if the driver lacks dev_infos_get or dev_configure;
 *   -EBUSY if the device is started;
 *   otherwise the non-zero value returned by the queue configuration helpers
 *   or by the driver's dev_configure callback.
 */
int
rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
		      const struct rte_eth_conf *dev_conf)
{
	struct rte_eth_dev *dev;
	struct rte_eth_dev_info dev_info;
	int diag;

	/* Reject port ids that do not refer to a valid device */
	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	/* The RX queue count must not exceed the library-wide limit */
	if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) {
		RTE_PMD_DEBUG_TRACE(
			"Number of RX queues requested (%u) is greater than max supported(%d)\n",
			nb_rx_q, RTE_MAX_QUEUES_PER_PORT);
		return -EINVAL;
	}

	/* The TX queue count must not exceed the library-wide limit */
	if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) {
		RTE_PMD_DEBUG_TRACE(
			"Number of TX queues requested (%u) is greater than max supported(%d)\n",
			nb_tx_q, RTE_MAX_QUEUES_PER_PORT);
		return -EINVAL;
	}

	/* Look up the device that belongs to port_id */
	dev = &rte_eth_devices[port_id];

	/* The driver must provide dev_infos_get and dev_configure */
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP);

	/* A started device cannot be reconfigured */
	if (dev->data->dev_started) {
		RTE_PMD_DEBUG_TRACE(
		    "port %d must be stopped to allow configuration\n", port_id);
		return -EBUSY;
	}

	/*
	 * dev_conf is dereferenced from here on; fail cleanly on NULL instead
	 * of crashing. The check sits after the earlier validations so that
	 * their error codes keep their precedence.
	 */
	if (dev_conf == NULL) {
		RTE_PMD_DEBUG_TRACE(
		    "port %d cannot be configured from a NULL dev_conf\n", port_id);
		return -EINVAL;
	}

	/* Copy the dev_conf parameter into the dev structure */
	memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf));

	/*
	 * Check that the numbers of RX and TX queues are not greater
	 * than the maximum number of RX and TX queues supported by the
	 * configured device.
	 */
	/* For ixgbe this callback is ixgbe_dev_info_get() */
	(*dev->dev_ops->dev_infos_get)(dev, &dev_info);

	/* At least one RX or TX queue must be requested */
	if (nb_rx_q == 0 && nb_tx_q == 0) {
		RTE_PMD_DEBUG_TRACE("ethdev port_id=%d both rx and tx queue cannot be 0\n", port_id);
		return -EINVAL;
	}

	/* The RX queue count must not exceed the device limit */
	if (nb_rx_q > dev_info.max_rx_queues) {
		RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_queues=%d > %d\n",
				port_id, nb_rx_q, dev_info.max_rx_queues);
		return -EINVAL;
	}

	/* The TX queue count must not exceed the device limit */
	if (nb_tx_q > dev_info.max_tx_queues) {
		RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_queues=%d > %d\n",
				port_id, nb_tx_q, dev_info.max_tx_queues);
		return -EINVAL;
	}

	/* Check that the device supports requested interrupts */
	if ((dev_conf->intr_conf.lsc == 1) &&
	    (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
		RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n",
				    dev->device->driver->name);
		return -EINVAL;
	}
	if ((dev_conf->intr_conf.rmv == 1) &&
	    (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) {
		RTE_PMD_DEBUG_TRACE("driver %s does not support rmv\n",
				    dev->device->driver->name);
		return -EINVAL;
	}

	/*
	 * If jumbo frames are enabled, check that the maximum RX packet
	 * length is supported by the configured device.
	 */
	if (dev_conf->rxmode.jumbo_frame == 1) {
		if (dev_conf->rxmode.max_rx_pkt_len >
		    dev_info.max_rx_pktlen) {
			RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
				" > max valid value %u\n",
				port_id,
				(unsigned)dev_conf->rxmode.max_rx_pkt_len,
				(unsigned)dev_info.max_rx_pktlen);
			return -EINVAL;
		} else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) {
			RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
				" < min valid value %u\n",
				port_id,
				(unsigned)dev_conf->rxmode.max_rx_pkt_len,
				(unsigned)ETHER_MIN_LEN);
			return -EINVAL;
		}
	} else {
		/*
		 * Without jumbo frames, a length outside
		 * [ETHER_MIN_LEN, ETHER_MAX_LEN] (64..1518 per the article)
		 * is replaced by the default, ETHER_MAX_LEN.
		 */
		if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN ||
			dev_conf->rxmode.max_rx_pkt_len > ETHER_MAX_LEN)
			/* Use default value */
			dev->data->dev_conf.rxmode.max_rx_pkt_len =
							ETHER_MAX_LEN;
	}

	/*
	 * Setup new number of RX/TX queues and reconfigure device.
	 */
	/* Allocate the RX queue array (dev->data->rx_queues / nb_rx_queues) */
	diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q);
	if (diag != 0) {
		RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_rx_queue_config = %d\n",
				port_id, diag);
		return diag;
	}

	/* Allocate the TX queue array (dev->data->tx_queues / nb_tx_queues) */
	diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q);
	if (diag != 0) {
		RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_tx_queue_config = %d\n",
				port_id, diag);
		/* Undo the RX queue array allocation */
		rte_eth_dev_rx_queue_config(dev, 0);
		return diag;
	}

	/* For ixgbe this callback is ixgbe_dev_configure() */
	diag = (*dev->dev_ops->dev_configure)(dev);
	if (diag != 0) {
		RTE_PMD_DEBUG_TRACE("port%d dev_configure = %d\n",
				port_id, diag);
		/* Undo both queue array allocations */
		rte_eth_dev_rx_queue_config(dev, 0);
		rte_eth_dev_tx_queue_config(dev, 0);
		return diag;
	}

	return 0;
}

rte_eth_dev_rx_queue_config()

/*
 * Excerpt (omitted code is shown as "..."): allocates the array of RX queue
 * pointers for the device with rte_zmalloc() and records the new queue count
 * in dev->data->nb_rx_queues.
 */
static int
rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
{
	...
	dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues",
			sizeof(dev->data->rx_queues[0]) * nb_queues,
			RTE_CACHE_LINE_SIZE);
	...
	dev->data->nb_rx_queues = nb_queues; /* update nb_rx_queues */
	...
}

rte_eth_dev_tx_queue_config()

/*
 * Excerpt (omitted code is shown as "..."): allocates the array of TX queue
 * pointers for the device with rte_zmalloc() and records the new queue count
 * in dev->data->nb_tx_queues.
 */
static int
rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
{
	...
	dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues",
					   sizeof(dev->data->tx_queues[0]) * nb_queues,
					   RTE_CACHE_LINE_SIZE);
	...
	dev->data->nb_tx_queues = nb_queues; /* update nb_tx_queues */
	...
}

ixgbe_dev_configure()

/*
 * Excerpt (omitted code is shown as "..."): ixgbe implementation of the
 * dev_configure callback. The visible part checks the multi-queue mode and
 * optimistically enables bulk-allocation and vector RX on the adapter.
 */
static int
ixgbe_dev_configure(struct rte_eth_dev *dev)
{
	...
	/* multiple queue mode checking */
	ret  = ixgbe_check_mq_mode(dev);
	...
	/*
	 * Initialize to TRUE. If any of Rx queues doesn't meet the bulk
	 * allocation or vector Rx preconditions we will reset it.
	 */
	adapter->rx_bulk_alloc_allowed = true;
	adapter->rx_vec_allowed = true;
	...
}

rte_eth_rx_queue_setup()

/**
 * Set up one receive queue of an Ethernet device.
 *
 * Validates the request against the device limits, releases any queue that
 * already occupies the slot, and delegates the actual allocation to the
 * driver's rx_queue_setup callback.
 *
 * @param port_id
 *   Port identifier of the device.
 * @param rx_queue_id
 *   Index of the queue; must be below dev->data->nb_rx_queues.
 * @param nb_rx_desc
 *   Number of receive descriptors for the ring.
 * @param socket_id
 *   Passed through to the driver callback.
 * @param rx_conf
 *   Queue configuration, or NULL to use dev_info.default_rxconf.
 * @param mp
 *   Memory pool the receive buffers come from; must not be NULL.
 * @return
 *   0 on success;
 *   -EINVAL for an invalid port id or queue id, a NULL mempool, a mempool
 *   whose data room is too small, or a descriptor count outside the device
 *   limits;
 *   -EBUSY if the device is started;
 *   -ENOTSUP if a required driver callback is missing;
 *   -ENOSPC if the mempool private area is too small;
 *   otherwise the value returned by the driver's rx_queue_setup callback.
 */
int
rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
		       uint16_t nb_rx_desc, unsigned int socket_id,
		       const struct rte_eth_rxconf *rx_conf,
		       struct rte_mempool *mp)
{
	int ret;
	uint32_t mbp_buf_size;
	struct rte_eth_dev *dev;
	struct rte_eth_dev_info dev_info;
	void **rxq;

	/* Reject port ids that do not refer to a valid device */
	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	/* Look up the device that belongs to port_id */
	dev = &rte_eth_devices[port_id];
	if (rx_queue_id >= dev->data->nb_rx_queues) {
		RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
		return -EINVAL;
	}

	/* Queues cannot be set up while the device is started */
	if (dev->data->dev_started) {
		RTE_PMD_DEBUG_TRACE(
		    "port %d must be stopped to allow configuration\n", port_id);
		return -EBUSY;
	}

	/* The driver must provide dev_infos_get and rx_queue_setup */
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP);

	/*
	 * Check the size of the mbuf data buffer.
	 * This value must be provided in the private data of the memory pool.
	 * First check that the memory pool has a valid private data.
	 */
	rte_eth_dev_info_get(port_id, &dev_info);

	/* mp is dereferenced below; fail cleanly instead of crashing on NULL */
	if (mp == NULL) {
		RTE_PMD_DEBUG_TRACE("Invalid NULL mempool pointer\n");
		return -EINVAL;
	}

	if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) {
		RTE_PMD_DEBUG_TRACE("%s private_data_size %d < %d\n",
				mp->name, (int) mp->private_data_size,
				(int) sizeof(struct rte_pktmbuf_pool_private));
		return -ENOSPC;
	}
	mbp_buf_size = rte_pktmbuf_data_room_size(mp);

	/*
	 * Compare against headroom + minimum rather than subtracting the
	 * headroom from mbp_buf_size: that subtraction is unsigned and wraps
	 * when the data room is smaller than the headroom, which would let an
	 * undersized pool pass the check. The sum is done in 64 bits so it
	 * cannot wrap either.
	 */
	if ((uint64_t)mbp_buf_size <
	    (uint64_t)RTE_PKTMBUF_HEADROOM + dev_info.min_rx_bufsize) {
		RTE_PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d "
				"(RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)"
				"=%d)\n",
				mp->name,
				(int)mbp_buf_size,
				(int)(RTE_PKTMBUF_HEADROOM +
				      dev_info.min_rx_bufsize),
				(int)RTE_PKTMBUF_HEADROOM,
				(int)dev_info.min_rx_bufsize);
		return -EINVAL;
	}

	/*
	 * nb_rx_desc must lie within [nb_min, nb_max] of the device and be a
	 * multiple of nb_align.
	 */
	if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
			nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
			nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {

		RTE_PMD_DEBUG_TRACE("Invalid value for nb_rx_desc(=%hu), "
			"should be: <= %hu, >= %hu, and a multiple of %hu\n",
			nb_rx_desc,
			dev_info.rx_desc_lim.nb_max,
			dev_info.rx_desc_lim.nb_min,
			dev_info.rx_desc_lim.nb_align);
		return -EINVAL;
	}

	/* Release a queue that already occupies this slot before re-creating it */
	rxq = dev->data->rx_queues;
	if (rxq[rx_queue_id]) {
		/* The driver must provide rx_queue_release */
		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
					-ENOTSUP);
		/* For ixgbe this callback is ixgbe_dev_rx_queue_release() */
		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
		rxq[rx_queue_id] = NULL;
	}

	if (rx_conf == NULL)
		rx_conf = &dev_info.default_rxconf;

	/* For ixgbe this callback is ixgbe_dev_rx_queue_setup() */
	ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc,
					      socket_id, rx_conf, mp);
	if (!ret) {
		/* Track the smallest data room size seen across the queues */
		if (!dev->data->min_rx_buf_size ||
		    dev->data->min_rx_buf_size > mbp_buf_size)
			dev->data->min_rx_buf_size = mbp_buf_size;
	}

	return ret;
}

ixgbe_dev_rx_queue_setup()

/*
 * Excerpt (omitted code is shown as "..."): ixgbe implementation of the
 * rx_queue_setup callback. The visible part allocates the ixgbe_rx_queue,
 * reserves the descriptor ring and the software ring, records the RDT/RDH
 * register addresses, stores the queue in dev->data->rx_queues[] and resets
 * its software state.
 */
int __attribute__((cold))
ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
			 uint16_t queue_idx,
			 uint16_t nb_desc,
			 unsigned int socket_id,
			 const struct rte_eth_rxconf *rx_conf,
			 struct rte_mempool *mp)
{
	...
	/* Allocate the ixgbe_rx_queue structure */
	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
				 RTE_CACHE_LINE_SIZE, socket_id);
	...
	/* Initialize rxq */
	rxq->mb_pool = mp;
	rxq->nb_rx_desc = nb_desc;
	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
	rxq->queue_id = queue_idx;
	rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
		queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
	rxq->port_id = dev->data->port_id;
	rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
							0 : ETHER_CRC_LEN);
	rxq->drop_en = rx_conf->rx_drop_en;
	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
	...
	/* Reserve the descriptor array; its elements are union ixgbe_adv_rx_desc.
	 * Per the article, RX_RING_SZ is
	 * (IXGBE_MAX_RING_DESC + RTE_PMD_IXGBE_RX_MAX_BURST) * sizeof(union ixgbe_adv_rx_desc)
	 * = (4096 + 32) * sizeof(union ixgbe_adv_rx_desc) */
	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
				      RX_RING_SZ, IXGBE_ALIGN, socket_id);
	...
	memset(rz->addr, 0, RX_RING_SZ); /* zero the descriptor array */
	...
	/* rdt_reg_addr holds the address of the RDT register */
	rxq->rdt_reg_addr =
		IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
	/* rdh_reg_addr holds the address of the RDH register */
	rxq->rdh_reg_addr =
		IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
	...
	/* rx_ring_phys_addr is the bus address of the descriptor array */
	rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
	/* rx_ring is the virtual address of the descriptor array */
	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
	...
	/* Allocate the entry array and store its address in sw_ring */
	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
					  sizeof(struct ixgbe_rx_entry) * len,
					  RTE_CACHE_LINE_SIZE, socket_id);
	...
	/* rx_queues[queue_idx] now points to this ixgbe_rx_queue */
	dev->data->rx_queues[queue_idx] = rxq;
	...
	/* Reset the software state of the receive queue */
	ixgbe_reset_rx_queue(adapter, rxq);
	...
}

/*
 * Excerpt (omitted code is shown as "..."): resets the software bookkeeping
 * of a receive queue. The visible part zeroes the indexes and counters,
 * sets rx_free_trigger to rx_free_thresh - 1 and clears the pending segment
 * pointers.
 */
static void __attribute__((cold))
ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
{
	...
	rxq->rx_nb_avail = 0;
	rxq->rx_next_avail = 0;
	rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
	rxq->rx_tail = 0;
	rxq->nb_rx_hold = 0;
	rxq->pkt_first_seg = NULL;
	rxq->pkt_last_seg = NULL;
	...
}

rte_eth_tx_queue_setup()

/**
 * Set up one transmit queue of an Ethernet device.
 *
 * Validates the request against the device limits, releases any queue that
 * already occupies the slot, and delegates the actual allocation to the
 * driver's tx_queue_setup callback.
 *
 * @param port_id
 *   Port identifier of the device.
 * @param tx_queue_id
 *   Index of the queue; must be below dev->data->nb_tx_queues.
 * @param nb_tx_desc
 *   Number of transmit descriptors for the ring.
 * @param socket_id
 *   Passed through to the driver callback.
 * @param tx_conf
 *   Queue configuration, or NULL to use dev_info.default_txconf.
 * @return
 *   -EINVAL for an invalid port id or queue id, or a descriptor count
 *   outside the device limits;
 *   -EBUSY if the device is started;
 *   -ENOTSUP if a required driver callback is missing;
 *   otherwise the value returned by the driver's tx_queue_setup callback.
 */
int
rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
		       uint16_t nb_tx_desc, unsigned int socket_id,
		       const struct rte_eth_txconf *tx_conf)
{
	struct rte_eth_dev *dev;
	struct rte_eth_dev_info dev_info;
	void **txq;

	/* Reject port ids that do not refer to a valid device */
	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	/* Look up the device that belongs to port_id */
	dev = &rte_eth_devices[port_id];
	if (tx_queue_id >= dev->data->nb_tx_queues) {
		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
		return -EINVAL;
	}

	/* Queues cannot be set up while the device is started */
	if (dev->data->dev_started) {
		RTE_PMD_DEBUG_TRACE(
		    "port %d must be stopped to allow configuration\n", port_id);
		return -EBUSY;
	}

	/* The driver must provide dev_infos_get and tx_queue_setup */
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP);

	rte_eth_dev_info_get(port_id, &dev_info);

	/*
	 * nb_tx_desc must lie within [nb_min, nb_max] of the device and be a
	 * multiple of nb_align.
	 */
	if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
	    nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
	    nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
		RTE_PMD_DEBUG_TRACE("Invalid value for nb_tx_desc(=%hu), "
				"should be: <= %hu, >= %hu, and a multiple of %hu\n",
				nb_tx_desc,
				dev_info.tx_desc_lim.nb_max,
				dev_info.tx_desc_lim.nb_min,
				dev_info.tx_desc_lim.nb_align);
		return -EINVAL;
	}

	/* Release a queue that already occupies this slot before re-creating it */
	txq = dev->data->tx_queues;
	if (txq[tx_queue_id]) {
		/* The driver must provide tx_queue_release */
		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
					-ENOTSUP);
		/* For ixgbe this callback is ixgbe_dev_tx_queue_release() */
		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
		txq[tx_queue_id] = NULL;
	}

	if (tx_conf == NULL)
		tx_conf = &dev_info.default_txconf;

	/* For ixgbe this callback is ixgbe_dev_tx_queue_setup() */
	return (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
					       socket_id, tx_conf);
}

ixgbe_dev_tx_queue_setup()

/*
 * Excerpt (omitted code is shown as "..."): ixgbe implementation of the
 * tx_queue_setup callback. The visible part allocates the ixgbe_tx_queue,
 * reserves the descriptor ring and the software ring, records the TDT
 * register address, resets the queue through txq->ops->reset and stores it
 * in dev->data->tx_queues[].
 */
int __attribute__((cold))
ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
			 uint16_t queue_idx,
			 uint16_t nb_desc,
			 unsigned int socket_id,
			 const struct rte_eth_txconf *tx_conf)
{
	...
	/* Allocate the ixgbe_tx_queue structure */
	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
				 RTE_CACHE_LINE_SIZE, socket_id);
	...
	/* Reserve the descriptor array; its elements are union ixgbe_adv_tx_desc.
	 * Per the article the size is sizeof(union ixgbe_adv_tx_desc) * 4096 */
	tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
			sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
			IXGBE_ALIGN, socket_id);
	...
	/* Initialize txq */
	txq->nb_tx_desc = nb_desc;
	txq->tx_rs_thresh = tx_rs_thresh;
	txq->tx_free_thresh = tx_free_thresh;
	txq->pthresh = tx_conf->tx_thresh.pthresh;
	txq->hthresh = tx_conf->tx_thresh.hthresh;
	txq->wthresh = tx_conf->tx_thresh.wthresh;
	txq->queue_id = queue_idx;
	txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
		queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
	txq->port_id = dev->data->port_id;
	txq->txq_flags = tx_conf->txq_flags;
	txq->ops = &def_txq_ops;
	txq->tx_deferred_start = tx_conf->tx_deferred_start;
	...
	/* tdt_reg_addr holds the address of the TDT register */
	txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
	...
	/* tx_ring_phys_addr is the bus address of the descriptor array */
	txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
	/* tx_ring is the virtual address of the descriptor array */
	txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
	...
	/* Allocate the entry array and store its address in sw_ring */
	txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
				sizeof(struct ixgbe_tx_entry) * nb_desc,
				RTE_CACHE_LINE_SIZE, socket_id);
	...
	/* Resets the transmit queue state; per the article this resolves to
	 * ixgbe_reset_tx_queue() */
	txq->ops->reset(txq);
	...
	/* tx_queues[queue_idx] now points to this ixgbe_tx_queue */
	dev->data->tx_queues[queue_idx] = txq;
	...
}

/*
 * Excerpt (omitted code is shown as "..."): resets a transmit queue. The
 * visible part marks every descriptor with IXGBE_TXD_STAT_DD, links the
 * software ring entries into a circular list through next_id, and
 * reinitializes the queue's indexes and counters.
 */
static void __attribute__((cold))
ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
{
	...
	/* Start with the last entry as "previous" so that its next_id wraps to 0 */
	prev = (uint16_t) (txq->nb_tx_desc - 1);
	for (i = 0; i < txq->nb_tx_desc; i++) {
		...
		txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
		txe[i].mbuf = NULL;
		txe[i].last_id = i;
		txe[prev].next_id = i;
		prev = i;
	}
	...
	txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
	txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
	...
	txq->tx_tail = 0;
	txq->nb_tx_used = 0;
	...
	txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
	txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
	...
}

rte_eth_dev_start()

/**
 * Start an Ethernet device.
 *
 * Calls the driver's dev_start callback, marks the device as started,
 * restores the port configuration and, when link-state-change interrupts
 * are not enabled, asks the driver to refresh the link status once.
 *
 * @param port_id
 *   Port identifier of the device to start.
 * @return
 *   0 on success or if the device was already started;
 *   -EINVAL for an invalid port id;
 *   -ENOTSUP if the driver lacks dev_start or (with lsc disabled) link_update;
 *   otherwise the non-zero value returned by the driver's dev_start callback.
 */
int
rte_eth_dev_start(uint8_t port_id)
{
	struct rte_eth_dev *dev;
	int rc;

	/* Reject port ids that do not refer to a valid device */
	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	dev = &rte_eth_devices[port_id];

	/* The driver must provide dev_start */
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP);

	/* Starting an already started device is a no-op */
	if (dev->data->dev_started) {
		RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8
			" already started\n",
			port_id);
		return 0;
	}

	/* For ixgbe this callback is ixgbe_dev_start() */
	rc = (*dev->dev_ops->dev_start)(dev);
	if (rc != 0)
		return rc;
	dev->data->dev_started = 1;

	/* Restore the port configuration now that the device is running */
	rte_eth_dev_config_restore(port_id);

	/* With lsc interrupts enabled no explicit link update is done here */
	if (dev->data->dev_conf.intr_conf.lsc != 0)
		return 0;

	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->link_update, -ENOTSUP);
	(*dev->dev_ops->link_update)(dev, 0);
	return 0;
}

ixgbe_dev_start()

/*
 * Excerpt (omitted code is shown as "..."): ixgbe implementation of the
 * dev_start callback. The visible part programs the TX and RX rings into
 * the NIC and then starts the queues.
 */
static int
ixgbe_dev_start(struct rte_eth_dev *dev)
{
	...
	/* Program TDBAL, TDBAH, TDLEN, TDH and TDT for every ixgbe_tx_queue */
	ixgbe_dev_tx_init(dev);
	...
	/* Program RDBAL, RDBAH, RDLEN, RDH and RDT for every ixgbe_rx_queue */
	err = ixgbe_dev_rx_init(dev);
	...
	err = ixgbe_dev_rxtx_start(dev);
	...
}

/*
 * Excerpt (omitted code is shown as "..."): for every TX queue of the
 * device, writes the descriptor ring's bus address and byte length into the
 * NIC registers and zeroes the head and tail pointers.
 */
void __attribute__((cold))
ixgbe_dev_tx_init(struct rte_eth_dev *dev)
{
	...
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		...
		bus_addr = txq->tx_ring_phys_addr; /* bus address of the descriptor array */
		/* Write the descriptor array's bus address into the NIC registers
		 * TDBAL(TX Descriptor Base Address Low)
		 * TDBAH(TX Descriptor Base Address High)
		 * TDLEN(TX Descriptor Length)
		 * TDH(TX Descriptor Head)
		 * TDT(TX Descriptor Tail)
		 * #define IXGBE_TDBAL(_i)		(0x06000 + ((_i) * 0x40))
		 * #define IXGBE_TDBAH(_i)		(0x06004 + ((_i) * 0x40))
		 * #define IXGBE_TDLEN(_i)		(0x06008 + ((_i) * 0x40))
		 * #define IXGBE_TDH(_i)		(0x06010 + ((_i) * 0x40))
		 * #define IXGBE_TDT(_i)		(0x06018 + ((_i) * 0x40)) */
		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
				(uint32_t)(bus_addr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
				txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc)); /* ring length in bytes */
		/* Setup the HW Tx Head and TX Tail descriptor pointers */
		IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0); /* TDH = 0 */
		IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0); /* TDT = 0 */
		...
	}
	...
}

/*
 * Excerpt (omitted code is shown as "..."): for every RX queue of the
 * device, writes the descriptor ring's bus address and byte length into the
 * NIC registers and zeroes the head and tail pointers.
 */
int __attribute__((cold))
ixgbe_dev_rx_init(struct rte_eth_dev *dev)
{
	...
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];
		...
		bus_addr = rxq->rx_ring_phys_addr; /* bus address of the descriptor array */
		/* Write the descriptor array's bus address into the NIC registers
		 * RDBAL(RX Descriptor Base Address Low)
		 * RDBAH(RX Descriptor Base Address High)
		 * RDLEN(RX Descriptor Length)
		 * RDH(RX Descriptor Head)
		 * RDT(RX Descriptor Tail)
		 * #define IXGBE_RDBAL(_i)	(((_i) < 64) ? (0x01000 + ((_i) * 0x40)) : \
		 * 				(0x0D000 + (((_i) - 64) * 0x40)))
		 * #define IXGBE_RDBAH(_i)	(((_i) < 64) ? (0x01004 + ((_i) * 0x40)) : \
		 * 				(0x0D004 + (((_i) - 64) * 0x40)))
		 * #define IXGBE_RDLEN(_i)	(((_i) < 64) ? (0x01008 + ((_i) * 0x40)) : \
		 * 				(0x0D008 + (((_i) - 64) * 0x40)))
		 * #define IXGBE_RDH(_i)	(((_i) < 64) ? (0x01010 + ((_i) * 0x40)) : \
		 * 				(0x0D010 + (((_i) - 64) * 0x40)))
		 * #define IXGBE_RDT(_i)	(((_i) < 64) ? (0x01018 + ((_i) * 0x40)) : \
		 * 				(0x0D018 + (((_i) - 64) * 0x40))) */
		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
				(uint32_t)(bus_addr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
				rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc)); /* ring length in bytes */
		IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0); /* RDH = 0 */
		IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0); /* RDT = 0 */
		...
	}
	...
}

/*
 * Excerpt (omitted code is shown as "..."): starts the queues of the device,
 * first each TX queue via ixgbe_dev_tx_queue_start(), then each RX queue via
 * ixgbe_dev_rx_queue_start().
 */
int __attribute__((cold))
ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
{
	...
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		...
		ret = ixgbe_dev_tx_queue_start(dev, i);
		...
	}
	...
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		...
		ret = ixgbe_dev_rx_queue_start(dev, i);
		...
	}
	...
}

/*
 * Excerpt (omitted code is shown as "..."): starts one TX queue by setting
 * the enable bit in its TXDCTL register, zeroing TDH/TDT and marking the
 * queue as started in dev->data->tx_queue_state[].
 */
int __attribute__((cold))
ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
	...
	/* Enable transmission on this queue */
	txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
	txdctl |= IXGBE_TXDCTL_ENABLE;
	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
	...
	/* TDH = 0 */
	IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
	/* TDT = 0 */
	IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
	/* Mark the TX queue as RTE_ETH_QUEUE_STATE_STARTED */
	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
	...
}

/*
 * Excerpt (omitted code is shown as "..."; the body of the first if is
 * elided): starts one RX queue by filling its ring with mbufs, setting the
 * enable bit in its RXDCTL register, writing RDH = 0 and
 * RDT = nb_rx_desc - 1, and marking the queue as started in
 * dev->data->rx_queue_state[].
 */
int __attribute__((cold))
ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	...
	/* Allocate an mbuf for every descriptor of this RX queue */
	if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
	...
	/* Enable reception on this queue */
	rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
	rxdctl |= IXGBE_RXDCTL_ENABLE;
	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
	...
	/* RDH = 0 */
	IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
	/* RDT = rxq->nb_rx_desc - 1 */
	IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
	/* Mark the RX queue as RTE_ETH_QUEUE_STATE_STARTED */
	dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
	...
}

/**
 * Fill every descriptor of a receive queue with a freshly allocated mbuf.
 *
 * For each of the nb_rx_desc ring slots an mbuf is taken from rxq->mb_pool,
 * its default data DMA address is written into the descriptor and the mbuf
 * is recorded in the matching software ring entry.
 *
 * @param rxq
 *   Receive queue whose rx_ring and sw_ring are to be populated.
 * @return
 *   0 on success, -ENOMEM as soon as an mbuf allocation fails (mbufs
 *   attached before the failure are left in place).
 */
static int __attribute__((cold))
ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
{
	struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
	unsigned int idx = 0;

	/* Initialize software ring entries */
	while (idx < rxq->nb_rx_desc) {
		volatile union ixgbe_adv_rx_desc *desc;
		struct rte_mbuf *m;
		uint64_t buf_dma;

		m = rte_mbuf_raw_alloc(rxq->mb_pool);
		if (m == NULL) {
			PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
				     (unsigned) rxq->queue_id);
			return -ENOMEM;
		}

		m->data_off = RTE_PKTMBUF_HEADROOM;
		m->port = rxq->port_id;

		/* Bus address of the mbuf data area, in little-endian form */
		buf_dma = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(m));

		desc = &rxq->rx_ring[idx];
		desc->read.hdr_addr = 0;
		desc->read.pkt_addr = buf_dma;

		/* Remember which mbuf backs this descriptor */
		sw_ring[idx].mbuf = m;

		idx++;
	}

	return 0;
}
  • 15
    点赞
  • 61
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值