继续本专题的研究,关于本专题前期的内容请参考:
- dpdk vhost研究(一)
- dpdk vhost研究(二)
本文会重点讨论下vhost pmd和lib库中的api如何使用。
在前面的章节中描述过virtio-net设备的生命周期包括设备创建、配置、服务启动和设备销毁几个阶段。
我们先回忆下整个生命周期:
- 设备创建 vhost-user通过socket连接来创建。当创建一个virtio-net设备时,需要:
- 分配新的virtio-net设备结构,并添加到设备链表中
- 为该设备分配一个处理核,并添加设备到数据面的链表中
- 在vhost上分配一个为virtio-net设备服务的RX/TX队列
- 配置
利用VHOST_SET_VRING_*消息通知vhost虚拟队列的大小、基本索引和位置,vhost将虚拟队列映射到自己的虚拟地址空间。
- 服务启动
vhost利用VHOST_SET_VRING_KICK消息来启动虚拟队列服务。之后,vhost便可以轮询接收队列,并将数据放到virtio-net设备的接收队列上。同时,也可以轮询发送虚拟队列,查看是否有待发送的数据包,如果有,则将其复制到发送队列中。
- 设备销毁
vhost利用VHOST_GET_VRING_BASE消息来通知停止提供对接收队列和发送虚拟队列的服务。同时,分配给virtio-net设备的处理核和物理网卡上的RX和TX队列也将被释放。
在examples\vhost目录下有对virtio-net使用的示例,一起来研究下:
int main(int argc, char *argv[])
{
unsigned lcore_id, core_id = 0;
unsigned nb_ports, valid_num_ports;
int ret, i;
uint8_t portid;
static pthread_t tid;
char thread_name[RTE_MAX_THREAD_NAME_LEN];
uint64_t flags = 0;
signal(SIGINT, sigint_handler);
/* init EAL */
ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
argc -= ret;
argv += ret;
/* parse app arguments */
ret = us_vhost_parse_args(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Invalid argument\n");
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
TAILQ_INIT(&lcore_info[lcore_id].vdev_list);
if (rte_lcore_is_enabled(lcore_id))
lcore_ids[core_id++] = lcore_id;
}
if (rte_lcore_count() > RTE_MAX_LCORE)
rte_exit(EXIT_FAILURE,"Not enough cores\n");
/* Get the number of physical ports. */
nb_ports = rte_eth_dev_count();
/*
* Update the global var NUM_PORTS and global array PORTS
* and get value of var VALID_NUM_PORTS according to system ports number
*/
valid_num_ports = check_ports_num(nb_ports);
if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) {
RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
"but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS);
return -1;
}
/*
* FIXME: here we are trying to allocate mbufs big enough for
* @MAX_QUEUES, but the truth is we're never going to use that
* many queues here. We probably should only do allocation for
* those queues we are going to use.
*/
create_mbuf_pool(valid_num_ports, rte_lcore_count() - 1, MBUF_DATA_SIZE,
MAX_QUEUES, RTE_TEST_RX_DESC_DEFAULT, MBUF_CACHE_SIZE);
if (vm2vm_mode == VM2VM_HARDWARE) {
/* Enable VT loop back to let L2 switch to do it. */
vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
RTE_LOG(DEBUG, VHOST_CONFIG,
"Enable loop back for L2 switch in vmdq.\n");
}
/* initialize all ports */
for (portid = 0; portid < nb_ports; portid++) {
/* skip ports that are not enabled */
if ((enabled_port_mask & (1 << portid)) == 0) {
RTE_LOG(INFO, VHOST_PORT,
"Skipping disabled port %d\n", portid);
continue;
}
if (port_init(portid) != 0)
rte_exit(EXIT_FAILURE,
"Cannot initialize network ports\n");
}
/* Enable stats if the user option is set. */
if (enable_stats) {
ret = pthread_create(&tid, NULL, (void *)print_stats, NULL);
if (ret != 0)
rte_exit(EXIT_FAILURE,
"Cannot create print-stats thread\n");
/* Set thread_name for aid in debugging. */
snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats");
ret = rte_thread_setname(tid, thread_name);
if (ret != 0)
RTE_LOG(DEBUG, VHOST_CONFIG,
"Cannot set print-stats name\n");
}
/* Launch all data cores. */
RTE_LCORE_FOREACH_SLAVE(lcore_id)
rte_eal_remote_launch(switch_worker, NULL, lcore_id);
if (client_mode)
flags |= RTE_VHOST_USER_CLIENT;
if (dequeue_zero_copy)
flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
/* Register vhost user driver to handle vhost messages. */
for (i = 0; i < nb_sockets; i++) {
char *file = socket_files + i * PATH_MAX;
ret = rte_vhost_driver_register(f