OVS DPDK与QEMU之间如何通过vhost user协议通信 vhost user协议的控制和数据通道

本文详细探讨了vhost user协议及其在OVS DPDK、QEMU和virtio-net中的实现:介绍了数据如何通过直接内存访问传输,以及控制通道如何使用UNIX套接口进行消息交换,并特别强调了如何利用VRING_AVAIL_F_NO_INTERRUPT标志控制中断通知。此外,还讨论了QEMU和DPDK之间如何通过vhost user套接口进行数据包的发送和接收。
摘要由CSDN通过智能技术生成
netdev_dpdk_vhost_construct定义在文件openvswitch-2.9.2/lib/netdev-dpdk.c中:

/* Constructs a vhost-user port for 'netdev'.
 *
 * Steps, in order: validate the port name, build the socket path into
 * dev->vhost_id, register that path with the vhost library (with the
 * RTE_VHOST_USER_CLIENT flag cleared), register the device callbacks,
 * disable the TSO4/TSO6/CSUM features, start the vhost driver, and finally
 * run vhost_common_construct().
 *
 * Returns EINVAL if the name contains '/' or '\'; otherwise returns the
 * value of the last call made, i.e. the error code of the first failing
 * step, or the result of vhost_common_construct() if every earlier step
 * succeeded.
 *
 * dpdk_mutex is held from just after name validation until the 'out' label.
 *
 * NOTE(review): the error paths after a successful
 * rte_vhost_driver_register() jump to 'out' without any unregister call in
 * this function -- confirm whether the caller cleans up. */
1058 static int
1059 netdev_dpdk_vhost_construct(struct netdev *netdev)
1060 {
1061     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
1062     const char *name = netdev->name;
1063     int err;
1064 
1065     /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
1066      * the file system. '/' or '\' would traverse directories, so they're not
1067      * acceptable in 'name'. */
1068     if (strchr(name, '/') || strchr(name, '\\')) {
1069         VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "
1070                  "A valid name must not include '/' or '\\'",
1071                  name);
         /* Early return: dpdk_mutex has not been taken yet and the
          * deprecation warning at 'out' is not reached on this path. */
1072         return EINVAL;
1073     }
1074 
1075     ovs_mutex_lock(&dpdk_mutex);
1076     /* Take the name of the vhost-user port and append it to the location where
1077      * the socket is to be created, then register the socket.
1078      */
         /* NOTE(review): the snprintf() result is not checked, so a path longer
          * than dev->vhost_id would be silently truncated. */
1079     snprintf(dev->vhost_id, sizeof dev->vhost_id, "%s/%s",
1080              dpdk_get_vhost_sock_dir(), name);
1081 
         /* Clear the client flag before registering this socket path. */
1082     dev->vhost_driver_flags &= ~RTE_VHOST_USER_CLIENT;
1083     err = rte_vhost_driver_register(dev->vhost_id, dev->vhost_driver_flags);
1084     if (err) {
1085         VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
1086                  dev->vhost_id);
1087         goto out;
1088     } else {
             /* Presumably arranges for the socket file to be removed if the
              * process dies on a fatal signal (going by the helper's name) --
              * TODO confirm. */
1089         fatal_signal_add_file_to_unlink(dev->vhost_id);
1090         VLOG_INFO("Socket %s created for vhost-user port %s\n",
1091                   dev->vhost_id, name);
1092     }
1093 
         /* Attach the virtio_net_device_ops callback table to this socket path. */
1094     err = rte_vhost_driver_callback_register(dev->vhost_id,
1095                                                 &virtio_net_device_ops);
1096     if (err) {
1097         VLOG_ERR("rte_vhost_driver_callback_register failed for vhost user "
1098                  "port: %s\n", name);
1099         goto out;
1100     }
1101 
         /* Turn off the host TSO (IPv4 and IPv6) and checksum feature bits for
          * this socket path. */
1102     err = rte_vhost_driver_disable_features(dev->vhost_id,
1103                                 1ULL << VIRTIO_NET_F_HOST_TSO4
1104                                 | 1ULL << VIRTIO_NET_F_HOST_TSO6
1105                                 | 1ULL << VIRTIO_NET_F_CSUM);
1106     if (err) {
1107         VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
1108                  "port: %s\n", name);
1109         goto out;
1110     }
1111 
1112     err = rte_vhost_driver_start(dev->vhost_id);
1113     if (err) {
1114         VLOG_ERR("rte_vhost_driver_start failed for vhost user "
1115                  "port: %s\n", name);
1116         goto out;
1117     }
1118 
         /* Last step: no 'goto' is needed on failure because control falls
          * through to 'out' with 'err' already set. */
1119     err = vhost_common_construct(netdev);
1120     if (err) {
1121         VLOG_ERR("vhost_common_construct failed for vhost user "
1122                  "port: %s\n", name);
1123     }
1124 
1125 out:
1126     ovs_mutex_unlock(&dpdk_mutex);
         /* Reached on both the success path and every 'goto out' failure path. */
1127     VLOG_WARN_ONCE("dpdkvhostuser ports are considered deprecated;  "
1128                    "please migrate to dpdkvhostuserclient ports.");
1129     return err;
1130 }
netdev_dpdk_vhost_construct函数调用rte_vhost_driver_register。以下代码(节选)均定义在dpdk-18.08/lib/librte_vhost/socket.c中:

 798 /*
 799  * Register a new vhost-user socket; here we could act as server
 800  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
 801  * is set.
 802  */
 803 int
 804 rte_vhost_driver_register(const char *path, uint64_t flags)
 805 {
 
 867     if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
 868         vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
 869         if (vsocket->reconnect && reconn_tid == 0) {
 870             if (vhost_user_reconnect_init() != 0)
 871                 goto out_mutex;
 872         }
 873     } else {
 874         vsocket->is_server = true;
 875     }
 876     ret = create_unix_socket(vsocket);
 877     if (ret < 0) {
 878         goto out_mutex;
 879     }
netdev_dpdk_vhost_construct函数随后调用rte_vhost_driver_start,该函数同样定义在dpdk-18.08/lib/librte_vhost/socket.c中(节选):

1023 int
1024 rte_vhost_driver_start(const char *path)
1025 {
 
1059     if (vsocket->is_server)
1060         return vhost_user_start_server(vsocket);
1061     else
1062         return vhost_user_start_client(vsocket);
1063 }
作为服务端时,rte_vhost_driver_start调用vhost_user_start_server;当有客户端发起新的vhost-user连接时,会回调vhost_user_server_new_connection:

以下的3个函数调用vhost_user_add_connection:

 266 /* call back when there is new vhost-user connection from client  */
 267 static void
 268 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
 269 {
(...)
 424 static void *
 425 vhost_user_client_reconnect(void *arg __rte_unused)
 426 {
(...)
 494 static int
 495 vhost_user_start_client(struct vhost_user_socket *vsocket)
 496 {
(...)
 
 194 static void
 195 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 196 {
vhost_user_add_connection接下来执行vhost_user_read_cb函数,其又调用vhost_user_msg_handler函数处理接收到的消息。

 /*
  * Read callback for an established vhost-user connection.
  *
  * connfd: file descriptor of the connection; passed to
  *         vhost_user_msg_handler() and closed here on failure.
  * dat:    the struct vhost_user_connection for this connection.
  * remove: output flag; set to 1 on failure (presumably tells the caller
  *         to stop watching connfd -- TODO confirm against the caller).
  *
  * Calls vhost_user_msg_handler() once. If it returns a negative value, the
  * connection is torn down: the fd is closed, the vhost device is
  * destroyed, the optional destroy_connection callback is invoked, the
  * connection is unlinked from the socket's list and freed. If the socket
  * has 'reconnect' set, a new unix socket is created and the client start
  * routine is run again.
  */
 280 static void
 281 vhost_user_read_cb(int connfd, void *dat, int *remove)
 282 {
 283     struct vhost_user_connection *conn = dat;
         /* Cached here because it is still needed after conn is freed below. */
 284     struct vhost_user_socket *vsocket = conn->vsocket;
 285     int ret;
 286 
 287     ret = vhost_user_msg_handler(conn->vid, connfd);
 288     if (ret < 0) {
 289         close(connfd);
 290         *remove = 1;
             /* NOTE(review): the device is destroyed before the
              * destroy_connection callback runs with the same vid -- confirm
              * the callback does not rely on the device still existing. */
 291         vhost_destroy_device(conn->vid);
 292 
             /* destroy_connection is optional in the callback table. */
 293         if (vsocket->notify_ops->destroy_connection)
 294             vsocket->notify_ops->destroy_connection(conn->vid);
 295 
             /* conn_list is modified under conn_mutex. */
 296         pthread_mutex_lock(&vsocket->conn_mutex);
 297         TAILQ_REMOVE(&vsocket->conn_list, conn, next);
 298         pthread_mutex_unlock(&vsocket->conn_mutex);
 299 
 300         free(conn);
 301 
             /* NOTE(review): the return values of both calls below are
              * ignored, so a failed reconnect setup is not reported here. */
 302         if (vsocket->reconnect) {
 303             create_unix_socket(vsocket);
 304             vhost_user_start_client(vsocket);
 305         }
 306     }
 307 }
dpdk-18.08/lib/librte_vhost/vhost_user.c

1548 int
1549 vhost_user_msg_handler(int vid, int fd)
1550 {
1551     struct virtio_net *dev;
1552     struct VhostUserMsg msg;
1553     struct rte_vdpa_device *vdpa_dev;
1554     int did = -1;
1555     int ret;
1556     int unlock_required = 0;
1557     uint32_t skip_master = 0;
1558 
1559     dev = get_device(vid);
1560     if (dev == NULL)
1561         return -1;
1562 
1563     if (!dev->notify_ops) {
1564         dev->notify_ops = vhost_driver_callback_get(dev->ifname);
1565         if (!dev->notify_ops) {
1566             RTE_LOG(ERR, VHOST_CONFIG,
1567                 "failed to get callback ops for driver %s\n",
1568                 dev->ifname);
1569             return -1;
1570         }
1571     }
1572 
1573     ret = read_vhost_message(fd, &msg);
1574     if (ret <= 0 || msg.request.master >= VHOST_USER_MAX) {
1575         if (ret < 0)
1576             RTE_LOG(ERR, VHOST_CONFIG,
1577                 "vhost read message failed\n");
1578         else if (ret == 0)
1579             RTE_LOG(INFO, VHOST_CONFIG,
1580                 "vhost peer closed\n");
1581         else
1582             RTE_LOG(ERR, VHOST_CONFIG,
1583                 "vhost read incorrect message\n");
1584 
1585         return -1;
1586     }
1587 
1588     ret = 0;
1589     if (msg.request.master != VHOST_USER_IOTLB_MSG)
1590         RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
1591             vhost_message_str[msg.request.master]);
1592     else
1593         RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
1594             vhost_message_str[msg.request.master]);
1595 
1596     ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
1597     if (ret < 0) {
1598         RTE_LOG(ERR, VHOST_CONFIG,
1599             "failed to alloc queue\n");
1600         return -1;
1601     }
1602 
1603     /*
1604      * Note: we don't lock all queues on VHOST_USER_GET_VRING_BASE
1605      * and VHOST_USER_RESET_OWNER, since it is sent when virtio stops
1606      * and device is destroyed. destroy_device waits for queues to be
1607      * inactive, so it is safe. Otherwise taking the access_lock
1608      * would cause a dead lock.
1609      */
1610     switch (msg.request.master) {
1611     case VHOST_USER_SET_FEATURES:
1612     case VHOST_USER_SET_PROTOCOL_FEATURES:
1613     case VHOST_USER_SET_OWNER:
1614     case VHOST_USER_SET_MEM_TABLE:
1615     case VHOST_USER_SET_LOG_BASE:
1616     case VHOST_USER_SET_LOG_FD:
1617     case VHOST_USER_SET_VRING_NUM:
1618     case VHOST_USER_SET_VRING_ADDR:
1619     case VHOST_USER_SET_VRING_BASE:
1620     case VHOST_USER_SET_VRING_KICK:
1621     case VHOST_USER_SET_VRING_CALL:
1622     case VHOST_USER_SET_VRING_ERR:
1623     case VHOST_USER_SET_VRING_ENABLE:
1624     case VHOST_USER_SEND_RARP:
1625     case VHOST_USER_NET_SET_MTU:
1626     case VHOST_USER_SET_SLAVE_REQ_FD:
1627         vhost_user_lock_all_queue_pairs(dev);
1628         unlock_required = 1;
1629         break;
1630     default:
1631         break;
1632 
1633     }
1634 
1635     if (dev->extern_ops.pre_msg_handle) {
1636         uint32_t need_reply;
1637 
1638         ret = (*dev->extern_ops.pre_msg_handle)(dev->vid,
1639                 (void *)&msg, &need_reply, &skip_master);
1640         if (ret < 0)
1641             goto skip_to_reply;
1642 
1643         if (need_reply)
1644             send_vhost_reply(fd, &msg);
1645 
1646         if (skip_master)
1647             goto skip_to_post_handle;
1648     }
1649 
1650     switch (msg.request.master) {
1651     case VHOST_USER_GET_FEATURES:
1652         msg.payload.u64 = vhost_user_get_features(dev);
1653         msg.size = sizeof(msg.payload.u64);
1654         send_vhost_reply(fd, &msg);
1655         break;
1656     case VHOST_USER_SET_FEATURES:
1657         ret = vhost_user_set_features(dev, msg.payload.u64);
1658         if (ret)
1659             return -1;
1660         break;
1661 
1662     case VHOST_USER_GET_PROTOCOL_FEATURES:
1663         vhost_user_get_protocol_features(dev, &msg);
1664         send_vhost_reply(fd, &msg);
1665         break;
1666     case VHOST_USER_SET_PROTOCOL_FEATURES:
1667         vhost_user_set_protocol_features(dev, msg.payload.u64);
1668         break;
1669 
1670     case VHOST_USER_SET_OWNER:
1671         vhost_user_set_owner();
1672         break;
1673     case VHOST_USER_RESET_OWNER:
1674         vhost_user_reset_owner(dev);
1675         break;
1676 
1677     case VHOST_USER_SET_MEM_TABLE:
1678         ret = vhost_user_set_mem_table(&dev, &msg);
1679         break;
1680 
1681     case VHOST_USER_SET_LOG_BASE:
1682         vhost_user_set_log_base(dev, &msg);
1683 
1684         /* it needs a reply */
1685         msg.size = sizeof(msg.payload.u64);
1686         send_vhost_reply(fd, &msg);
1687         break;
1688     case VHOST_USER_SET_LOG_FD:
1689         close(msg.fds[0]);
1690         RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
1691         break;
1692 
1693     case VHOST_USER_SET_VRING_NUM:
1694         vhost_user_set_vring_num(dev, &msg);
1695         break;
1696     case VHOST_USER_SET_VRING_ADDR:
1697         vhost_user_set_vring_addr(&dev, &msg);
1698         break;
1699     case VHOST_USER_SET_VRING_BASE:
1700         vhost_user_set_vring_base(dev, &msg);
1701         break;
1702 
1703     case VHOST_USER_GET_VRING_BASE:
1704         vhost_user_get_vring_base(dev, &msg);
1705         msg.size = sizeof(msg.payload.state);
1706         send_vhost_reply(fd, &msg);
1707         break;
1708 
1709     case VHOST_USER_SET_VRING_KICK:
1710         vhost_user_set_vring_kick(&dev, &msg);
1711         break;
1712     case VHOST_USER_SET_VRING_CALL:
1713         vhost_user_set_vring_call(dev, &msg);
1714         break;
1715 
1716     case VHOST_USER_SET_VRING_ERR:
1717      
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值