10.1 Qemu Network Management


Using the rtl8139 NIC as an example, this section starts from the virtual device driver module to analyze the principles and architecture of Qemu's network virtualization.

 

10.1.1 The rtl8139 Virtual NIC Driver

(1) Initialization of the virtual NIC device

The device properties and class-initialization code live in rtl8139.c:

static Property rtl8139_properties[] = {
    DEFINE_NIC_PROPERTIES(RTL8139State, conf),
    DEFINE_PROP_END_OF_LIST(),
};

 

static void rtl8139_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->init = pci_rtl8139_init;
    k->exit = pci_rtl8139_uninit;
    ......
    dc->reset = rtl8139_reset;
    dc->vmsd = &vmstate_rtl8139;
    dc->props = rtl8139_properties;
}

The Property array rtl8139_properties contains the properties needed by the NIC device:

#define DEFINE_NIC_PROPERTIES(_state, _conf)                            \
    DEFINE_PROP_MACADDR("mac",   _state, _conf.macaddr),                \
    DEFINE_PROP_VLAN("vlan",     _state, _conf.peer),                   \
    DEFINE_PROP_NETDEV("netdev", _state, _conf.peer),                   \
    DEFINE_PROP_INT32("bootindex", _state, _conf.bootindex, -1)

mac is the NIC's MAC address, and bootindex gives the NIC's position in the boot order. vlan and netdev store information about the peer to which the virtual NIC's outgoing data is delivered; the peer is described by the NetClientState structure.

struct NetClientState {
    NetClientInfo *info;        // set of function pointers that handle data reception
    int link_down;              // link state of the device
    QTAILQ_ENTRY(NetClientState) next;
    NetClientState *peer;       // this device's peer
    NetQueue *send_queue;       // the device's packet send queue
    char *model;
    char *name;
    char info_str[256];
    unsigned receive_disabled : 1;
};
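On the command line these properties are what the user sets on the NIC; with the explicit -netdev syntax (instead of the -net shortcut used in 10.1.3), the "netdev" property names the backend whose NetClientState becomes the NIC's peer. An illustrative invocation (values are examples only):

qemu-system-x86_64 ... -netdev tap,id=net0,ifname=tap1 -device rtl8139,netdev=net0,mac=52:54:00:12:34:56,bootindex=1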

Below is the initialization function:

static int pci_rtl8139_init(PCIDevice *dev)
{
    ......
    s->nic = qemu_new_nic(&net_rtl8139_info, &s->conf,
                          object_get_typename(OBJECT(dev)), dev->qdev.id, s);
    qemu_format_nic_info_str(&s->nic->nc, s->conf.macaddr.a);
    ......
}

s->conf stores the peer information of this NIC device; Section 10.1.2 analyzes how the NIC is instantiated.

 

qemu_new_nic is an interface function of the Qemu Net layer (net/net.c); its flow is as follows:

a) NetClientState *nc = qemu_new_net_client(info, conf->peer, model, name);
b) nic = DO_UPCAST(NICState, nc, nc);
   nic->conf = conf;
   nic->opaque = opaque;    // opaque is the RTL8139State
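Putting the two steps together, qemu_new_nic looks roughly like the following sketch (simplified from the Qemu 1.x source; the exact signature may differ between versions):

NICState *qemu_new_nic(NetClientInfo *info, NICConf *conf,
                       const char *model, const char *name, void *opaque)
{
    NetClientState *nc;
    NICState *nic;

    /* allocate the NetClientState (info->size is sizeof(NICState) here)
     * and wire it to its peer; see qemu_new_net_client below */
    nc = qemu_new_net_client(info, conf->peer, model, name);

    /* NICState embeds the NetClientState as its first member */
    nic = DO_UPCAST(NICState, nc, nc);
    nic->conf = conf;
    nic->opaque = opaque;    /* here: the RTL8139State */

    return nic;
}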

 

NetClientState *qemu_new_net_client(NetClientInfo *info,
                                    NetClientState *peer,
                                    const char *model,
                                    const char *name)
{
    NetClientState *nc;

    nc = g_malloc0(info->size);
    nc->info = info;
    nc->model = g_strdup(model);
    if (name) {
        nc->name = g_strdup(name);
    } else {
        nc->name = assign_name(nc, model);
    }

    if (peer) {
        nc->peer = peer;
        peer->peer = nc;
    }

    // add this NetClientState to the net_clients list
    QTAILQ_INSERT_TAIL(&net_clients, nc, next);

    // create the packet send queue; the structure is simple and left to the reader
    nc->send_queue = qemu_new_net_queue(nc);

    return nc;
}

(2) Data transfer of the virtual NIC

net_rtl8139_info implements the NetClientInfo of the NIC device's NetClientState and provides the handlers for data received by the NIC:

static NetClientInfo net_rtl8139_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = rtl8139_can_receive,
    .receive = rtl8139_receive,
    .cleanup = rtl8139_cleanup,
};

.receive is called by the NIC's peer when the peer has data to deliver; conversely, when the NIC sends data, the peer's receive function gets called.

static ssize_t rtl8139_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    return rtl8139_do_receive(nc, buf, size, 1);
}

buf and size are prepared by the peer's send function. The main flow of rtl8139_do_receive is as follows (a sketch follows the list):

a. Check whether the NIC is in a state that allows receiving data
b. Handle multicast packets according to the NIC configuration
c. Read the data addresses from the ring buffer (pci_dma_read) and write the packet buffer to those addresses (pci_dma_write)
d. Inject an interrupt into the guest via rtl8139_update_irq ==> qemu_set_irq
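A condensed sketch of steps a.-d. for the legacy (C mode) receive ring; apart from pci_dma_write and rtl8139_update_irq, the helper and field names here are illustrative rather than the exact rtl8139.c code:

static ssize_t rtl8139_do_receive_sketch(RTL8139State *s,
                                         const uint8_t *buf, size_t size)
{
    if (!rtl8139_receiver_enabled(s)) {        /* a. receiver switched off */
        return -1;
    }
    if (!rtl8139_filter_accepts(s, buf)) {     /* b. unicast/multicast filtering */
        return size;                           /* packet silently dropped */
    }

    /* c. DMA the frame into the guest receive ring at the current offset
     *    (header, alignment and wrap-around handling omitted) */
    pci_dma_write(&s->dev, s->RxBuf + s->RxBufAddr, buf, size);
    s->RxBufAddr += size;

    /* d. raise RxOK in the interrupt status register and inject the IRQ */
    s->IntrStatus |= RxOK;
    rtl8139_update_irq(s);
    return size;
}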

 

Symmetrically, the NIC sending data out is the process of calling its peer's receive function. The flow is:

rtl8139_io_writel ==> a write to the registers TxStatus0 through TxStatus0+4*4-1 triggers the NIC's transmit path:

rtl8139_TxStatus_write ==> rtl8139_transmit ==> rtl8139_transmit_one

 

a. The following line reads the frame from the location given by the descriptor into txbuffer:

    pci_dma_read(&s->dev, s->TxAddr[descriptor], txbuffer, txsize);

b. rtl8139_transfer_frame(s, txbuffer, txsize, 0, NULL);

c. rtl8139_update_irq(s): the transfer is complete, raise the interrupt (a sketch that puts these steps together follows)
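A simplified sketch of rtl8139_transmit_one for the legacy (non-C+) transmit mode, pieced together from steps a.-c.; the status-bit handling is abbreviated and should be read as an approximation of the real rtl8139.c code:

static int rtl8139_transmit_one(RTL8139State *s, int descriptor)
{
    uint8_t txbuffer[0x2000];
    int txsize = s->TxStatus[descriptor] & 0x1fff;   /* frame length lives in TxStatus */

    /* a. DMA the frame out of guest memory */
    pci_dma_read(&s->dev, s->TxAddr[descriptor], txbuffer, txsize);

    /* mark the descriptor as done so the guest driver can reuse it */
    s->TxStatus[descriptor] |= TxHostOwns | TxStatOK;

    /* b. hand the frame to the Net layer; it ends up at the peer's .receive */
    rtl8139_transfer_frame(s, txbuffer, txsize, 0, NULL);

    /* c. raise TxOK and inject the interrupt */
    s->IntrStatus |= TxOK;
    rtl8139_update_irq(s);
    return 1;
}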

 

rtl8139_transfer_frame ==> qemu_send_packet(&s->nic->nc, buf, size) ==> qemu_send_packet_async(nc, buf, size, NULL) ==>
qemu_send_packet_async_with_flags(sender, QEMU_NET_PACKET_FLAG_NONE, buf, size, sent_cb);

static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender,
                                                 unsigned flags,
                                                 const uint8_t *buf, int size,
                                                 NetPacketSent *sent_cb)
{
    ......
    queue = sender->peer->send_queue;   // take the send queue of this NIC's peer
    return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb);
}

 

ssize_t qemu_net_queue_send(NetQueue *queue,
                            NetClientState *sender,
                            unsigned flags,
                            const uint8_t *data,
                            size_t size,
                            NetPacketSent *sent_cb)
{
    ......
    ret = qemu_net_queue_deliver(queue, sender, flags, data, size);
    if (ret == 0) {
        qemu_net_queue_append(queue, sender, flags, data, size, sent_cb);
        return 0;
    }

    qemu_net_queue_flush(queue);
    return ret;
}

qemu_net_queue_deliver ==> qemu_deliver_packet

ssize_t qemu_deliver_packet(NetClientState *sender,
                            unsigned flags,
                            const uint8_t *data,
                            size_t size,
                            void *opaque)
{
    NetClientState *nc = opaque;
    ......
    if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
        ret = nc->info->receive_raw(nc, data, size);
    } else {
        ret = nc->info->receive(nc, data, size);
    }

    if (ret == 0) {
        nc->receive_disabled = 1;
    }

    return ret;
}

In the end, qemu_deliver_packet invokes the peer's receive function.

 

 

10.1.2 Instantiation of the rtl8139 Virtual NIC

pc_init1 ==> pci_nic_init_nofail(nd, "rtl8139", NULL);

nd is an entry of NICInfo nd_table[MAX_NICS]; this array is initialized by the main function and is analyzed in the next section. The NICInfo fields relevant here are sketched below.
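For reference, the NICInfo fields used in this section are roughly the following (abridged from the net header of the same Qemu generation; the exact layout may differ):

struct NICInfo {
    MACAddr macaddr;          /* "mac" property                      */
    char *model;              /* NIC model, e.g. "rtl8139"           */
    char *name;
    char *devaddr;            /* PCI address from the "addr=" option */
    NetClientState *netdev;   /* the peer backend                    */
    int used;                 /* this nd_table slot is taken         */
    int instantiated;
    int nvectors;             /* MSI-X vectors ("vectors=" option)   */
};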

 

PCIDevice *pci_nic_init_nofail(NICInfo *nd, const char *default_model,
                               const char *default_devaddr)    /* pci.c */
{
    PCIDevice *res;

    if (qemu_show_nic_models(nd->model, pci_nic_models))
        exit(0);

    res = pci_nic_init(nd, default_model, default_devaddr);
    if (!res)
        exit(1);
    return res;
}

pci_nic_models defines the NIC models supported by Qemu:

static const char * const pci_nic_models[] = {
    "ne2k_pci",
    "i82551",
    "i82557b",
    "i82559er",
    "rtl8139",
    "e1000",
    "pcnet",
    "virtio",
    NULL
};

 

PCIDevice *pci_nic_init(NICInfo *nd, const char *default_model,
                        const char *default_devaddr)
{
    const char *devaddr = nd->devaddr ? nd->devaddr : default_devaddr;
    ......
    i = qemu_find_nic_model(nd, pci_nic_models, default_model);
    if (i < 0)
        return NULL;

    bus = pci_get_bus_devfn(&devfn, devaddr);
    ......
    // create the PCI NIC; pci_rtl8139_init is invoked when the device is
    // initialized by qdev_init below
    pci_dev = pci_create(bus, devfn, pci_nic_names[i]);
    dev = &pci_dev->qdev;

    // set the NIC's properties
    qdev_set_nic_properties(dev, nd);
    if (qdev_init(dev) < 0)
        return NULL;
    return pci_dev;
}

 

void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd)    /* qdev.c */
{
    qdev_prop_set_macaddr(dev, "mac", nd->macaddr.a);
    if (nd->netdev)
        qdev_prop_set_netdev(dev, "netdev", nd->netdev);
    if (nd->nvectors != DEV_NVECTORS_UNSPECIFIED &&
        object_property_find(OBJECT(dev), "vectors", NULL)) {
        qdev_prop_set_uint32(dev, "vectors", nd->nvectors);
    }
    nd->instantiated = 1;
}

 

The next section analyzes where the data in the nd structure comes from.

 

10.1.3 NICInfo Initialization

main ==> net_init_clients

int net_init_clients(void)
{
    QemuOptsList *net = qemu_find_opts("net");

    if (default_net) {
        /* if no clients, we use a default config */
        qemu_opts_set(net, NULL, "type", "nic");
#ifdef CONFIG_SLIRP
        qemu_opts_set(net, NULL, "type", "user");
#endif
    }

    QTAILQ_INIT(&net_clients);

    if (qemu_opts_foreach(qemu_find_opts("netdev"), net_init_netdev, NULL, 1) == -1)
        return -1;

    if (qemu_opts_foreach(net, net_init_client, NULL, 1) == -1) {
        return -1;
    }

    return 0;
}

Both net_init_netdev and net_init_client end up calling net_client_init(opts, is_netdev, &local_err) (net.c), with is_netdev being 1 for -netdev options and 0 for -net options.

 

net_client_init ==> net_client_init1 ==> net_client_init_fun[opts->kind](opts, name, peer)

 

Below is a command line that starts a VM using bridged (tap) networking:

qemu-system-x86_64 /mnt/ubuntun.img -smp 2 -m 1024 -net nic -net tap,ifname=tap1,script=/etc/qemu-ifup,downscript=no

The -net option appears twice: the first instance specifies the NIC (the default rtl8139 is used), the second specifies the tap backend. qemu_opts_foreach therefore calls net_client_init twice, and net_client_init1 calls net_init_nic on the first pass and net_init_tap on the second.

static int net_client_init1(const void *object, int is_netdev, Error **errp)
{
    ......
    if (!is_netdev &&
        (opts->kind != NET_CLIENT_OPTIONS_KIND_NIC ||
         !opts->nic->has_netdev)) {
        peer = net_hub_add_port(u.net->has_vlan ? u.net->vlan : 0, NULL);
    }

    if (net_client_init_fun[opts->kind](opts, name, peer) < 0) {
        error_set(errp, QERR_DEVICE_INIT_FAILED,
                  NetClientOptionsKind_lookup[opts->kind]);
        return -1;
    }
}

On the first call the option is a NIC and has_netdev is 0, so the peer is created by net_hub_add_port, and the resulting hub port number is 0 (a sketch of net_hub_add_port follows).
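A rough sketch of what net_hub_add_port does, simplified from net/hub.c of this Qemu generation (net_hub_new and net_hub_port_new are the internal helpers that allocate the hub and the port):

NetClientState *net_hub_add_port(int hub_id, const char *name)
{
    NetHub *hub;
    NetHubPort *port;

    /* find the hub for this vlan id, or create it on first use */
    QLIST_FOREACH(hub, &hubs, next) {
        if (hub->id == hub_id) {
            break;
        }
    }
    if (!hub) {
        hub = net_hub_new(hub_id);
    }

    /* a hub port is just another NetClientState whose NetClientInfo is
     * net_hub_port_info (shown below); ports get increasing numbers */
    port = net_hub_port_new(hub, name);
    return &port->nc;
}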

 

static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
    const NetClientOptions *opts,
    const char *name,
    NetClientState *peer) = {
        [NET_CLIENT_OPTIONS_KIND_NIC]      = net_init_nic,
#ifdef CONFIG_SLIRP
        [NET_CLIENT_OPTIONS_KIND_USER]     = net_init_slirp,
#endif
        [NET_CLIENT_OPTIONS_KIND_TAP]      = net_init_tap,
        [NET_CLIENT_OPTIONS_KIND_SOCKET]   = net_init_socket,
        ......
};

For the virtual-NIC case, net_init_nic is called:

static int net_init_nic(const NetClientOptions *opts, const char *name,
                        NetClientState *peer)
{
    ......
    nic = opts->nic;

    idx = nic_get_free_idx();
    ......
    nd = &nd_table[idx];

    memset(nd, 0, sizeof(*nd));

    if (nic->has_netdev) {
        // look up the netdev's NetClientState by its device id
        nd->netdev = qemu_find_netdev(nic->netdev);
    } else {
        nd->netdev = peer;
    }
    if (name) {
        nd->name = g_strdup(name);
    }
    if (nic->has_model) {
        nd->model = g_strdup(nic->model);
    }
    if (nic->has_addr) {
        nd->devaddr = g_strdup(nic->addr);
    }

    qemu_macaddr_default_if_unset(&nd->macaddr);

    if (nic->has_vectors) {
        nd->nvectors = nic->vectors;
    } else {
        nd->nvectors = DEV_NVECTORS_UNSPECIFIED;
    }

    nd->used = 1;
    nb_nics++;

    return idx;
}

 

When net_client_init1 runs the second time it again calls net_hub_add_port to create the peer, this time hub port 1. net_init_tap is then called, with the following call chain:

net_init_tap (tap.c) ==> net_tap_fd_init(peer, model, name, fd, vnet_hdr) ==> qemu_new_net_client(&net_tap_info, peer, model, name)

The question is: the NIC's peer is hub port 0 while the tap's peer is hub port 1, so how are the two connected?

static NetClientInfo net_hub_port_info = {
    .type = NET_CLIENT_OPTIONS_KIND_HUBPORT,
    .size = sizeof(NetHubPort),
    .can_receive = net_hub_port_can_receive,
    .receive = net_hub_port_receive,
    .receive_iov = net_hub_port_receive_iov,
    .cleanup = net_hub_port_cleanup,
};

net_hub_port_receive ==> net_hub_receive

static ssize_t net_hub_receive(NetHub *hub, NetHubPort *source_port,
                               const uint8_t *buf, size_t len)
{
    NetHubPort *port;

    QLIST_FOREACH(port, &hub->ports, next) {
        if (port == source_port) {
            continue;
        }

        qemu_send_packet(&port->nc, buf, len);
    }
    return len;
}

So a hub port forwards each packet to every other port on the hub except the one it came from; this is what links the NIC on port 0 with the tap on port 1.

 

10.1.4 The Net Tap Module

For the command line of the previous section, net_init_tap first calls net_tap_init, whose flow is:

a) call tap_open to create the tap device (see the standalone sketch after this list)
b) call launch_script to run the setup script
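On Linux, tap_open essentially boils down to opening the tun/tap clone device and attaching a tap interface with TUNSETIFF; the following standalone sketch shows the core of it (error handling is minimal, and Qemu's real tap_open additionally negotiates features such as IFF_VNET_HDR):

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/if.h>
#include <linux/if_tun.h>

static int tap_open_sketch(char *ifname, size_t ifname_sz)
{
    struct ifreq ifr;
    int fd = open("/dev/net/tun", O_RDWR);

    if (fd < 0) {
        return -1;
    }

    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_flags = IFF_TAP | IFF_NO_PI;    /* L2 tap device, no extra packet-info header */
    if (ifname && ifname[0]) {
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);   /* e.g. "tap1" from ifname= */
    }
    if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
        close(fd);
        return -1;
    }
    if (ifname) {
        strncpy(ifname, ifr.ifr_name, ifname_sz);      /* kernel may have chosen the name */
    }
    return fd;    /* this fd is what TAPState keeps and polls */
}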

 

net_init_tap then calls net_tap_fd_init:

a) qemu_new_net_client registers net_tap_info:

static NetClientInfo net_tap_info = {
    .type = NET_CLIENT_OPTIONS_KIND_TAP,
    .size = sizeof(TAPState),
    .receive = tap_receive,
    .receive_raw = tap_receive_raw,
    .receive_iov = tap_receive_iov,
    .poll = tap_poll,
    .cleanup = tap_cleanup,
};

b) tap_set_offload(&s->nc, 0, 0, 0, 0, 0);

c) tap_read_poll ==> tap_update_fd_handler

static void tap_update_fd_handler(TAPState *s)
{
    qemu_set_fd_handler2(s->fd,
                         s->read_poll  ? tap_can_send : NULL,
                         s->read_poll  ? tap_send     : NULL,
                         s->write_poll ? tap_writable : NULL,
                         s);
}

This registers the read/write callbacks for the tap device's file descriptor.

 

Finally, net_init_tap prepares the downscript string.

 

When real network data arrives on the tap device, tap_send is called; tap_send invokes qemu_send_packet_async, which eventually delivers the data to the virtual NIC's receive handler (a sketch follows).
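A simplified sketch of tap_send; the real function also strips the optional vnet header and loops until the fd would block, and tap_send_completed / tap_read_poll are the flow-control helpers of tap.c (treated here as given):

static void tap_send_sketch(void *opaque)
{
    TAPState *s = opaque;
    uint8_t buf[68 * 1024];
    int size;

    /* read one frame from the tap fd registered via qemu_set_fd_handler2 */
    size = read(s->fd, buf, sizeof(buf));
    if (size <= 0) {
        return;
    }

    /* hand it to the Net layer; it reaches the peer (hub port 1), and the
     * hub then broadcasts it to port 0, i.e. to rtl8139_receive */
    size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
    if (size == 0) {
        /* the receiver is congested: stop polling the fd until the
         * queued packet has been delivered */
        tap_read_poll(s, 0);
    }
}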

 

When the virtual NIC sends data, tap_receive is called:

tap_receive ==> tap_write_packet ==> writev, which writes the data out through the tap fd (a sketch follows).
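tap_write_packet essentially wraps the buffer in an iovec and writes it to the tap fd; a minimal sketch, ignoring the optional virtio-net header handling of the real code (needs <sys/uio.h> and <errno.h>):

static ssize_t tap_write_packet_sketch(TAPState *s,
                                       const uint8_t *buf, size_t size)
{
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len  = size,
    };
    ssize_t len;

    do {
        /* the kernel delivers the frame to the tap interface (e.g. tap1),
         * from where the host bridge/network picks it up */
        len = writev(s->fd, &iov, 1);
    } while (len == -1 && errno == EINTR);

    return len;
}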
