qemu-kvm virtio 虚拟化-----Linux客户机 virtio设备初始化 (基于Qemu2.0.0 & kernel 3.10.0)

  转发请注明地址:http://blog.163.com/eric_liufeng/blog/static/19738268320156512954219

virtio设备物理上连接在PCI总线上,逻辑上连接在virtio虚拟总线上:作为PCI设备,便于资源的分配与配置;在逻辑设备模型中挂接到virtio总线,便于设备的管理与组织。
1. qemu-kvm提供的virtio pci设备
virtio-blk(硬盘)、virtio-net(网络)、virtio-balloon(气球)等都是pci设备,这些设备连接在pci总线上。代码位于qemu: hw/virtio/virtio-pci.c

virtio-pci.c:

/*
 * QOM type description for the virtio-net PCI device.
 *
 * The string values of the two type-name macros are given in the trailing
 * comments.  In the original listing the "virtio-pci" annotation had lost
 * its comment marker, which left a bare string literal inside the
 * initializer and made the snippet syntactically invalid.
 */
static const TypeInfo virtio_net_pci_info = {
    .name          = TYPE_VIRTIO_NET_PCI,   /* "virtio-net-pci" */
    .parent        = TYPE_VIRTIO_PCI,       /* "virtio-pci" */
    .instance_size = sizeof(VirtIONetPCI),
    .instance_init = virtio_net_pci_instance_init,
    .class_init    = virtio_net_pci_class_init,
};


/*
 * Register every virtio PCI TypeInfo descriptor of this file by calling
 * type_register_static() on each of them in turn.
 */
static void virtio_pci_register_types(void)
{
    type_register_static(&virtio_rng_pci_info);
    type_register_static(&virtio_pci_bus_info);
    type_register_static(&virtio_pci_info);
    /* The 9p descriptor is only registered when CONFIG_VIRTFS is defined. */
#ifdef CONFIG_VIRTFS
    type_register_static(&virtio_9p_pci_info);
#endif
    type_register_static(&virtio_blk_pci_info);
    type_register_static(&virtio_scsi_pci_info);
    type_register_static(&virtio_balloon_pci_info);
    type_register_static(&virtio_serial_pci_info);
    /* The virtio-net PCI descriptor discussed in this article. */
    type_register_static(&
virtio_net_pci_info);
    /* The vhost-scsi descriptor is only registered when CONFIG_VHOST_SCSI is defined. */
#ifdef CONFIG_VHOST_SCSI
    type_register_static(&vhost_scsi_pci_info);
#endif
}

/*
 * Hand the registration function to type_init(); the macro itself is not
 * shown here -- presumably it arranges for the function to run during
 * QEMU start-up (TODO confirm against the QEMU module headers).
 */
type_init(virtio_pci_register_types)


hw/net/virtio-net.c:
/*
 * QOM type description for the virtio-net device itself (as opposed to the
 * PCI wrapper type): type name, parent type, per-instance size and the
 * instance/class initialisation hooks.
 */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET, 
//"virtio-net-device"
    .parent = TYPE_VIRTIO_DEVICE, //"virtio-device"
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

/* Register the virtio-net TypeInfo descriptor via type_register_static(). */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

/*
 * Hand the registration function to type_init(); the macro is not shown
 * here -- presumably it schedules the call for QEMU start-up (TODO confirm).
 */
type_init(virtio_register_types)


2. 客户机PCI设备进行枚举和资源分配
Linux客户机系统启动时,需要对PCI设备进行枚举和资源分配(配置PCI的配置空间),这通常由BIOS完成。不过Linux系统提供了两种方式:一种由BIOS实现,另一种由内核自己实现枚举和资源分配功能。代码位于kernel: arch/x86/pci/init.c
/*
 * Architecture-level PCI initialisation (abridged excerpt).
 *
 * Depending on the kernel configuration it probes for direct PCI
 * configuration access and/or initialises the PCI BIOS access method.
 *
 * NOTE(review): this excerpt omits part of the function -- no return
 * statement is shown although the function is declared to return int, and
 * the value stored in `type` is not used in the lines visible here.
 */
static __init int pci_arch_init(void)
{
    /* Only built when direct configuration access is configured. */
#ifdef CONFIG_PCI_DIRECT
     int type = 0;

     type = pci_direct_probe();
#endif

    /* Only built when PCI BIOS support is configured. */
#ifdef CONFIG_PCI_BIOS
     pci_pcbios_init();
#endif

}

真正的设备枚举和资源分配由这里开始,代码位于 kernel: arch/x86/pci/legacy.c

/*
 * The actual device enumeration and resource allocation start here.
 *
 * If no raw PCI access operations are available (raw_pci_ops is NULL) a
 * message is printed and nothing is scanned; otherwise root bus 0 is
 * scanned through pcibios_scan_root().  Returns 0 in both cases.
 */
int __init pci_legacy_init(void)
{
     /* Without raw configuration-space accessors there is nothing to scan. */
     if (!raw_pci_ops) {
          printk("PCI: System does not support PCI\n");
          return 0;
     }

     printk("PCI: Probing PCI hardware\n");
     /* Start the scan at root bus number 0. */
     pcibios_scan_root(0);
     return 0;
}

 

pcibios_scan_root->pci_scan_bus_on_node->pci_scan_root_bus->pci_scan_child_bus->pci_scan_slot->pci_scan_single_device->pci_device_add

PCI总线上的设备添加到链表

/*
 * Add a discovered PCI device to the device list of its bus (abridged
 * excerpt -- only the list insertion is shown).
 *
 * @dev: the PCI device that was found
 * @bus: the bus whose `devices` list receives the device
 */
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
{

     /*
     * Add the device to our list of discovered devices
     * and the bus list for fixup functions, etc.
     */
     /* The insertion is done with pci_bus_sem held for writing. */
     down_write(&pci_bus_sem);
     list_add_tail(&dev->bus_list, &bus->devices);
     up_write(&pci_bus_sem);

}

上述过程执行完成后,在/sys/devices/pci0000:00目录下创建了virtio pci设备,并且在/sys/bus/pci/devices/目录下创建了对应pci设备的符号链接;同时在/sys/bus/pci/drivers/目录下创建virtio-pci目录,该目录下存在其所支持设备的符号链接文件。

3. virtio总线定义与注册
virtio总线是一条虚拟的总线,目的是满足设备管理与组织的需要。代码位于:
kernel: drivers\virtio\virtio.c

 

/*
 * Description of the "virtio" bus: its name, the device attributes and the
 * match / uevent / probe / remove callbacks supplied by this file.
 */
static struct bus_type virtio_bus = {
     .name  = "virtio",
     .match = virtio_dev_match,
     .dev_attrs = virtio_dev_attrs,
     .uevent = virtio_uevent,
     .probe = virtio_dev_probe,
     .remove = virtio_dev_remove,
};

/*
 * Register the virtio bus with the driver core.
 *
 * A registration failure is treated as fatal: the kernel panics.
 * Otherwise the function returns 0.
 */
static int virtio_init(void)
{
     if (bus_register(&virtio_bus) != 0)
          panic("virtio bus registration failed");
     return 0;
}
 
上述注册函数调用执行完成后,在/sys/bus/目录下创建了一个新的目录virtio,并在该目录下同时创建了devices和drivers两个文件夹。这表示virtio总线已创建,总线所支持的设备与驱动分别位于devices和drivers目录下。

4. virtio-pci设备驱动加载
代码位于:kernel: drivers/virtio/virtio_pci.c
/*
 * PCI driver description for virtio-pci: driver name, table of supported
 * PCI IDs and the probe/remove callbacks.  Power-management operations are
 * only hooked up when CONFIG_PM is defined.
 */
static struct pci_driver virtio_pci_driver = {
     .name          = "virtio-pci",
     .id_table     = virtio_pci_id_table,
     .probe          = virtio_pci_probe,
     .remove          = virtio_pci_remove,
#ifdef CONFIG_PM
     .driver.pm     = &virtio_pci_pm_ops,
#endif
};

 

/* Declare virtio_pci_driver as this module's PCI driver (macros shown below). */
module_pci_driver(virtio_pci_driver);

 

/*
 * module_pci_driver() expands to module_driver() with the PCI
 * register/unregister helpers as arguments.  module_driver() itself is not
 * shown here -- presumably it generates the module init/exit functions that
 * call those helpers (TODO confirm).
 */
#define module_pci_driver(__pci_driver) \
     module_driver(__pci_driver, pci_register_driver, \
                 pci_unregister_driver)

 

/*
 * pci_register_driver() forwards to __pci_register_driver(), passing the
 * current module and its build name along with the driver.
 */
#define pci_register_driver(driver)          \
     __pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)

 

/*
 * Register a PCI driver with the driver core (abridged excerpt).
 *
 * @drv:      the PCI driver; its embedded `driver` member is registered
 * @owner:    owning module (not used in the lines shown here)
 * @mod_name: module name (not used in the lines shown here)
 *
 * Returns the result of driver_register().
 */
int __pci_register_driver(struct pci_driver *drv, struct module *owner, const char *mod_name)
{
     return driver_register(&drv->driver);
}

 

上述注册函数调用执行完成后,在/sys/bus/pci/drivers和/sys/devices目录下创建了virtio-pci文件夹

5. virtio总线子设备注册
上面步骤2(对PCI设备进行枚举和资源分配)中已经介绍,枚举到的设备已经关联到总线链表中。调用
module_pci_driver(virtio_pci_driver);注册驱动时,会对链表中的每一个pci设备进行探测,检查该驱动是否支持该设备;如果支持,则调用驱动的probe函数,完成该pci设备的启用,同时在virtio总线上注册设备。

kernel: drivers\base\dd.c

/*
 * Try to bind a driver to the devices on its bus.
 *
 * Calls __driver_attach for the devices of drv->bus through
 * bus_for_each_dev(), passing the driver as callback data and NULL as the
 * start argument, and returns that call's result.
 */
int driver_attach(struct device_driver *drv)
{
     return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
}

 

__driver_attach-> driver_probe_device -> really_probe:

 

     if (dev->bus->probe) {
          ret = dev->bus->probe(dev);
     } else if (drv->probe) {
          ret = drv->probe(dev);

     }

 


/*
 * the PCI probing function
 *
 * Validates the PCI device ID range and ABI revision, allocates and fills a
 * virtio_pci_device, enables the PCI device, claims and maps its regions,
 * and finally registers the embedded virtio device.
 *
 * @pci_dev: the PCI device being probed
 * @id:      matching entry of the driver's ID table (not used in this body)
 *
 * Returns 0 on success, -ENODEV for an unsupported device ID or ABI
 * revision, -ENOMEM when allocation or mapping fails, or the error code of
 * the failing PCI/virtio call.
 */
static int virtio_pci_probe(struct pci_dev *pci_dev,
                   const struct pci_device_id *id)
{
     struct virtio_pci_device *vp_dev;
     int err;

     /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
     if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
          return -ENODEV;

     /* The device revision must equal the ABI version this driver expects. */
     if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
          printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
                 VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
          return -ENODEV;
     }

     /* allocate our structure and fill it out */
     vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
     if (vp_dev == NULL)
          return -ENOMEM;

     /* The virtio device becomes a child of the PCI device. */
     vp_dev->vdev.dev.parent = &pci_dev->dev;
     vp_dev->vdev.dev.release = virtio_pci_release_dev;
     vp_dev->vdev.config = &virtio_pci_config_ops;
     vp_dev->pci_dev = pci_dev;
     INIT_LIST_HEAD(&vp_dev->virtqueues);
     spin_lock_init(&vp_dev->lock);

     /* Disable MSI/MSIX to bring device to a known good state. */
     pci_msi_off(pci_dev);

     /* enable the device */
     err = pci_enable_device(pci_dev);
     if (err)
          goto out;

     /* Claim the device's PCI regions under the name "virtio-pci". */
     err = pci_request_regions(pci_dev, "virtio-pci");
     if (err)
          goto out_enable_device;

     /* Map BAR 0; the result is the base address used for device I/O. */
     vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
     if (vp_dev->ioaddr == NULL) {
          err = -ENOMEM;
          goto out_req_regions;
     }

     pci_set_drvdata(pci_dev, vp_dev);
     pci_set_master(pci_dev);

     /* we use the subsystem vendor/device id as the virtio vendor/device
     * id.  this allows us to use the same PCI vendor/device id for all
     * virtio devices and to identify the particular virtio driver by
     * the subsystem ids */
     vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
     vp_dev->vdev.id.device = pci_dev->subsystem_device;

     /* finally register the virtio device */
     
err = register_virtio_device(&vp_dev->vdev);
     if (err)
          goto out_set_drvdata;

     return 0;

/* Error unwinding: each label undoes the steps completed before the failure, in reverse order. */
out_set_drvdata:
     pci_set_drvdata(pci_dev, NULL);
     pci_iounmap(pci_dev, vp_dev->ioaddr);
out_req_regions:
     pci_release_regions(pci_dev);
out_enable_device:
     pci_disable_device(pci_dev);
out:
     kfree(vp_dev);
     return err;
}

上述注册函数调用执行完成后,在/sys/devices/virtio-pci/下创建相应的子设备{virtio1,virtio2,virtio3},同时在/sys/bus/virtio/devices下面创建三个符号链接文件{virtio1,virtio2,virtio3}

6. virtio总线子设备驱动注册
在virtio总线上注册设备(register_virtio_device)时,驱动核心会遍历总线上的驱动,找到支持该设备的驱动并与之关联,随后调用virtio总线的probe函数:virtio_dev_probe()。

代码位于: kernel: drivers\virtio\virtio.c

register_virtio_device -> device_register -> device_add -> bus_probe_device ->device_attach->__device_attach->driver_probe_device->really_probe

/*
 * Probe callback of the virtio bus.
 *
 * Marks the device as having a driver, computes the feature set shared by
 * device and driver, lets the transport finalise it, and then calls the
 * virtio driver's own probe().  On success the device status gets
 * DRIVER_OK and the optional scan() hook is run; on failure the status
 * gets FAILED.
 *
 * @_d: generic device embedded in the virtio device being probed
 *
 * Returns the result of the driver's probe().
 */
static int virtio_dev_probe(struct device *_d)

{
     int err, i;
     struct virtio_device *dev = dev_to_virtio(_d);
     struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
     u32 device_features;

     /* We have a driver! */
     add_status(dev, VIRTIO_CONFIG_S_DRIVER);

     /* Figure out what features the device supports. */
     device_features = dev->config->get_features(dev);

     /* Features supported by both device and driver into dev->features. */
     memset(dev->features, 0, sizeof(dev->features));
     for (i = 0; i < drv->feature_table_size; i++) {
          unsigned int f = drv->feature_table[i];
          /* Feature numbers must fit into the 32-bit device_features word. */
          BUG_ON(f >= 32);
          /*
           * NOTE(review): `1` is a signed int, so f == 31 shifts into the
           * sign bit; consider `1U << f` -- confirm against upstream.
           */
          if (device_features & (1 << f))
               set_bit(f, dev->features);
     }

     /* Transport features always preserved to pass to finalize_features. */
     for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
          if (device_features & (1 << i))
               set_bit(i, dev->features);

     dev->config->finalize_features(dev);

     /* Hand over to the virtio driver's own probe routine. */
     err = 
drv->probe(dev);
     if (err)
          add_status(dev, VIRTIO_CONFIG_S_FAILED);
     else {
          add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
          /* scan() is optional and only called after a successful probe. */
          if (drv->scan)
               drv->scan(dev);
     }

     return err;
}

 

//virtio_balloon设备驱动实例
代码位于: kernel: drivers\virtio\virtio_balloon.c

/*
 * virtio driver description for the balloon device: supported feature
 * table, driver name/owner, table of matching device IDs and the
 * probe/remove/config_changed callbacks.  The freeze/restore callbacks are
 * only set when CONFIG_PM is defined.
 */
static struct virtio_driver virtio_balloon_driver = {
     .feature_table = features,
     .feature_table_size = ARRAY_SIZE(features),
     .driver.name =     KBUILD_MODNAME,
     .driver.owner =     THIS_MODULE,
     .id_table =     id_table,
     .probe =     virtballoon_probe,
     .remove =     virtballoon_remove,
     .config_changed = virtballoon_changed,
#ifdef CONFIG_PM
     .freeze     =     virtballoon_freeze,
     .restore =     virtballoon_restore,
#endif
};


/* Declare virtio_balloon_driver as this module's virtio driver (macro shown below). */
module_virtio_driver(virtio_balloon_driver);

/*
 * module_virtio_driver() expands to module_driver() with the virtio
 * register/unregister helpers as arguments.  module_driver() is not shown
 * here -- presumably it generates the module init/exit functions that call
 * those helpers (TODO confirm).
 */
#define module_virtio_driver(__virtio_driver) \
     module_driver(__virtio_driver, register_virtio_driver, \
               unregister_virtio_driver)


同时在/sys/bus/virtio/drivers下面创建三个驱动目录{virtio_balloon,virtio_blk,virtio_console},并且与设备发生关联。

热插拔事件的产生往往由总线驱动级的逻辑处理,所以总线一般会提供事件发送函数,例如virtio总线的事件函数virtio_uevent。

代码位于: kernel: drivers\virtio\virtio.c

/*
 * uevent callback of the virtio bus.
 *
 * Adds a MODALIAS variable built from the virtio device ID and vendor ID
 * (each printed as 8 upper-case hex digits) to the uevent environment.
 *
 * @_dv: generic device embedded in the virtio device
 * @env: uevent environment to append to
 *
 * Returns the result of add_uevent_var().
 */
static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
{
     struct virtio_device *dev = dev_to_virtio(_dv);

     return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X",
                     dev->id.device, dev->id.vendor);
}

下面函数的工作流程如下:
1. 由设备对象往上查找,直到找到包含kset的kobject(总线包含着kset)。
2. 判断该kset的uevent_ops是否提供filter、name、uevent函数,如果提供,则调用它。
3. 分配一个kobj_uevent_env,并开始填充env环境变量:ACTION,DEVPATH,SUBSYSTEM,SEQNUM,MODALIAS。
4. 通过netlink发送到用户空间。

register_virtio_device ->device_register -> device_add -> kobject_uevent(KOBJ_ADD) -> kobject_uevent_env

 

/*
 * Build a uevent for a kobject and broadcast it to user space (abridged
 * excerpt).
 *
 * @kobj:     object the event is about
 * @action:   the action being reported (e.g. KOBJ_ADD, KOBJ_REMOVE)
 * @envp_ext: optional NULL-terminated array of extra "KEY=value" strings
 *
 * NOTE(review): the local variable declarations, the derivation of
 * action_string, the `exit` label with its cleanup and the final return
 * statement are omitted from this excerpt.
 */
int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
                 char *envp_ext[])
{
     /* search the kset we belong to */
     top_kobj = kobj;
     while (!top_kobj->kset && top_kobj->parent)
          top_kobj = top_kobj->parent;

     kset = top_kobj->kset;
     uevent_ops = kset->uevent_ops;

     /* skip the event, if the filter returns zero. */
     if (uevent_ops && uevent_ops->filter)
          if (!uevent_ops->filter(kset, kobj)) {
               pr_debug("kobject: '%s' (%p): %s: filter function "
                    "caused the event to drop!\n",
                    kobject_name(kobj), kobj, __func__);
               return 0;
          }

     /* originating subsystem */
     /* Use the kset's name() callback if present, else the kset's own name. */
     if (uevent_ops && uevent_ops->name)
          subsystem = uevent_ops->name(kset, kobj);
     else
          subsystem = kobject_name(&kset->kobj);

     /* environment buffer */
     env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
     if (!env)
          return -ENOMEM;

     /* complete object path */
     devpath = kobject_get_path(kobj, GFP_KERNEL);
     if (!devpath) {
          retval = -ENOENT;
          goto exit;
     }

     /* default keys */
     retval = add_uevent_var(env, "ACTION=%s", action_string);
     if (retval)
          goto exit;
     retval = add_uevent_var(env, "DEVPATH=%s", devpath);
     if (retval)
          goto exit;
     retval = add_uevent_var(env, "SUBSYSTEM=%s", subsystem);
     if (retval)
          goto exit;

     /* keys passed in from the caller */
     if (envp_ext) {
          for (i = 0; envp_ext[i]; i++) {
               retval = add_uevent_var(env, "%s", envp_ext[i]);
               if (retval)
                    goto exit;
          }
     }

     /* let the kset specific function add its stuff */
     if (uevent_ops && uevent_ops->uevent) {
          retval = uevent_ops->uevent(kset, kobj, env);
          if (retval) {
               pr_debug("kobject: '%s' (%p): %s: uevent() returned "
                    "%d\n", kobject_name(kobj), kobj,
                    __func__, retval);
               goto exit;
          }
     }

     /*
     * Mark "add" and "remove" events in the object to ensure proper
     * events to userspace during automatic cleanup. If the object did
     * send an "add" event, "remove" will automatically generated by
     * the core, if not already done by the caller.
     */
     if (action == KOBJ_ADD)
          kobj->state_add_uevent_sent = 1;
     else if (action == KOBJ_REMOVE)
          kobj->state_remove_uevent_sent = 1;

     /* The sequence number and the broadcast below are done under uevent_sock_mutex. */
     mutex_lock(&uevent_sock_mutex);
     /* we will send an event, so request a new sequence number */
     retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)++uevent_seqnum);
     if (retval) {
          mutex_unlock(&uevent_sock_mutex);
          goto exit;
     }

#if defined(CONFIG_NET)
     /* send netlink message */
     list_for_each_entry(ue_sk, &uevent_sock_list, list) {
          /* allocate message with the maximum possible size */
          /* Header is "<action>@<devpath>": the +2 covers '@' and the NUL. */
          len = strlen(action_string) + strlen(devpath) + 2;
          skb = alloc_skb(len + env->buflen, GFP_KERNEL);
          if (skb) {
               char *scratch;

               /* add header */
               scratch = skb_put(skb, len);
               sprintf(scratch, "%s@%s", action_string, devpath);

               /* copy keys to our continuous event payload buffer */
               for (i = 0; i < env->envp_idx; i++) {
                    /* Each variable is copied together with its terminating NUL. */
                    len = strlen(env->envp[i]) + 1;
                    scratch = skb_put(skb, len);
                    strcpy(scratch, env->envp[i]);
               }

               NETLINK_CB(skb).dst_group = 1;
               retval = netlink_broadcast_filtered(uevent_sock, skb,
                                       0, 1, GFP_KERNEL,
                                       kobj_bcast_filter,
                                       kobj);
               /* ENOBUFS should be handled in userspace */
               if (retval == -ENOBUFS || retval == -ESRCH)
                    retval = 0;
          } else
               retval = -ENOMEM;
     }
#endif
     mutex_unlock(&uevent_sock_mutex);

}

 

用户空间
    
当发送的信息到达用户空间后,用户空间的udevd守护进程接收到此信息,并在udev规则文件里匹配相应的规则。

 

 

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值