virtio pci设备基础

这段时间又重新梳理(revisit)了一遍virtio,把笔记整理一下贴出来。大部分内容来自网上的资料,加上了我个人的一些理解


我们首先关注virtio设备的配置空间,virtio设备本身是基于PCI总线的,因此本质上就是一个PCI设备,和所有其他PCI设备一样,virtio也有自己的vendor ID 0x1AF4,device ID从0x1000 - 0x103F,subsystem device ID如下:

Subsystem Device ID Virtio Device
1 Network card
2 Block device
3 Console
4 Entropy source
5 Memory ballooning
6 IoMemory
7 Rpmsg
8 SCSI host
9 9P transport
10 Mac80211 wlan

virtio设备的第一块IO region(即BAR0指向的IO空间)用来存放virtio设备的配置空间(virtio header),如下所示:

Bits    | 32              | 32             | 32            | 16         | 16           | 16           | 8             | 8
R/W     | R               | R+W            | R+W           | R          | R+W          | R+W          | R+W           | R
Purpose | Device Features | Guest Features | Queue Address | Queue Size | Queue Select | Queue Notify | Device Status | ISR Status

Bits           | 16                   | 16
R/W            | R+W                  | R+W
Purpose(MSI-X) | Configuration Vector | Queue Vector

如果配置空间包含了后面两个域,即CONFIG_VECTOR以及QUEUE_VECTOR,表明这个PCI设备开启了MSI-X中断,否则后面两个域不会在配置空间中。内核定义了一个VIRTIO_PCI_CONFIG宏,用于计算配置空间的大小,如果开启了MSI-X中断则是24字节,否则是20字节


 
 
  1. /* The remaining space is defined by each driver as the per-driver
  2. * configuration space */
  3. #define VIRTIO_PCI_CONFIG(dev) ((dev)->msix_enabled ? 24 : 20)

可以从内核include/linux/virtio_pci.h中找到virtio配置空间的定义代码


 
 
  1. /* A 32-bit r/o bitmask of the features supported by the host */
  2. #define VIRTIO_PCI_HOST_FEATURES 0
  3. /* A 32-bit r/w bitmask of features activated by the guest */
  4. #define VIRTIO_PCI_GUEST_FEATURES 4
  5. /* A 32-bit r/w PFN for the currently selected queue */
  6. #define VIRTIO_PCI_QUEUE_PFN 8
  7. /* A 16-bit r/o queue size for the currently selected queue */
  8. #define VIRTIO_PCI_QUEUE_NUM 12
  9. /* A 16-bit r/w queue selector */
  10. #define VIRTIO_PCI_QUEUE_SEL 14
  11. /* A 16-bit r/w queue notifier */
  12. #define VIRTIO_PCI_QUEUE_NOTIFY 16
  13. /* An 8-bit device status register. */
  14. #define VIRTIO_PCI_STATUS 18
  15. /* An 8-bit r/o interrupt status register. Reading the value will return the
  16. * current contents of the ISR and will also clear it. This is effectively
  17. * a read-and-acknowledge. */
  18. #define VIRTIO_PCI_ISR 19
  19. /* The bit of the ISR which indicates a device configuration change. */
  20. #define VIRTIO_PCI_ISR_CONFIG 0x2
  21. /* MSI-X registers: only enabled if MSI-X is enabled. */
  22. /* A 16-bit vector for configuration changes. */
  23. #define VIRTIO_MSI_CONFIG_VECTOR 20
  24. /* A 16-bit vector for selected queue notifications. */
  25. #define VIRTIO_MSI_QUEUE_VECTOR 22
  26. /* Vector value used to disable MSI for queue */
  27. #define VIRTIO_MSI_NO_VECTOR 0xffff
在24/20字节之后,会存放设备自己的配置域,这里就不展开了


关于PCI的规范和细节,可以参考如下的文章

http://blog.chinaunix.net/uid-618506-id-204331.html

http://blog.sina.com.cn/s/blog_6472c4cc0100qnht.html

http://blog.csdn.net/yayong/article/details/4013299


按照我的理解,这里virtio设备的配置空间,和PCI设备的配置空间是完全不同的概念,virtio自己的配置实际上是占用BAR0指向的一块IO区域来完成的。对于传统的PCI设备,其配置空间是通过PCI规范严格定义好的,普通PCI设备是256个字节,PCIe设备扩展到4K(4096)个字节,其中前64个字节称为PCI配置空间头,其定义如下

register (offset) | bits 31-24 | bits 23-16 | bits 15-8 | bits 7-0
00 | Device ID | Vendor ID
04 | Status | Command
08 | Class code | Subclass | Prog IF | Revision ID
0C | BIST | Header type | Latency Timer | Cache Line Size
10 | Base address #0 (BAR0)
14 | Base address #1 (BAR1)
18 | Secondary Latency Timer | Subordinate Bus Number | Secondary Bus Number | Primary Bus Number
1C | Secondary Status | I/O Limit | I/O Base
20 | Memory Limit | Memory Base
24 | Prefetchable Memory Limit | Prefetchable Memory Base
28 | Prefetchable Base Upper 32 Bits
2C | Prefetchable Limit Upper 32 Bits
30 | I/O Limit Upper 16 Bits | I/O Base Upper 16 Bits
34 | Reserved | Capability Pointer
38 | Expansion ROM base address
3C | Bridge Control | Interrupt PIN | Interrupt Line
(注:上表0x18之后的字段是Type 1头,即PCI-PCI桥的布局;普通设备(包括virtio设备)使用Type 0头,其0x10-0x24依次为BAR0-BAR5,0x2C处为Subsystem Vendor ID/Subsystem ID)


PCI设备的配置空间可以通过PIO或者MMIO来访问,其中PIO方式主要用于系统启动时的PCI设备枚举:x86上先向IO端口0xCF8(CONFIG_ADDRESS)写入bus/device/function/寄存器偏移,再通过端口0xCFC(CONFIG_DATA)读写对应的数据;MMIO方式即PCIe的ECAM。细节请参考相关资料。


host/guest的feature bits需要host和guest通过协商确定,具体的feature bit因virtio设备类型而异,e.g. virtio_net, virtio_blk, virtio_balloon都有自己特定的feature bit;同时第28-31位feature bit保留给transport层(如virtio_ring)使用,不属于具体设备


 
 
  1. /* Some virtio feature bits (currently bits 28 through 31) are reserved for the
  2. * transport being used (eg. virtio_ring), the rest are per-device feature
  3. * bits. */
  4. #define VIRTIO_TRANSPORT_F_START 28
  5. #define VIRTIO_TRANSPORT_F_END 32

目前用到的transport features,是VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX


device status目前有如下几类


 
 
  1. /* Status byte for guest to report progress, and synchronize features. */
  2. /* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
  3. #define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
  4. /* We have found a driver for the device. */
  5. #define VIRTIO_CONFIG_S_DRIVER 2
  6. /* Driver has used its parts of the config, and is happy */
  7. #define VIRTIO_CONFIG_S_DRIVER_OK 4
  8. /* We've given up on this device. */
  9. #define VIRTIO_CONFIG_S_FAILED 0x80

对于设备的操作都在virtio_config_ops里面,其定义如下


 
 
  1. static struct virtio_config_ops virtio_pci_config_ops = {
  2. .get = vp_get,
  3. . set = vp_set,
  4. .get_status = vp_get_status,
  5. .set_status = vp_set_status,
  6. .reset = vp_reset,
  7. .find_vqs = vp_find_vqs,
  8. .del_vqs = vp_del_vqs,
  9. .get_features = vp_get_features,
  10. .finalize_features = vp_finalize_features,
  11. };

vp_get, vp_set最终都是通过ioread8/iowrite8逐字节读写设备的IO空间(vp_dev->ioaddr),这两个函数用于读写设备自己特有的配置,因此访问的都是VIRTIO_PCI_CONFIG偏移之后的空间


 
 
  1. /* virtio config->get() implementation */
  2. static void vp_get(struct virtio_device *vdev, unsigned offset,
  3. void *buf, unsigned len)
  4. {
  5. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  6. void __iomem *ioaddr = vp_dev->ioaddr +
  7. VIRTIO_PCI_CONFIG(vp_dev) + offset;
  8. u8 *ptr = buf;
  9. int i;
  10. for (i = 0; i < len; i++)
  11. ptr[i] = ioread8(ioaddr + i);
  12. }

 
 
  1. /* the config->set() implementation. it's symmetric to the config->get()
  2. * implementation */
  3. static void vp_set(struct virtio_device *vdev, unsigned offset,
  4. const void *buf, unsigned len)
  5. {
  6. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  7. void __iomem *ioaddr = vp_dev->ioaddr +
  8. VIRTIO_PCI_CONFIG(vp_dev) + offset;
  9. const u8 *ptr = buf;
  10. int i;
  11. for (i = 0; i < len; i++)
  12. iowrite8(ptr[i], ioaddr + i);
  13. }

vp_get_status, vp_set_status用于读写设备状态,由于device status总共只有1个字节,因此只需要一次ioread8/iowrite8即可。vp_reset则是向VIRTIO_PCI_STATUS写入0来复位设备(vp_set_status不允许写0,见其中的BUG_ON)


 
 
  1. /* config->{get,set}_status() implementations */
  2. static u8 vp_get_status(struct virtio_device *vdev)
  3. {
  4. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  5. return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
  6. }
  7. static void vp_set_status(struct virtio_device *vdev, u8 status)
  8. {
  9. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  10. /* We should never be setting status to 0. */
  11. BUG_ON(status == 0);
  12. iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
  13. }

 
 
  1. static void vp_reset(struct virtio_device *vdev)
  2. {
  3.     struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  4.     /* 0 status means a reset. */
  5.     iowrite8( 0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
  6. }

vp_get_features, vp_finalize_features也类似,由于features是32bit的,因此调用ioread32/iowrite32来实现。vp_get_features用于获取host feature,因此会读取VIRTIO_PCI_HOST_FEATURES;vp_finalize_features用于配置guest features,即把协商后的结果写入VIRTIO_PCI_GUEST_FEATURES


virtio pci设备同样需要按照系统通用的pci初始化方式注册,初始化时调用pci_register_driver,结束时调用pci_unregister_driver


 
 
  1. static struct pci_driver virtio_pci_driver = {
  2. .name = "virtio-pci",
  3. .id_table = virtio_pci_id_table,
  4. .probe = virtio_pci_probe,
  5. .remove = virtio_pci_remove,
  6. #ifdef CONFIG_PM
  7. .driver.pm = &virtio_pci_pm_ops,
  8. #endif
  9. };
  10. static int __ init virtio_pci_init(void)
  11. {
  12. return pci_register_driver(&virtio_pci_driver);
  13. }
  14. module_init(virtio_pci_init);
  15. static void __ exit virtio_pci_exit(void)
  16. {
  17. pci_unregister_driver(&virtio_pci_driver);
  18. }
  19. module_exit(virtio_pci_exit);


下面来看看virtqueue,在virtio的机制中,前端和后端通过virtqueue来进行数据交换,virtqueue的初始化通过config->find_vqs来进行


 
 
  1. static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
  2. struct virtqueue *vqs[],
  3. vq_callback_t *callbacks[],
  4. const char *names[])
  5. {
  6. int err;
  7. /* Try MSI-X with one vector per queue. */
  8. err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
  9. if (!err)
  10. return 0;
  11. /* Fallback: MSI-X with one vector for config, one shared for queues. */
  12. err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
  13. true, false);
  14. if (!err)
  15. return 0;
  16. /* Finally fall back to regular interrupts. */
  17. return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
  18. false, false);
  19. }
可以看到vp_find_vqs是依次尝试不同的中断模式,具体实现都在函数vp_try_to_find_vqs里面,该函数对应三种情况:MSI-X且每个virtqueue独立一个vector、MSI-X且所有virtqueue共享一个vector、以及传统的INTx中断。下面按是否开启msix分两部分说明

1. 如果没有开启msix模式,则调用vp_request_intx申请一个中断,中断处理函数是vp_interrupt


 
 
  1. if (!use_msix) {
  2. /* Old style: one normal interrupt for change and all vqs. */
  3. err = vp_request_intx(vdev);
  4. if (err)
  5. goto error_request;
  6. } else {

vp_interrupt实际调用的是vp_vring_interrupt(配置变更的中断除外)


 
 
  1. static irqreturn_t vp_interrupt(int irq, void *opaque)
  2. {
  3. struct virtio_pci_device *vp_dev = opaque;
  4. u8 isr;
  5. /* reading the ISR has the effect of also clearing it so it's very
  6. * important to save off the value. */
  7. isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
  8. /* It's definitely not us if the ISR was not high */
  9. if (!isr)
  10. return IRQ_NONE;
  11. /* Configuration change? Tell driver if it wants to know. */
  12. if (isr & VIRTIO_PCI_ISR_CONFIG)
  13. vp_config_changed(irq, opaque);
  14. return vp_vring_interrupt(irq, opaque);
  15. }
vp_vring_interrupt会遍历virtio_pci_device的所有virtqueue(多个队列的设备),调用中断处理函数vring_interrupt,最终调用virtqueue注册的callback函数完成中断处理


 
 
  1. irqreturn_t vring_interrupt( int irq, void *_vq)
  2. {
  3. struct vring_virtqueue *vq = to_vvq(_vq);
  4. if (!more_used(vq)) {
  5. pr_debug( "virtqueue interrupt with no work for %p\n", vq);
  6. return IRQ_NONE;
  7. }
  8. if (unlikely(vq->broken))
  9. return IRQ_HANDLED;
  10. pr_debug( "virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
  11. if (vq->vq.callback)
  12. vq->vq.callback(&vq->vq);
  13. return IRQ_HANDLED;
  14. }

2. 开启了msix模式,还要区分不同的模式,要么是所有virtqueue共享一个中断,要么是每个virtqueue独立一个中断,无论是哪种模式,都需要调用vp_request_msix_vectors去申请irq中断资源。还要对每个virtqueue,调用setup_vq来完成初始化

vp_request_msix_vectors用于申请nvectors个中断,其中至少有一个config changed中断,处理函数为vp_config_changed,其余如果是共享模式,则所有队列共享一个msix中断,中断处理函数是vp_vring_interrupt


 
 
  1. } else {
  2. if (per_vq_vectors) {
  3. /* Best option: one for change interrupt, one per vq. */
  4. nvectors = 1;
  5. for (i = 0; i < nvqs; ++i)
  6. if (callbacks[i])
  7. ++nvectors;
  8. } else {
  9. /* Second best: one for change, shared for all vqs. */
  10. nvectors = 2;
  11. }
  12. err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
  13. if (err)
  14. goto error_request;
  15. }

对于每个virtqueue,都会调用setup_vq初始化对应的virtqueue,同时如果是per-vq中断的模式,还会调用request_irq分配中断资源,中断处理函数是vring_interrupt


 
 
  1. vp_dev->per_vq_vectors = per_vq_vectors;
  2. allocated_vectors = vp_dev->msix_used_vectors;
  3. for (i = 0; i < nvqs; ++i) {
  4. if (!callbacks[i] || !vp_dev->msix_enabled)
  5. msix_vec = VIRTIO_MSI_NO_VECTOR;
  6. else if (vp_dev->per_vq_vectors)
  7. msix_vec = allocated_vectors++;
  8. else
  9. msix_vec = VP_MSIX_VQ_VECTOR;
  10. vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
  11. if (IS_ERR(vqs[i])) {
  12. err = PTR_ERR(vqs[i]);
  13. goto error_find;
  14. }
  15. if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
  16. continue;
  17. /* allocate per-vq irq if available and necessary */
  18. snprintf(vp_dev->msix_names[msix_vec],
  19. sizeof *vp_dev->msix_names,
  20. "%s-%s",
  21. dev_name(&vp_dev->vdev.dev), names[i]);
  22. err = request_irq(vp_dev->msix_entries[msix_vec]. vector,
  23. vring_interrupt, 0,
  24. vp_dev->msix_names[msix_vec],
  25. vqs[i]);
  26. if (err) {
  27. vp_del_vq(vqs[i]);
  28. goto error_find;
  29. }
  30. }
  31. return 0;

其中setup_vq的函数如下:


 
 
  1. static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
  2. void (*callback) (struct virtqueue *vq),
  3. const char *name,
  4. u16 msix_vec)
  5. {
  6. struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  7. struct virtio_pci_vq_info *info;
  8. struct virtqueue *vq;
  9. unsigned long flags, size;
  10. u16 num;
  11. int err;
  12. /* Select the queue we're interested in */ /* 把要配置的queue的index写入配置空间地址 */
  13. iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
  14. /* Check if queue is either not available or already active. */ /* num=0说明queue不可用,否则说明地址非空,已经被占用了 */
  15. num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
  16. if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
  17. return ERR_PTR(-ENOENT);
  18. /* allocate and fill out our structure the represents an active
  19. * queue */
  20. info = kmalloc( sizeof(struct virtio_pci_vq_info), GFP_KERNEL);
  21. if (!info)
  22. return ERR_PTR(-ENOMEM);
  23. info->queue_index = index; /* 队列index */
  24. info->num = num; /* vring size, vring_desc个数 */
  25. info->msix_vector = msix_vec;
  26. size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
  27. info-> queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); /* vring分配空间 */
  28. if (info-> queue == NULL) {
  29. err = -ENOMEM;
  30. goto out_info;
  31. }
  32. /* activate the queue */ /* 把vring的地址写入pci配置空间,触发trap使得qemu可以通知到 */
  33. iowrite32(virt_to_phys(info-> queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
  34. vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
  35. /* create the vring */ /* 创建vring_virqueue,把vring封装在virtqueue里面 */
  36. vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
  37. vdev, info-> queue, vp_notify, callback, name);
  38. if (!vq) {
  39. err = -ENOMEM;
  40. goto out_activate_queue;
  41. }
  42. vq->priv = info; /* virtqueue->priv指向virtio_pci_vq_info */
  43. info->vq = vq; /* virtio_pci_vq_info->vq指向新创建的virtqueue */
  44. if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
  45. iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
  46. msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
  47. if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
  48. err = -EBUSY;
  49. goto out_assign;
  50. }
  51. }
  52. spin_lock_irqsave(&vp_dev->lock, flags);
  53. list_add(&info->node, &vp_dev->virtqueues);
  54. spin_unlock_irqrestore(&vp_dev->lock, flags);
  55. return vq;
  56. out_assign:
  57. vring_del_virtqueue(vq);
  58. out_activate_queue:
  59. iowrite32( 0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
  60. free_pages_exact(info-> queue, size);
  61. out_info:
  62. kfree(info);
  63. return ERR_PTR(err);
  64. }



评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值