Qemu中PCI设备透传(PCI-Assign)源码分析

本文详细分析了Qemu中PCI设备透传(PCI-Assign)的实现过程,包括PCI-Assign的基本使用步骤、PCI设备与pci-stub驱动的绑定、Qemu中的pci-assign设备注册和内存资源分配等。通过阅读源码揭示了PCI设备透传的工作原理,指出透传主要在内存区域实现,配置区域和中断处理仍依赖软件参与。
摘要由CSDN通过智能技术生成

在网上看到很多人说Qemu的PCI-Assign透传不支持IOMMU,而VFIO透传却可以(还被当做一种优势进行推荐)。而VFIO与SRIOV并非有必然联系,那就是说VFIO和PCI-Assign基本都是靠软件实现的了?既然都是软件实现的,为啥PCI-Assign不可以,而VFIO可以呢?这不科学啊!从来也没人说清楚这件事,到源码里看一下吧!(最后发现他们说的都是错的,网上无根据的文章不要乱信)

PCI-Assign透传的基本使用步骤

随便在网上找一个PCI-Assign透传的使用方法:

How to use 'pci pass-through' to run Linux in Qemu accessing real Ath9k adapter
===============================================================================

# Boot kernel with 'intel_iommu=on'

# Unbind driver from the device and bind 'pci-stub' to it
$ echo "168c 0030" > /sys/bus/pci/drivers/pci-stub/new_id
$ echo 0000:0b:00.0 > /sys/bus/pci/devices/0000:0b:00.0/driver/unbind
$ echo 0000:0b:00.0 > /sys/bus/pci/drivers/pci-stub/bind

# Then just run
$ sudo qemu-system-i386 -m 1024 \
	-device pci-assign,host=0b:00.0,rombar=0 \
	-enable-kvm \
	-kernel $KERNEL \
	-hda $DISK \
	-boot c \
	-append "root=/dev/sda rw"

# In case of
qemu-system-i386: -device pci-assign,host=0b:00.0: Failed to assign device "(null)" : Operation not permitted
qemu-system-i386: -device pci-assign,host=0b:00.0: Device initialization failed.
qemu-system-i386: -device pci-assign,host=0b:00.0: Device 'kvm-pci-assign' could not be initialized

$ dmesg | tail
[  112.129138] kvm_iommu_map_guest: No interrupt remapping support, disallowing device assignment. Re-enble with "allow_unsafe_assigned_interrupts=1" module option.

# run
$ echo 1 > /sys/module/kvm/parameters/allow_unsafe_assigned_interrupts

总结一下基本步骤:

  1. 在内核启动参数中配置intel_iommu=on,开启IOMMU(从这里就可以看出,PCI-Assign不支持IOMMU的话,要配置IOMMU干什么呢?);
  2. 向pci-stub驱动的new_id文件写入要绑定设备的VendorID和DeviceID;
  3. 将PCI设备与原有驱动解绑;
  4. 将PCI设备绑定到pci-stub驱动上;
  5. 在Qemu命令行中以PCI设备的PCI地址为参数启动虚拟机。

疑问:我们知道用户态程序使用一个设备,必须要使用它的用户态接口,通常就是一个设备文件,而这里指定一个地址是什么意思?Qemu是如何使用这个PCI设备的?

PCI设备与pci-stub驱动绑定

接下来我们就看看这个pci-stub驱动到底是怎么绑定到PCI设备上的。

驱动内核模块加载时绑定

  • 这是内核中pci-stub驱动的源码,如果在模块加载时指定了ids参数,将会在加载时为这个pci-stub驱动动态增加匹配的VendorID和DeviceID。我们知道,在Linux内核的Device、Driver、Bus驱动模型中,PCI总线是通过VendorID和DeviceID进行匹配的,因此这时就可以与对应的PCI设备进行匹配绑定。同时,这个驱动啥都没干,没有提供任何上层的应用接口,比如说字符设备或者块设备等(drivers/pci/pci-stub.c):
// SPDX-License-Identifier: GPL-2.0
/*
 * Simple stub driver to reserve a PCI device
 *
 * Copyright (C) 2008 Red Hat, Inc.
 * Author:
 *	Chris Wright
 *
 * Usage is simple, allocate a new id to the stub driver and bind the
 * device to it.  For example:
 *
 * # echo "8086 10f5" > /sys/bus/pci/drivers/pci-stub/new_id
 * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/e1000e/unbind
 * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/pci-stub/bind
 * # ls -l /sys/bus/pci/devices/0000:00:19.0/driver
 * .../0000:00:19.0/driver -> ../../../bus/pci/drivers/pci-stub
 */

#include <linux/module.h>
#include <linux/pci.h>

/*
 * Backing buffer for the "ids" module parameter.  It is parsed once in
 * pci_stub_init(); the parameter is registered with permission bits 0.
 */
static char ids[1024] __initdata;

module_param_string(ids, ids, sizeof(ids), 0);
MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the stub driver, format is "
		 "\"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\""
		 " and multiple comma separated entries can be specified");

/*
 * Probe callback of the stub driver.
 *
 * Only logs that the device has been claimed and reports success (0); the
 * function does nothing else with @dev and does not look at @id.
 */
static int pci_stub_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
	pci_info(dev, "claimed by stub\n");
	return 0;
}

/*
 * The stub driver itself.  It has no static ID table, so it can only match
 * devices through IDs added at run time (see the pci_add_dynid() call in
 * pci_stub_init()), and it provides a probe callback only.
 */
static struct pci_driver stub_driver = {
	.name		= "pci-stub",
	.id_table	= NULL,	/* only dynamic id's */
	.probe		= pci_stub_probe,
};

/*
 * Module init: register the stub driver, then add one dynamic ID for every
 * comma separated entry found in the "ids" module parameter.
 *
 * Returns the pci_register_driver() error if registration fails and 0
 * otherwise; malformed entries and pci_add_dynid() failures are only logged.
 */
static int __init pci_stub_init(void)
{
	char *cursor = ids;
	char *entry;
	int rc;

	rc = pci_register_driver(&stub_driver);
	if (rc)
		return rc;

	/* an empty parameter means there is nothing to parse */
	if (!ids[0])
		return 0;

	/* walk the comma separated entries of the module parameter */
	for (entry = strsep(&cursor, ","); entry;
	     entry = strsep(&cursor, ",")) {
		unsigned int vendor, device;
		unsigned int subvendor = PCI_ANY_ID;
		unsigned int subdevice = PCI_ANY_ID;
		unsigned int class = 0;
		unsigned int class_mask = 0;
		int parsed;

		/* skip empty entries, e.g. the one between two commas */
		if (entry[0] == '\0')
			continue;

		parsed = sscanf(entry, "%x:%x:%x:%x:%x:%x",
				&vendor, &device, &subvendor, &subdevice,
				&class, &class_mask);

		/* vendor and device are mandatory, the rest keep defaults */
		if (parsed < 2) {
			printk(KERN_WARNING
			       "pci-stub: invalid id string \"%s\"\n", entry);
			continue;
		}

		printk(KERN_INFO
		       "pci-stub: add %04X:%04X sub=%04X:%04X cls=%08X/%08X\n",
		       vendor, device, subvendor, subdevice, class, class_mask);

		rc = pci_add_dynid(&stub_driver, vendor, device,
				   subvendor, subdevice, class, class_mask, 0);
		if (rc)
			printk(KERN_WARNING
			       "pci-stub: failed to add dynamic id (%d)\n", rc);
	}

	return 0;
}

/* Module exit: unregister the stub driver registered by pci_stub_init(). */
static void __exit pci_stub_exit(void)
{
	pci_unregister_driver(&stub_driver);
}

/* Hook the init/exit functions into the module loader and describe the module. */
module_init(pci_stub_init);
module_exit(pci_stub_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Chris Wright <chrisw@sous-sol.org>");
  • 我们再看看pci_add_dynid这个函数的实现,它申请并初始化了一个pci_dynid结构,将其添加到Driver的一个链表中,最后调用driver_attach函数触发PCI Bus上Device与Driver的匹配(drivers/pci/pci-driver.c):
/**
 * pci_add_dynid - add a new PCI device ID to this driver and re-probe devices
 * @drv: target pci driver
 * @vendor: PCI vendor ID
 * @device: PCI device ID
 * @subvendor: PCI subvendor ID
 * @subdevice: PCI subdevice ID
 * @class: PCI class
 * @class_mask: PCI class mask
 * @driver_data: private driver data
 *
 * Adds a new dynamic pci device ID to this driver and causes the
 * driver to probe for all devices again.  @drv must have been
 * registered prior to calling this function.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int pci_add_dynid(struct pci_driver *drv,
		  unsigned int vendor, unsigned int device,
		  unsigned int subvendor, unsigned int subdevice,
		  unsigned int class, unsigned int class_mask,
		  unsigned long driver_data)
{
	struct pci_dynid *dynid;

	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
	if (!dynid)
		return -ENOMEM;

	dynid->id.vendor = vendor;
	dynid->id.device = device;
	dynid->id.subvendor = subvendor;
	dynid->id.subdevice = subdevice;
	dynid->id.class = class;
	dynid->id.class_mask = class_mask;
	dynid->id.driver_data = driver_data;

	spin_lock(&drv->dynids.lock);
	list_add_tail(&dynid->node, &drv->dynids.list);
	spin_unlock(&drv->dynids.lock);

	return driver_attach(&drv->driver);
}
EXPORT_SYMBOL_GPL(pci_add_dynid);
  • driver_attach遍历PCI Bus上的所有Device,使用VendorID和DeviceID与Driver进行匹配(匹配方法由Bus驱动提供)和绑定(drivers/base/dd.c):
/*
 * __driver_attach - per-device callback used by driver_attach()
 * @dev: device currently being visited by the bus walk
 * @data: the struct device_driver that is being attached
 *
 * Asks the bus whether the driver matches @dev and, on a positive match,
 * probes the device if it is not bound to a driver yet.
 *
 * Always returns 0.  bus_for_each_dev() is documented to stop iterating on a
 * non-zero return, and a problem with one device must not prevent the driver
 * from being matched against the remaining devices on the bus.
 */
static int __driver_attach(struct device *dev, void *data)
{
	struct device_driver *drv = data;
	int ret;

	/*
	 * Lock device and try to bind to it. We drop the error
	 * here and always return 0, because we need to keep trying
	 * to bind to devices and some drivers will return an error
	 * simply if it didn't support the device.
	 *
	 * driver_probe_device() will spit a warning if there
	 * is an error.
	 */

	ret = driver_match_device(drv, dev);
	if (ret == 0) {
		/* no match */
		return 0;
	} else if (ret == -EPROBE_DEFER) {
		dev_dbg(dev, "Device match requests probe deferral\n");
		driver_deferred_probe_add(dev);
		/*
		 * The bus could not decide yet whether this device matches;
		 * it has been queued for a deferred probe, so do not probe
		 * it now.  The driver may still match another device.
		 */
		return 0;
	} else if (ret < 0) {
		dev_dbg(dev, "Bus failed to match device: %d\n", ret);
		/*
		 * Driver could not match with this device, but may match
		 * with another device on the bus, so keep the walk going.
		 */
		return 0;
	} /* ret > 0 means positive match */

	if (dev->parent)	/* Needed for USB */
		device_lock(dev->parent);
	device_lock(dev);
	/* only probe devices that are not already bound to a driver */
	if (!dev->driver)
		driver_probe_device(drv, dev);
	device_unlock(dev);
	if (dev->parent)
		device_unlock(dev->parent);

	return 0;
}

/**
 * driver_attach - try to bind driver to devices.
 * @drv: driver.
 *
 * Walk the list of devices that the bus has on it and try to
 * match the driver with each one.  If driver_probe_device()
 * returns 0 and the @dev->driver is set, we've found a
 * compatible pair.
 *
 * Return: whatever bus_for_each_dev() returns for the walk that invokes
 * __driver_attach() on each device of @drv->bus.
 */
int driver_attach(struct device_driver *drv)
{
	return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
}
EXPORT_SYMBOL_GPL(driver_attach);

通过设备的sysfs文件绑定

  • PCI Bus驱动在注册时分别注册了三个(Bus、Device、Driver)SysFS属性组,而Driver的SysFS属性组包含new_id这个SysFS属性文件。在处理这个文件写入的函数new_id_store中,会从写入的数据中解析出VendorID和DeviceID,最后调用pci_add_dynid完成与设备的绑定(drivers/pci/pci-driver.c):
/**
 * new_id_store - sysfs frontend to pci_add_dynid()
 * @driver: target device driver
 * @buf: buffer for scanning device ID data
 * @count: input size
 *
 * Allow PCI IDs to be added to an existing driver via sysfs.
 *
 * The buffer is parsed as up to seven whitespace separated hexadecimal
 * fields: vendor, device, subvendor, subdevice, class, class_mask and
 * driver_data.  Only the first two are mandatory.
 *
 * Return: @count on success, -EINVAL if fewer than two fields were parsed or
 * if the driver has an id_table and none of its entries carries the given
 * driver_data, -EEXIST if fewer than seven fields were given and the ID is
 * already matched by the driver's id_table, -ENOMEM on allocation failure,
 * or the error returned by pci_add_dynid().
 */
static ssize_t new_id_store(struct device_driver *driver, const char *buf,
			    size_t count)
{
	struct pci_driver *pdrv = to_pci_driver(driver);
	const struct pci_device_id *ids = pdrv->id_table;
	/* optional fields keep these defaults when they are not supplied */
	__u32 vendor, device, subvendor = PCI_ANY_ID,
		subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
	unsigned long driver_data = 0;
	int fields = 0;
	int retval = 0;

	fields = sscanf(buf, "%x %x %x %x %x %x %lx",
			&vendor, &device, &subvendor, &subdevice,
			&class, &class_mask, &driver_data);
	/* vendor and device are mandatory */
	if (fields < 2)
		return -EINVAL;

	/*
	 * Unless all seven fields were given, refuse an ID that the
	 * driver's id_table already matches.  A temporary pci_dev carrying
	 * the parsed values is built only to run pci_match_id() on it.
	 */
	if (fields != 7) {
		struct pci_dev *pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);
		if (!pdev)
			return -ENOMEM;

		pdev->vendor = vendor;
		pdev->device = device;
		pdev->subsystem_vendor = subvendor;
		pdev->subsystem_device = subdevice;
		pdev->class = class;

		if (pci_match_id(pdrv->id_table, pdev))
			retval = -EEXIST;

		kfree(pdev);

		if (retval)
			return retval;
	}

	/* Only accept driver_data values that match an existing id_table
	   entry */
	if (ids) {
		retval = -EINVAL;
		/* the walk ends at an entry whose three tested fields are all zero */
		while (ids->vendor || ids->subvendor || ids->class_mask) {
			if (driver_data == ids->driver_data) {
				retval = 0;
				break;
			}
			ids++;
		}
		if (retval)	/* No match */
			return retval;
	}

	retval = pci_add_dynid(pdrv, vendor, device, subvendor, subdevice,
			       class, class_mask, driver_data);
	if (retval)
		return retval;
	/* report the whole write as consumed */
	return count;
}
static DRIVER_ATTR_WO(new_id);


/*
 * NULL-terminated list of the per-driver sysfs attributes: new_id and
 * remove_id.  driver_attr_remove_id is not defined in this excerpt.
 * NOTE(review): pci_drv_groups, used by pci_bus_type.drv_groups, is
 * presumably generated by ATTRIBUTE_GROUPS(pci_drv) - confirm.
 */
static struct attribute *pci_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(pci_drv);


/*
 * The "pci" bus type registered by pci_driver_init().  It supplies the bus
 * callbacks (match, uevent, probe, remove, shutdown) and three sysfs
 * attribute group sets: dev_groups, bus_groups and drv_groups.  The
 * drv_groups entry is pci_drv_groups, whose attribute list contains new_id.
 */
struct bus_type pci_bus_type = {
	.name		= "pci",
	.match		= pci_bus_match,
	.uevent		= pci_uevent,
	.probe		= pci_device_probe,
	.remove		= pci_device_remove,
	.shutdown	= pci_device_shutdown,
	.dev_groups	= pci_dev_groups,
	.bus_groups	= pci_bus_groups,
	.drv_groups	= pci_drv_groups,
	.pm		= PCI_PM_OPS_PTR,
	.num_vf		= pci_bus_num_vf,
	.force_dma	= true,
};
EXPORT_SYMBOL(pci_bus_type);

/*
 * Register the PCI bus type with the driver core.  Runs as a postcore
 * initcall and returns the result of bus_register().
 */
static int __init pci_driver_init(void)
{
	return bus_register(&pci_bus_type);
}
postcore_initcall(pci_driver_init);

Qemu中的pci-assign设备

  • 快速查一下Qemu源码中引用pci-assign的位置,我们要看的源码应该就在hw/i386/kvm/pci-assign.c这个位置(至于为什么?
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值