在drivers/pci/probe.c 中的pci_init_capabilities函数中调用pci_iov_init 来进行sriov的初始化
int pci_iov_init(struct pci_dev *dev)
{
int pos;
if (!pci_is_pcie(dev))
return -ENODEV;
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
if (pos)
return sriov_init(dev, pos);
return -ENODEV;
}
在pci_iov_init 中首先通过pci_is_pcie 来判断是不是pcie设备,因为只有pcie设备有sriov,pci设备是没有的。
其次通过pci_find_ext_capability 来查找看这款设备是否具有sriov 这个feature。如果有的话,就调用sriov_init 来做vf的初始化
static int sriov_init(struct pci_dev *dev, int pos)
{
int i, bar64;
int rc;
int nres;
u32 pgsz;
u16 ctrl, total;
struct pci_sriov *iov;
struct resource *res;
struct pci_dev *pdev;
pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
if (ctrl & PCI_SRIOV_CTRL_VFE) {
pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
ssleep(1);
}
ctrl = 0;
list_for_each_entry(pdev, &dev->bus->devices, bus_list)
if (pdev->is_physfn)
goto found;
pdev = NULL;
if (pci_ari_enabled(dev->bus))
ctrl |= PCI_SRIOV_CTRL_ARI;
found:
pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
/从配置空间中找到当前支持几个vf
pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
if (!total)
return 0;
pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
pgsz &= ~((1 << i) - 1);
if (!pgsz)
return -EIO;
pgsz &= ~(pgsz - 1);
pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
//申请一个struct pci_sriov *iov;
iov = kzalloc(sizeof(*iov), GFP_KERNEL);
if (!iov)
return -ENOMEM;
nres = 0;
//这个for循环是给vf的bar 空间赋值
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &dev->resource[i + PCI_IOV_RESOURCES];
/*
* If it is already FIXED, don't change it, something
* (perhaps EA or header fixups) wants it this way.
*/
if (res->flags & IORESOURCE_PCI_FIXED)
bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
else
bar64 = __pci_read_base(dev, pci_bar_unknown, res,
pos + PCI_SRIOV_BAR + i * 4);
if (!res->flags)
continue;
if (resource_size(res) & (PAGE_SIZE - 1)) {
rc = -EIO;
goto failed;
}
iov->barsz[i] = resource_size(res);
res->end = res->start + resource_size(res) * total - 1;
dev_info(&dev->dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n",
i, res, i, total);
i += bar64;
nres++;
}
//iov的其他成员赋值
iov->pos = pos;
iov->nres = nres;
iov->ctrl = ctrl;
iov->total_VFs = total;
iov->pgsz = pgsz;
iov->self = dev;
pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
if (pdev)
iov->dev = pci_dev_get(pdev);
else
iov->dev = dev;
mutex_init(&iov->lock);
//将iov赋值给dev->sriov,这个dev代表当前的pcie设备也就是pf.
dev->sriov = iov;
dev->is_physfn = 1;
rc = compute_max_vf_buses(dev);
if (rc)
goto fail_max_buses;
return 0;
fail_max_buses:
dev->sriov = NULL;
dev->is_physfn = 0;
failed:
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &dev->resource[i + PCI_IOV_RESOURCES];
res->flags = 0;
}
kfree(iov);
return rc;
}
最后调用compute_max_vf_buses 给每个vf 的PCI_SRIOV_NUM_VF/PCI_SRIOV_VF_OFFSET/PCI_SRIOV_VF_STRIDE 赋值
static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
{
struct pci_sriov *iov = dev->sriov;
pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
}
/*
* The PF consumes one bus number. NumVFs, First VF Offset, and VF Stride
* determine how many additional bus numbers will be consumed by VFs.
*
* Iterate over all valid NumVFs, validate offset and stride, and calculate
* the maximum number of bus numbers that could ever be required.
*/
static int compute_max_vf_buses(struct pci_dev *dev)
{
struct pci_sriov *iov = dev->sriov;
int nr_virtfn, busnr, rc = 0;
for (nr_virtfn = iov->total_VFs; nr_virtfn; nr_virtfn--) {
pci_iov_set_numvfs(dev, nr_virtfn);
if (!iov->offset || (nr_virtfn > 1 && !iov->stride)) {
rc = -EIO;
goto out;
}
busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
if (busnr > iov->max_VF_buses)
iov->max_VF_buses = busnr;
}
out:
pci_iov_set_numvfs(dev, 0);
return rc;
}
int pci_iov_init(struct pci_dev *dev)
{
int pos;
if (!pci_is_pcie(dev))
return -ENODEV;
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
if (pos)
return sriov_init(dev, pos);
return -ENODEV;
}
在pci_iov_init 中首先通过pci_is_pcie 来判断是不是pcie设备,因为只有pcie设备有sriov,pci设备是没有的。
其次通过pci_find_ext_capability 来查找看这款设备是否具有sriov 这个feature。如果有的话,就调用sriov_init 来做vf的初始化
static int sriov_init(struct pci_dev *dev, int pos)
{
int i, bar64;
int rc;
int nres;
u32 pgsz;
u16 ctrl, total;
struct pci_sriov *iov;
struct resource *res;
struct pci_dev *pdev;
pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
if (ctrl & PCI_SRIOV_CTRL_VFE) {
pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
ssleep(1);
}
ctrl = 0;
list_for_each_entry(pdev, &dev->bus->devices, bus_list)
if (pdev->is_physfn)
goto found;
pdev = NULL;
if (pci_ari_enabled(dev->bus))
ctrl |= PCI_SRIOV_CTRL_ARI;
found:
pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
/从配置空间中找到当前支持几个vf
pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
if (!total)
return 0;
pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
pgsz &= ~((1 << i) - 1);
if (!pgsz)
return -EIO;
pgsz &= ~(pgsz - 1);
pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
//申请一个struct pci_sriov *iov;
iov = kzalloc(sizeof(*iov), GFP_KERNEL);
if (!iov)
return -ENOMEM;
nres = 0;
//这个for循环是给vf的bar 空间赋值
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &dev->resource[i + PCI_IOV_RESOURCES];
/*
* If it is already FIXED, don't change it, something
* (perhaps EA or header fixups) wants it this way.
*/
if (res->flags & IORESOURCE_PCI_FIXED)
bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
else
bar64 = __pci_read_base(dev, pci_bar_unknown, res,
pos + PCI_SRIOV_BAR + i * 4);
if (!res->flags)
continue;
if (resource_size(res) & (PAGE_SIZE - 1)) {
rc = -EIO;
goto failed;
}
iov->barsz[i] = resource_size(res);
res->end = res->start + resource_size(res) * total - 1;
dev_info(&dev->dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n",
i, res, i, total);
i += bar64;
nres++;
}
//iov的其他成员赋值
iov->pos = pos;
iov->nres = nres;
iov->ctrl = ctrl;
iov->total_VFs = total;
iov->pgsz = pgsz;
iov->self = dev;
pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
if (pdev)
iov->dev = pci_dev_get(pdev);
else
iov->dev = dev;
mutex_init(&iov->lock);
//将iov赋值给dev->sriov,这个dev代表当前的pcie设备也就是pf.
dev->sriov = iov;
dev->is_physfn = 1;
rc = compute_max_vf_buses(dev);
if (rc)
goto fail_max_buses;
return 0;
fail_max_buses:
dev->sriov = NULL;
dev->is_physfn = 0;
failed:
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &dev->resource[i + PCI_IOV_RESOURCES];
res->flags = 0;
}
kfree(iov);
return rc;
}
最后调用compute_max_vf_buses 给每个vf 的PCI_SRIOV_NUM_VF/PCI_SRIOV_VF_OFFSET/PCI_SRIOV_VF_STRIDE 赋值
static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
{
struct pci_sriov *iov = dev->sriov;
pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
}
/*
* The PF consumes one bus number. NumVFs, First VF Offset, and VF Stride
* determine how many additional bus numbers will be consumed by VFs.
*
* Iterate over all valid NumVFs, validate offset and stride, and calculate
* the maximum number of bus numbers that could ever be required.
*/
static int compute_max_vf_buses(struct pci_dev *dev)
{
struct pci_sriov *iov = dev->sriov;
int nr_virtfn, busnr, rc = 0;
for (nr_virtfn = iov->total_VFs; nr_virtfn; nr_virtfn--) {
pci_iov_set_numvfs(dev, nr_virtfn);
if (!iov->offset || (nr_virtfn > 1 && !iov->stride)) {
rc = -EIO;
goto out;
}
busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
if (busnr > iov->max_VF_buses)
iov->max_VF_buses = busnr;
}
out:
pci_iov_set_numvfs(dev, 0);
return rc;
}
总结协一下。sriov的初始化就是给vf的bar空间赋值,并决定当前pf支持几个vf,并给每个vf编个号,sriov_init 只是初始化,如果需要使用vf的话,还必须主动调用pci_enable_sriov来指定需要启动哪个vf