X86上pcie的初始化枚举流程,基于kernel 5.2.9分析
`只是一个看代码笔记,仅供参考`
pcie的代码在kernel里面大致分两部分初始化的:
一部分和cpu的框架密切相关,使用arch_initcall初始化,x86的初始化代码位于init.c arch\x86\pci,arch_initcall(pci_arch_init); 主要是检测pci type, 设置全局的config 空间read/write函数。
另一部分是pcie subsystem的初始化,使用subsys_initcall 初始化,x86的代码位于legacy.c arch\x86\pci里面,前面是和x86相关的处理,最终会调用pci_scan_root_bus来扫描并初始化总线下所有的bridge和ep设备。
A. arch_initcall(pci_arch_init)的初始化:
探测bus总线,设置read,write函数
函数pci_arch_init:
/*
 * arch_initcall has too random ordering, so call the initializers
 * in the right sequence from here.
 *
 * Runs the config-space access initializers in a fixed order and logs an
 * error if neither raw_pci_ops nor raw_pci_ext_ops is set afterwards.
 * Always returns 0.
 */
static __init int pci_arch_init(void)
{
#ifdef CONFIG_PCI_DIRECT
	int type = 0;

	type = pci_direct_probe();
#endif

	if (!(pci_probe & PCI_PROBE_NOEARLY))
		pci_mmcfg_early_init();

	/* A platform hook that returns 0 ends the generic setup here. */
	if (x86_init.pci.arch_init && !x86_init.pci.arch_init())
		return 0;

#ifdef CONFIG_PCI_BIOS
	pci_pcbios_init();
#endif
	/*
	 * don't check for raw_pci_ops here because we want pcbios as last
	 * fallback, yet it's needed to run first to set pcibios_last_bus
	 * in case legacy PCI probing is used. otherwise detecting peer busses
	 * fails.
	 */
#ifdef CONFIG_PCI_DIRECT
	pci_direct_init(type);	/* the body of pci_direct_init() is shown below */
#endif
	if (!raw_pci_ops && !raw_pci_ext_ops)
		printk(KERN_ERR
		"PCI: Fatal: No config space access function found\n");

	dmi_check_pciprobe();

	dmi_check_skip_isa_align();

	return 0;
}
arch_initcall(pci_arch_init);
函数pci_direct_init:
/*
 * pci_direct_init - install the direct config-space accessors
 * @type: configuration type as passed by the caller; 0 installs nothing
 *
 * For type 1, raw_pci_ops is pointed at pci_direct_conf1, and
 * raw_pci_ext_ops as well when no extended accessor is set yet and
 * PCI_HAS_IO_ECS is present in pci_probe.  Any other non-zero type
 * selects pci_direct_conf2 for raw_pci_ops.
 */
void __init pci_direct_init(int type)
{
	if (type == 0)
		return;
	printk(KERN_INFO "PCI: Using configuration type %d for base access\n",
		 type);
	if (type == 1) {
		raw_pci_ops = &pci_direct_conf1;
		/* Keep an extended accessor that is already installed. */
		if (raw_pci_ext_ops)
			return;
		if (!(pci_probe & PCI_HAS_IO_ECS))
			return;
		printk(KERN_INFO "PCI: Using configuration type 1 "
		       "for extended access\n");
		raw_pci_ext_ops = &pci_direct_conf1;	/* its callbacks are shown below */
		return;
	}
	raw_pci_ops = &pci_direct_conf2;
}
pci_direct_conf1 结构体定义及回调函数
/* Accessor table that routes raw config reads/writes to the conf1 helpers. */
const struct pci_raw_ops pci_direct_conf1 = {
	.read	= pci_conf1_read,
	.write	= pci_conf1_write,
};
/*
 * pci_conf1_read - read config space through I/O ports 0xCF8/0xCFC
 * @seg, @bus, @devfn, @reg: target register; rejected unless seg is 0,
 *	bus and devfn are at most 255 and reg is at most 4095
 * @len: access width in bytes (1, 2 or 4); other widths read nothing
 * @value: receives the data, or all-ones when the arguments are rejected
 *
 * Returns 0, or -EINVAL when the arguments are rejected.
 */
static int pci_conf1_read(unsigned int seg, unsigned int bus,
			  unsigned int devfn, int reg, int len, u32 *value)
{
	unsigned long irq_state;

	if (seg != 0 || bus >= 256 || devfn >= 256 || reg >= 4096) {
		*value = -1;
		return -EINVAL;
	}

	/* The address write and the data access must not be interleaved. */
	raw_spin_lock_irqsave(&pci_config_lock, irq_state);

	/*
	 * #define PCI_CONF1_ADDRESS(bus, devfn, reg) \
	 * (0x80000000 | ((reg & 0xF00) << 16) | (bus << 16) \
	 * | (devfn << 8) | (reg & 0xFC))
	 */
	outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8);

	if (len == 1)
		*value = inb(0xCFC + (reg & 3));
	else if (len == 2)
		*value = inw(0xCFC + (reg & 2));
	else if (len == 4)
		*value = inl(0xCFC);

	raw_spin_unlock_irqrestore(&pci_config_lock, irq_state);

	return 0;
}
/*
 * pci_conf1_write - write config space through I/O ports 0xCF8/0xCFC
 * @seg, @bus, @devfn, @reg: target register; rejected unless seg is 0,
 *	bus and devfn are at most 255 and reg is at most 4095
 * @len: access width in bytes (1, 2 or 4); other widths write nothing
 * @value: data to write, truncated to the access width
 *
 * Returns 0, or -EINVAL when the arguments are rejected.
 */
static int pci_conf1_write(unsigned int seg, unsigned int bus,
			   unsigned int devfn, int reg, int len, u32 value)
{
	unsigned long irq_state;

	if (seg != 0 || bus >= 256 || devfn >= 256 || reg >= 4096)
		return -EINVAL;

	/* The address write and the data access must not be interleaved. */
	raw_spin_lock_irqsave(&pci_config_lock, irq_state);

	outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8);

	if (len == 1)
		outb((u8)value, 0xCFC + (reg & 3));
	else if (len == 2)
		outw((u16)value, 0xCFC + (reg & 2));
	else if (len == 4)
		outl((u32)value, 0xCFC);

	raw_spin_unlock_irqrestore(&pci_config_lock, irq_state);

	return 0;
}
B. pcie read/write config 函数的注册流程:
pci_bus_read_config_xx and pci_bus_write_config_xx:
/*
 * PCI_OP_READ - generate pci_bus_read_config_<size>()
 *
 * The generated function returns PCIBIOS_BAD_REGISTER_NUMBER when
 * PCI_<size>_BAD is true.  Otherwise it calls bus->ops->read() between
 * pci_lock_config()/pci_unlock_config(), stores the data cast to @type in
 * *value and returns the callback's result.
 */
#define PCI_OP_READ(size, type, len)					\
int noinline pci_bus_read_config_##size				\
	(struct pci_bus *bus, unsigned int devfn, int pos, type *value)	\
{									\
	unsigned long irqflags;						\
	u32 raw = 0;							\
	int ret;							\
									\
	if (PCI_##size##_BAD)						\
		return PCIBIOS_BAD_REGISTER_NUMBER;			\
	pci_lock_config(irqflags);					\
	ret = bus->ops->read(bus, devfn, pos, len, &raw);		\
	*value = (type)raw;						\
	pci_unlock_config(irqflags);					\
	return ret;							\
}
/*
 * PCI_OP_WRITE - generate pci_bus_write_config_<size>()
 *
 * The generated function returns PCIBIOS_BAD_REGISTER_NUMBER when
 * PCI_<size>_BAD is true.  Otherwise it calls bus->ops->write() between
 * pci_lock_config()/pci_unlock_config() and returns the callback's result.
 */
#define PCI_OP_WRITE(size, type, len)					\
int noinline pci_bus_write_config_##size				\
	(struct pci_bus *bus, unsigned int devfn, int pos, type value)	\
{									\
	unsigned long irqflags;						\
	int ret;							\
									\
	if (PCI_##size##_BAD)						\
		return PCIBIOS_BAD_REGISTER_NUMBER;			\
	pci_lock_config(irqflags);					\
	ret = bus->ops->write(bus, devfn, pos, len, value);		\
	pci_unlock_config(irqflags);					\
	return ret;							\
}
/*
 * Instantiate the byte (1), word (2) and dword (4) variants of the
 * pci_bus_read_config_*() and pci_bus_write_config_*() accessors and
 * export each generated symbol.
 */
PCI_OP_READ(byte, u8, 1)
PCI_OP_READ(word, u16, 2)
PCI_OP_READ(dword, u32, 4)
PCI_OP_WRITE(byte, u8, 1)
PCI_OP_WRITE(word, u16, 2)
PCI_OP_WRITE(dword, u32, 4)
EXPORT_SYMBOL(pci_bus_read_config_byte);
EXPORT_SYMBOL(pci_bus_read_config_word);
EXPORT_SYMBOL(pci_bus_read_config_dword);
EXPORT_SYMBOL(pci_bus_write_config_byte);
EXPORT_SYMBOL(pci_bus_write_config_word);
EXPORT_SYMBOL(pci_bus_write_config_dword);
C. pcie subsystem的初始化流程
1.下面是pcie scan and add device的大概函数调用流程
subsys_initcall(pci_subsys_init);
pci_subsys_init()
pci_legacy_init()
pcibios_scan_root(0);
pci_scan_root_bus();
pci_create_root_bus(parent, bus, ops, sysdata, resources);
pci_alloc_host_bridge(0);//分配buffer
pci_register_host_bridge(bridge);//注册bridge的设备
pci_scan_child_bus(b);
pci_scan_child_bus_extend(bus, 0);
/ Scan devices below a bus,子函数太多,另起一行 /
pci_scan_child_bus_extend(bus, 0);//Scan devices below a bus
for (devfn = 0; devfn < 256; devfn += 8) {
pci_scan_slot(bus, devfn);
pci_scan_single_device(bus, devfn);
pci_get_slot(bus, devfn);
pci_scan_device(bus, devfn); /---- 很重要,下面有详细讲解 ---/
pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000)
pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)
pci_alloc_dev(bus);
pci_setup_device(dev);
set_pcie_port_type(dev);//读配置空间读capability获取信息
pci_dev_assign_slot(dev);
dev_set_name();
/* 接下来的代码是根据header type来设置或者获取pci信息 */
pci_device_add(dev, bus); /--- 很重要,下面有详细讲解,根据pci的信息,配置pci设备,最后device_add(&dev->dev); ---/
pci_iov_bus_range(bus);/* find bus range used by Virtual Function,Reserve buses for SR-IOV capability */
for_each_pci_bridge(dev, bus) { //Scan bridges that are already configured. We don't touch them unless they are misconfigured
pci_scan_bridge_extend(bus, dev, max, 0, 0);
pci_find_bus(pci_domain_nr(bus), secondary);
pci_scan_child_bus(child); /* 重新执行一遍上面的扫描过程,一级一级递归扫描设备直到最后 */
for_each_pci_bridge(dev, bus) /* Scan bridges that need to be reconfigured */
pci_scan_bridge_extend(bus, dev, cmax, buses, 1);
2.下面是一些重要函数的详细代码
2.1 pci_subsys_init:
/*
 * pci_subsys_init - subsys_initcall entry point for x86 PCI
 *
 * Returns -ENODEV when the legacy probe was requested and failed,
 * otherwise 0 after the fixup, IRQ and pcibios init steps have run.
 */
static int __init pci_subsys_init(void)
{
	/*
	 * The init function returns an non zero value when
	 * pci_legacy_init should be invoked.
	 */
	if (x86_init.pci.init() && pci_legacy_init()) {
		pr_info("PCI: System does not support PCI\n");
		return -ENODEV;
	}

	pcibios_fixup_peer_bridges();
	x86_init.pci.init_irq();
	pcibios_init();

	return 0;
}
subsys_initcall(pci_subsys_init);
2.2 pci_legacy_init:
/*
 * pci_legacy_init - scan root bus 0 using the raw config accessors
 *
 * Returns 1 without scanning when raw_pci_ops is not set, otherwise 0.
 */
int __init pci_legacy_init(void)
{
	if (raw_pci_ops) {
		pr_info("PCI: Probing PCI hardware\n");
		pcibios_scan_root(0);
		return 0;
	}

	return 1;
}
2.3 pcibios_scan_root:
/*
 * pcibios_scan_root - scan one root bus and add the devices found
 * @busnum: root bus number to scan
 *
 * Allocates the per-bus sysdata, scans the bus and adds its devices.
 * When the scan yields no bus, the resource list and sysdata are freed.
 */
void pcibios_scan_root(int busnum)
{
	LIST_HEAD(resources);
	struct pci_sysdata *sysdata;
	struct pci_bus *root;

	sysdata = kzalloc(sizeof(*sysdata), GFP_KERNEL);
	if (!sysdata) {
		printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busnum);
		return;
	}

	/* Node number for this bus as reported by x86_pci_root_bus_node(). */
	sysdata->node = x86_pci_root_bus_node(busnum);
	x86_pci_root_bus_resources(busnum, &resources);

	printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);

	/* Probe this bus, its child buses and the devices on them. */
	root = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sysdata, &resources);
	if (root) {
		/* Everything has been probed; add the devices. */
		pci_bus_add_devices(root);
		return;
	}

	pci_free_resource_list(&resources);
	kfree(sysdata);
}
3. 关键函数的讲解:
3.00 pci_scan_device流程:
Read the config data for a PCI device, sanity-check it, and fill in the dev structure.
pci_scan_device();
pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000);
pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)
pci_alloc_dev(bus);
dev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
INIT_LIST_HEAD(&dev->bus_list);
dev->dev.type = &pci_dev_type;
dev->bus = pci_bus_get(bus);
pci_setup_device(dev);
pci_hdr_type(dev);
set_pcie_port_type(dev);
pci_dev_assign_slot(dev);//通过devfn计算出slot号,在bus的slots链表中找到编号相同的slot,保存到dev->slot中
list_for_each_entry(slot, &dev->bus->slots, list)
if (PCI_SLOT(dev->devfn) == slot->number)
dev->slot = slot;
dev_set_name();
set_pcie_thunderbolt(dev);
set_pcie_untrusted(dev);
pci_fixup_device(pci_fixup_early, dev);/ Early fixups, before probing the BARs /
pci_intx_mask_broken(dev);/test whether PCI_COMMAND_INTX_DISABLE is writable /
case PCI_HEADER_TYPE_NORMAL: /* standard header */
pci_read_irq(dev);/Read interrupt line and base address registers./
pci_read_bases(dev, 6, PCI_ROM_ADDRESS);/get BARs info /
pci_subsystem_ids(dev, &dev->subsystem_vendor, &dev->subsystem_device);
case PCI_HEADER_TYPE_BRIDGE:
pci_read_irq(dev);
pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
pci_read_bridge_windows(dev);/get IO and MEMORY windows size /
set_pcie_hotplug_bridge(dev); /check whether support hotplug /
pci_read_config_word(dev, pos + PCI_SSVID_VENDOR_ID, &dev->subsystem_vendor);
pci_read_config_word(dev, pos + PCI_SSVID_DEVICE_ID, &dev->subsystem_device);
case PCI_HEADER_TYPE_CARDBUS: /* CardBus bridge header */
pci_read_irq(dev);
pci_read_bases(dev, 1, 0);
pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID,&dev->subsystem_vendor);
pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
3.01 pci_setup_device 具体函数内容
Initialize the device structure with information about the device's
vendor,class,memory and IO-space addresses, IRQ lines etc.
/**
 * pci_setup_device - Fill in class and map information of a device
 * @dev: the device structure to fill
 *
 * Initialize the device structure with information about the device's
 * vendor,class,memory and IO-space addresses, IRQ lines etc.
 * Called at initialisation of the PCI subsystem and by CardBus services.
 * Returns 0 on success and negative if unknown type of device (not normal,
 * bridge or CardBus).
 *
 * A class that does not match the header type is not an error: the class
 * is reset to PCI_CLASS_NOT_DEFINED and 0 is still returned.
 */
int pci_setup_device(struct pci_dev *dev)
{
	u32 class;
	u16 cmd;
	u8 hdr_type;
	int pos = 0;
	struct pci_bus_region region;
	struct resource *res;

	hdr_type = pci_hdr_type(dev);

	dev->sysdata = dev->bus->sysdata;
	dev->dev.parent = dev->bus->bridge;
	dev->dev.bus = &pci_bus_type;
	/* Low 7 bits give the header layout, bit 7 the multifunction flag. */
	dev->hdr_type = hdr_type & 0x7f;
	dev->multifunction = !!(hdr_type & 0x80);
	dev->error_state = pci_channel_io_normal;
	set_pcie_port_type(dev);

	pci_dev_assign_slot(dev);

	/*
	 * Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
	 * set this higher, assuming the system even supports it.
	 */
	dev->dma_mask = 0xffffffff;

	dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
		     dev->bus->number, PCI_SLOT(dev->devfn),
		     PCI_FUNC(dev->devfn));

	class = pci_class(dev);

	dev->revision = class & 0xff;
	dev->class = class >> 8;		    /* upper 3 bytes */

	pci_info(dev, "[%04x:%04x] type %02x class %#08x\n",
		   dev->vendor, dev->device, dev->hdr_type, dev->class);

	if (pci_early_dump)
		early_dump_pci_device(dev);

	/* Need to have dev->class ready */
	dev->cfg_size = pci_cfg_space_size(dev);

	/* Need to have dev->cfg_size ready */
	set_pcie_thunderbolt(dev);

	set_pcie_untrusted(dev);

	/* "Unknown power state" */
	dev->current_state = PCI_UNKNOWN;

	/* Early fixups, before probing the BARs */
	pci_fixup_device(pci_fixup_early, dev);

	/* Device class may be changed after fixup */
	class = dev->class >> 8;

	if (dev->non_compliant_bars) {
		pci_read_config_word(dev, PCI_COMMAND, &cmd);
		if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
			pci_info(dev, "device has non-compliant BARs; disabling IO/MEM decoding\n");
			cmd &= ~PCI_COMMAND_IO;
			cmd &= ~PCI_COMMAND_MEMORY;
			pci_write_config_word(dev, PCI_COMMAND, cmd);
		}
	}

	dev->broken_intx_masking = pci_intx_mask_broken(dev);

	switch (dev->hdr_type) {		    /* header type */
	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
		if (class == PCI_CLASS_BRIDGE_PCI)
			goto bad;
		pci_read_irq(dev);
		pci_read_bases(dev, 6, PCI_ROM_ADDRESS);

		pci_subsystem_ids(dev, &dev->subsystem_vendor, &dev->subsystem_device);

		/*
		 * Do the ugly legacy mode stuff here rather than broken chip
		 * quirk code. Legacy mode ATA controllers have fixed
		 * addresses. These are not always echoed in BAR0-3, and
		 * BAR0-3 in a few cases contain junk!
		 */
		if (class == PCI_CLASS_STORAGE_IDE) {
			u8 progif;
			pci_read_config_byte(dev, PCI_CLASS_PROG, &progif);
			if ((progif & 1) == 0) {
				region.start = 0x1F0;
				region.end = 0x1F7;
				res = &dev->resource[0];
				res->flags = LEGACY_IO_RESOURCE;
				pcibios_bus_to_resource(dev->bus, res, &region);
				pci_info(dev, "legacy IDE quirk: reg 0x10: %pR\n",
					 res);
				region.start = 0x3F6;
				region.end = 0x3F6;
				res = &dev->resource[1];
				res->flags = LEGACY_IO_RESOURCE;
				pcibios_bus_to_resource(dev->bus, res, &region);
				pci_info(dev, "legacy IDE quirk: reg 0x14: %pR\n",
					 res);
			}
			if ((progif & 4) == 0) {
				region.start = 0x170;
				region.end = 0x177;
				res = &dev->resource[2];
				res->flags = LEGACY_IO_RESOURCE;
				pcibios_bus_to_resource(dev->bus, res, &region);
				pci_info(dev, "legacy IDE quirk: reg 0x18: %pR\n",
					 res);
				region.start = 0x376;
				region.end = 0x376;
				res = &dev->resource[3];
				res->flags = LEGACY_IO_RESOURCE;
				pcibios_bus_to_resource(dev->bus, res, &region);
				pci_info(dev, "legacy IDE quirk: reg 0x1c: %pR\n",
					 res);
			}
		}
		break;

	case PCI_HEADER_TYPE_BRIDGE:		    /* bridge header */
		/*
		 * The PCI-to-PCI bridge spec requires that subtractive
		 * decoding (i.e. transparent) bridge must have programming
		 * interface code of 0x01.
		 */
		pci_read_irq(dev);
		dev->transparent = ((dev->class & 0xff) == 1);
		pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
		pci_read_bridge_windows(dev);
		set_pcie_hotplug_bridge(dev);
		pos = pci_find_capability(dev, PCI_CAP_ID_SSVID);
		if (pos) {
			pci_read_config_word(dev, pos + PCI_SSVID_VENDOR_ID, &dev->subsystem_vendor);
			pci_read_config_word(dev, pos + PCI_SSVID_DEVICE_ID, &dev->subsystem_device);
		}
		break;

	case PCI_HEADER_TYPE_CARDBUS:		    /* CardBus bridge header */
		if (class != PCI_CLASS_BRIDGE_CARDBUS)
			goto bad;
		pci_read_irq(dev);
		pci_read_bases(dev, 1, 0);
		pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
		pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
		break;

	default:				    /* unknown header */
		pci_err(dev, "unknown header type %02x, ignoring device\n",
			dev->hdr_type);
		return -EIO;

	bad:
		/* Reached only via goto: class/header mismatch, device is kept. */
		pci_err(dev, "ignoring class %#08x (doesn't match header type %02x)\n",
			dev->class, dev->hdr_type);
		dev->class = PCI_CLASS_NOT_DEFINED << 8;
	}

	/* We found a fine healthy device, go go go... */
	return 0;
}
3.02 pci_read_bases 函数内容:
static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
{
unsigned int pos, reg;
if (dev->non_compliant_bars)
return;
/* Per PCIe r4.0, sec 9.3.4.1.11, the VF BARs are all RO Zero */
if (dev->is_virtfn)
return;
/采用循环的方式调用__pci_read_base,第一个bar的地址是PCI_BASE_ADDRESS_0(0x10)/
for (pos = 0; pos < howmany; pos++) {
struct resource *res = &dev->resource[pos];
reg = PCI_BASE_ADDRESS_0 + (pos << 2);
pos += __pci_read_base(dev, pci_bar_unknown, res, reg); //分配资源的具体函数
}
if (rom) {
struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
dev->rom_base_reg = rom;
res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
__pci_read_base(dev, pci_bar_mem32, res, rom);
}
}
3.03 __pci_read_base :
/**
* pci_read_base - Read a PCI BAR
* @dev: the PCI device
* @type: type of the BAR
* @res: resource buffer to be filled in
* @pos: BAR position in the config space
*
* Returns 1 if the BAR is 64-bit, or 0 if 32-bit.
*/
int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
struct resource *res, unsigned int pos)
{
u32 l = 0, sz = 0, mask;
u64 l64, sz64, mask64;
u16 orig_cmd;
struct pci_bus_region region, inverted_region;
mask = type ? PCI_ROM_ADDRESS_MASK : ~0;/ 判断是device还是bridge /
/* No printks while decoding is disabled! */
if (!dev->mmio_always_on) {
pci_read_config_word(dev, PCI_COMMAND, &orig_cmd);
if (orig_cmd & PCI_COMMAND_DECODE_ENABLE) {
pci_write_config_word(dev, PCI_COMMAND,
orig_cmd & ~PCI_COMMAND_DECODE_ENABLE);
}
}
res->name = pci_name(dev);
`获取bar空间大小,x86上,bar的地址已经在bios里面设置到寄存器里面了,读出来的就是bar的首地址,`
`全部写1来确定size, 读取resource的flag`
pci_read_config_dword(dev, pos, &l);
pci_write_config_dword(dev, pos, l | mask);
pci_read_config_dword(dev, pos, &sz);
pci_write_config_dword(dev, pos, l);
/*
* All bits set in sz means the device isn't working properly.
* If the BAR isn't implemented, all bits must be 0. If it's a
* memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit
* 1 must be clear.
*/
if (sz == 0xffffffff)
sz = 0;
/*
* I don't know how l can have all bits set. Copied from old code.
* Maybe it fixes a bug on some ancient platform.
*/
if (l == 0xffffffff)
l = 0;
if (type == pci_bar_unknown) {
res->flags = decode_bar(dev, l);
res->flags |= IORESOURCE_SIZEALIGN;
/根据flag判断是io还是mem/
if (res->flags & IORESOURCE_IO) {
l64 = l & PCI_BASE_ADDRESS_IO_MASK;
sz64 = sz & PCI_BASE_ADDRESS_IO_MASK;
mask64 = PCI_BASE_ADDRESS_IO_MASK & (u32)IO_SPACE_LIMIT;
} else {
l64 = l & PCI_BASE_ADDRESS_MEM_MASK;
sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK;
mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK;
}
} else {
if (l & PCI_ROM_ADDRESS_ENABLE)
res->flags |= IORESOURCE_ROM_ENABLE;
l64 = l & PCI_ROM_ADDRESS_MASK;
sz64 = sz & PCI_ROM_ADDRESS_MASK;
mask64 = PCI_ROM_ADDRESS_MASK;
}
/如果是64 bit的话,就要继续读取高32bit/
if (res->flags & IORESOURCE_MEM_64) {
pci_read_config_dword(dev, pos + 4, &l);
pci_write_config_dword(dev, pos + 4, ~0);
pci_read_config_dword(dev, pos + 4, &sz);
pci_write_config_dword(dev, pos + 4, l);
l64 |= ((u64)l << 32);
sz64 |= ((u64)sz << 32);
mask64 |= ((u64)~0 << 32);
}
if (!dev->mmio_always_on && (orig_cmd & PCI_COMMAND_DECODE_ENABLE))
pci_write_config_word(dev, PCI_COMMAND, orig_cmd);
if (!sz64)
goto fail;
sz64 = pci_size(l64, sz64, mask64);/获取size大小,对bar空间全部写一,回读,最低位为1的地方就是size/
if (!sz64) {
pci_info(dev, FW_BUG "reg 0x%x: invalid BAR (can't size)\n",
pos);
goto fail;
}
/合法性检测/
if (res->flags & IORESOURCE_MEM_64) {
if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8)
&& sz64 > 0x100000000ULL) {
res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
res->start = 0;
res->end = 0;
pci_err(dev, "reg 0x%x: can't handle BAR larger than 4GB (size %#010llx)\n",
pos, (unsigned long long)sz64);
goto out;
}
if ((sizeof(pci_bus_addr_t) < 8) && l) {
/* Above 32-bit boundary; try to reallocate */
res->flags |= IORESOURCE_UNSET;
res->start = 0;
res->end = sz64 - 1;
pci_info(dev, "reg 0x%x: can't handle BAR above 4GB (bus address %#010llx)\n",
pos, (unsigned long long)l64);
goto out;
}
}
region.start = l64;
region.end = l64 + sz64 - 1;
pcibios_bus_to_resource(dev->bus, res, ®ion); /下面有具体函数/
pcibios_resource_to_bus(dev->bus, &inverted_region, res);
/*
* If "A" is a BAR value (a bus address), "bus_to_resource(A)" is
* the corresponding resource address (the physical address used by
* the CPU. Converting that resource address back to a bus address
* should yield the original BAR value:
*
* resource_to_bus(bus_to_resource(A)) == A
*
* If it doesn't, CPU accesses to "bus_to_resource(A)" will not
* be claimed by the device.
*/
if (inverted_region.start != region.start) {
res->flags |= IORESOURCE_UNSET;
res->start = 0;
res->end = region.end - region.start;
pci_info(dev, "reg 0x%x: initial BAR value %#010llx invalid\n",
pos, (unsigned long long)region.start);
}
goto out;
fail:
res->flags = 0;
out:
if (res->flags)
pci_info(dev, "reg 0x%x: %pR\n", pos, res);
return (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
}
总线地址与CPU资源地址之间的相互转换(__pci_read_base用它们来检查BAR地址的合法性):pcibios_resource_to_bus 和 pcibios_bus_to_resource
/*
 * True iff r1 completely contains r2.  Resources of different types, or
 * with IORESOURCE_UNSET on either side, never contain one another.
 */
static inline bool resource_contains(struct resource *r1, struct resource *r2)
{
	if (resource_type(r1) != resource_type(r2) ||
	    ((r1->flags | r2->flags) & IORESOURCE_UNSET))
		return false;

	return r2->start >= r1->start && r2->end <= r1->end;
}
/*
 * pcibios_resource_to_bus - convert a resource range to a bus region
 * @bus: bus whose host bridge windows are searched
 * @region: output, receives the range with the window offset subtracted
 * @res: resource to convert
 *
 * Uses the offset of the first host bridge window that contains @res;
 * when no window contains it the offset is 0.
 */
void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region,
			     struct resource *res)
{
	struct pci_host_bridge *host = pci_find_host_bridge(bus);
	struct resource_entry *entry;
	resource_size_t delta = 0;

	resource_list_for_each_entry(entry, &host->windows) {
		if (!resource_contains(entry->res, res))
			continue;
		delta = entry->offset;
		break;
	}

	region->start = res->start - delta;
	region->end = res->end - delta;
}
EXPORT_SYMBOL(pcibios_resource_to_bus);
static bool region_contains(struct pci_bus_region *region1,
struct pci_bus_region *region2)
{
return region1->start <= region2->start && region1->end >= region2->end;
}
/*
 * pcibios_bus_to_resource - convert a bus region to a resource range
 * @bus: bus whose host bridge windows are searched
 * @res: output; its type selects the windows considered, and its
 *	start/end are overwritten
 * @region: bus region to convert
 *
 * Uses the offset of the first same-type window whose bus-side range
 * contains @region; when no window matches the offset is 0.
 */
void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
			     struct pci_bus_region *region)
{
	struct pci_host_bridge *bridge = pci_find_host_bridge(bus);
	struct resource_entry *window;
	resource_size_t offset = 0;

	resource_list_for_each_entry(window, &bridge->windows) {
		struct pci_bus_region bus_region;

		/* Walk the bridge windows, skipping those of a different type (I/O vs. memory). */
		if (resource_type(res) != resource_type(window->res))
			continue;

		bus_region.start = window->res->start - window->offset;
		bus_region.end = window->res->end - window->offset;

		/* Take the window whose bus-side range fully contains the region. */
		if (region_contains(&bus_region, region)) {
			offset = window->offset;
			break;
		}
	}

	/* Store the translated range in the caller's resource. */
	res->start = region->start + offset;
	res->end = region->end + offset;
}
3.10 pci_device_add(dev, bus) 流程:
pci_device_add(dev, bus);
pci_configure_device(dev);
pci_configure_mps(dev); /设置payload size, 以下4个都是通过pcie capability structure(id:10)来配置device/
pci_configure_extended_tags(dev, NULL); /* config extended tags */
pci_configure_relaxed_ordering(dev);/config relaxed ordering /
pci_configure_ltr(dev);
pci_configure_eetlp_prefix(dev);
pci_configure_serr(dev);/ 和bridge的SERR#有关 /
pci_acpi_program_hp_params(dev, &hp_ops);
device_initialize(&dev->dev); /init device structure./
dev->dev.release = pci_release_dev;/设置release函数/
set_dev_node(&dev->dev, pcibus_to_node(bus));//设置numa_node
dma_set_max_seg_size(&dev->dev, 65536);
dma_set_seg_boundary(&dev->dev, 0xffffffff);
pci_fixup_device(pci_fixup_header, dev);
/ This function disables memory decoding and releases memory resources /
/ of the device specified by kernel's boot parameter 'pci=resource_alignment=' /
/ It also rounds up size to specified alignment./
/ Later on, the kernel will assign page-aligned memory resource back/
/ to the device./
pci_reassigndev_resource_alignment(dev);
pci_init_capabilities(dev); /* Initialize various capabilities 比较重要,下面附详细函数 */
list_add_tail(&dev->bus_list, &bus->devices); /* Add the device to our list of discovered devices and the bus list for fixup functions */
pcibios_add_device(dev);
pci_set_msi_domain(dev);/Set up MSI IRQ domain/
device_add(&dev->dev);/add device to device hierarchy./
3.11 pci_init_capabilities 具体函数内容
进一步地根据capability 进行初始化,包括sr-iov。下面的函数都是通过函数pci_find_capability();来查找capability ID地址的:从配置空间前64字节头部里的capability指针获取第一个capability的地址,然后逐个遍历查找,找到对应的ID,获取地址。
static void pci_init_capabilities(struct pci_dev *dev)
{
/* Enhanced Allocation */
pci_ea_init(dev);
/* Setup MSI caps & disable MSI/MSI-X interrupts */
pci_msi_setup_pci_dev(dev);
/* Buffers for saving PCIe and PCI-X capabilities */
pci_allocate_cap_save_buffers(dev);
/* Power Management */
pci_pm_init(dev);
/* Vital Product Data */
pci_vpd_init(dev);
/* Alternative Routing-ID Forwarding */
pci_configure_ari(dev);
/* Single Root I/O Virtualization */
pci_iov_init(dev); / initialize the IOV capability,获取sriov信息,计算最大的busnum, 获取VF 的total值,获取每个bar空间的size,分配资源池size=total*bar_size, 后续vf分配的bar资源就是从这里来的, 计算最大的bus号,为后面的vf使用。 /
/* Address Translation Services */
pci_ats_init(dev);
/* Enable ACS P2P upstream forwarding */
pci_enable_acs(dev);
/* Precision Time Measurement */
pci_ptm_init(dev);
/* Advanced Error Reporting */
pci_aer_init(dev);
pcie_report_downtraining(dev);
if (pci_probe_reset_function(dev) == 0)/check whether the device can be safely reset/
dev->reset_fn = 1;
}