pcie的拓扑结构
我们知道PCIe总线使用的是树形拓扑结构,如下图
整个PCIe是一个树形的拓扑:
Root Complex
是树的根,它一般实现了一个主桥设备(host bridge), 一条内部PCIe总线(BUS 0),以及通过若干个PCI bridge扩展出一些root port
。host bridge
可以完成CPU地址到PCI域地址的转换,pci bridge
用于系统的扩展,没有地址转换功能;Switch
是转接器设备,目的是扩展PCIe总线。switch
中有一个upstream port
和若干个downstream port
, 每一个端口都相当于一个pci bridge
;PCIe ep device
是叶子节点设备,比如pcie
网卡,显卡,nvme卡
等。
每个PCIe设备,包括host bridge
、pci bridge
和ep
设备都有一个4k
的配置空间。arm
使用ecam
的方式访问pcie
配置空间。
1. PCIE软件层次
pcie
代码主要分散在三个目录:
drivers/pci/...
drivers/acpi/pci*
arch/**/kernel/pci.c
将pcie
代码按如下层次划分:
|-->+ pcie hp service driver +
|-->+ pcie aer service driver +
|-->+ pcie pme service driver +
|-->+ pcie dpc service driver +
|
+---------------------+ +----------------+
| pcie port bus driver| | pcie ep driver |
+---------------------+ +----------------+
+------------------------------------------+
| pcie core driver |
+------------------------------------------+
+------------------+ +-------------------+
| arch pcie driver | | acpi pcie driver |
+------------------+ +-------------------+
____________________________________________
+------------------------------------------+
| pcie hardware |
+------------------------------------------+
arch pcie driver
:放一些和架构强相关的pcie的函数实现,对应arch/***/kernel/pci.c
acpi pcie driver
: acpi扫描时所涉及到的pcie代码,包括host bridge
的解析初始化,pcie bus
的创建,ecam
的映射等, 对应drivers/acpi/pci*.c
pcie core driver
: pcie的子系统代码,包括pcie的枚举流程,资源分配流程,中断流程等,主要对应drivers/pci/*.c
pcie port bus driver
: 是pcie port的四个service代码的整合, 四个service主要指的是pcie dpc/pme/hotplug/aer
,对应的是drivers/pci/pcie/*
pcie ep driver
:是叶子节点的设备驱动,比如显卡,网卡,nvme等。
2. PCIe初始化分析
通过分析内核中的System.map
文件查看pci
的初始化调用函数
root@uos-PC:/boot# cat System.map-4.19.0-loongson-3-desktop | grep pci | grep initcall
900000000172b230 t __initcall_pci_realloc_setup_params0
900000000172b3e0 t __initcall_pcibus_class_init2
900000000172b3e8 t __initcall_pci_driver_init2
900000000172b4d0 t __initcall_acpi_pci_init3
900000000172b678 t __initcall_pci_slot_init4
900000000172b780 t __initcall_pcibios_init4
900000000172b988 t __initcall_pci_apply_final_quirks5s
900000000172bd58 t __initcall_pci_proc_init6
900000000172bd60 t __initcall_pcie_portdrv_init6
900000000172bd68 t __initcall_pci_hotplug_init6
900000000172bd78 t __initcall_loongson_pci_driver_init6
900000000172bd80 t __initcall_loongson_ppci_driver_init6
900000000172bdc8 t __initcall_serial_pci_driver_init6
900000000172be70 t __initcall_mvumi_pci_driver_init6
900000000172be90 t __initcall_ahci_pci_driver_init6
900000000172bec8 t __initcall_stmmac_pci_driver_init6
900000000172c100 t __initcall_pci_resource_alignment_sysfs_init7
900000000172c108 t __initcall_pci_sysfs_init7
可以看出关键symbol
的调用顺序如下:
|-->pcibus_class_init() /* postcore_initcall(pcibus_class_init) */
|
|-->pci_driver_init() /* postcore_initcall(pci_driver_init) */
|
|-->acpi_pci_init() /* arch_initcall(acpi_pci_init) */
|
|-->acpi_init() /* subsys_initcall(acpi_init) */
pcibus_class_init()
: 注册pci_bus class
,完成后创建了/sys/class/pci_bus
目录。pci_driver_init()
: 注册pci_bus_type
, 完成后创建了/sys/bus/pci
目录。acpi_pci_init()
: 注册acpi_pci_bus
, 并设置电源管理相应的操作。acpi_init()
: acpi启动所涉及到的初始化流程,PCIe基于acpi
的启动流程从该接口进入。
在linux/Documentation/firmware-guide/acpi/namespace.txt
中定义了acpi解析的流程
+---------+ +-------+ +--------+ +------------------------+
| RSDP | +->| XSDT | +->| FADT | | +-------------------+ |
+---------+ | +-------+ | +--------+ +-|->| DSDT | |
| Pointer | | | Entry |-+ | ...... | | | +-------------------+ |
+---------+ | +-------+ | X_DSDT |--+ | | Definition Blocks | |
| Pointer |-+ | ..... | | ...... | | +-------------------+ |
+---------+ +-------+ +--------+ | +-------------------+ |
| Entry |------------------|->| SSDT | |
+- - - -+ | +-------------------| |
| Entry | - - - - - - - -+ | | Definition Blocks | |
+- - - -+ | | +-------------------+ |
| | +- - - - - - - - - -+ |
+-|->| SSDT | |
| +-------------------+ |
| | Definition Blocks | |
| +- - - - - - - - - -+ |
+------------------------+
|
OSPM Loading |
\|/
+----------------+
| ACPI Namespace |
+----------------+
Figure 1. ACPI Definition Blocks
ACPI Namespace
就是表示系统上所有可枚举的ACPI设备的层次结构。
2.1 pcibus_class_init
drivers/pci/probe.c
/* The "pci_bus" device class; registering it creates /sys/class/pci_bus. */
static struct class pcibus_class = {
	.name		= "pci_bus",
	.dev_release	= &release_pcibus_dev,
	.dev_groups	= pcibus_groups,
};

/* Register the pci_bus class (runs at postcore initcall level). */
static int __init pcibus_class_init(void)
{
	return class_register(&pcibus_class);
}
postcore_initcall(pcibus_class_init);
2.2 pci_driver_init
drivers/pci/pci-driver.c
/*
 * Register the PCI bus type (creates /sys/bus/pci) and, when port
 * services are enabled, the PCIe port bus type that the AER/PME/
 * hotplug/DPC service drivers attach to.
 */
static int __init pci_driver_init(void)
{
	int ret;

	ret = bus_register(&pci_bus_type);
	if (ret)
		return ret;
#ifdef CONFIG_PCIEPORTBUS
	/* Bus type used by the PCIe port service drivers. */
	ret = bus_register(&pcie_port_bus_type);
	if (ret)
		return ret;
#endif
	dma_debug_add_bus(&pci_bus_type);
	return 0;
}
postcore_initcall(pci_driver_init);
2.3 acpi_pci_init
/*
 * Honor FADT boot flags (globally disable MSI/ASPM if the firmware says
 * so), register the ACPI PCI bus type and switch PCI power management
 * over to the ACPI platform PM operations.
 */
static int __init acpi_pci_init(void)
{
	int ret;

	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) {
		pr_info("ACPI FADT declares the system doesn't support MSI, so disable it\n");
		pci_no_msi();
	}

	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
		pr_info("ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
		pcie_no_aspm();
	}

	/* NOTE: registration failure is deliberately treated as non-fatal. */
	ret = register_acpi_bus_type(&acpi_pci_bus);
	if (ret)
		return 0;

	pci_set_platform_pm(&acpi_pci_platform_pm);
	acpi_pci_slot_init();
	acpiphp_init();

	return 0;
}
arch_initcall(acpi_pci_init);
2.4 pci_slot_init
drivers/pci/slot.c
/*
 * Create the "slots" kset under /sys/bus/pci; pci_slot objects created
 * later (e.g. by the ACPI slot driver) show up there.
 */
static int pci_slot_init(void)
{
	struct kset *pci_bus_kset;

	/* Parent the kset to the pci bus kobject (/sys/bus/pci). */
	pci_bus_kset = bus_get_kset(&pci_bus_type);
	pci_slots_kset = kset_create_and_add("slots", NULL,
					     &pci_bus_kset->kobj);
	if (!pci_slots_kset) {
		pr_err("PCI: Slot initialization failure\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(pci_slot_init);
2.5 acpi_init
这里有一个ACPI(Advanced Configuration and Power Interface),是因为ACPI提供了电源、硬件和固件的接口。这里只关注软件角度的ACPI的结构——在屏蔽了硬件细节的同时,提供了一系列系统资源,包括:
- ACPI寄存器
- ACPI BIOS
- ACPI Tables
drivers/acpi/bus.c
/*
 * Top-level ACPI bring-up (subsys initcall). Creates /sys/firmware/acpi,
 * initializes the ACPI core, then triggers the namespace scan
 * (acpi_scan_init) from which PCI host bridges are discovered.
 * NOTE: "..." marks lines elided from the original kernel source.
 */
static int __init acpi_init(void)
{
	int result;
	...
	acpi_kobj = kobject_create_and_add("acpi", firmware_kobj);
	if (!acpi_kobj) {
		printk(KERN_WARNING "%s: kset create error\n", __func__);
		acpi_kobj = NULL;
	}

	result = acpi_bus_init();
	...
	pci_mmcfg_late_init();	/* parse the MCFG table (ECAM regions) */
	acpi_iort_init();
	acpi_scan_init();	/* enumerate the ACPI namespace, incl. PCI roots */
	acpi_ec_init();
	acpi_debugfs_init();
	acpi_sleep_proc_init();
	acpi_wakeup_device_init();
	acpi_debugger_init();
	acpi_setup_sb_notify_handler();
	return 0;
}
subsys_initcall(acpi_init);
其中和pci相关的是:
acpi_init() /* subsys_initcall(acpi_init) */
+-> pci_mmcfg_late_init()
+-> acpi_scan_init()
+-> acpi_pci_root_init()
+-> acpi_scan_add_handler_with_hotplug(&pci_root_handler, "pci_root");
+-> .attach = acpi_pci_root_add
/*
* register pci_link_handler to list: acpi_scan_handlers_list.
* this handler has relationship with PCI IRQ.
*/
+-> acpi_pci_link_init()
/* we focus on PCI-ACPI, ignore other handlers' init */
...
+-> acpi_bus_scan()
/* create struct acpi_devices for all device in this system */
--> acpi_walk_namespace()
--> acpi_bus_attach()
--> acpi_scan_attach_handler()
--> acpi_scan_match_handler()
--> handler->attach /* attach is acpi_pci_root_add */
pci_mmcfg_late_init()
, acpi先扫描MCFG
表,MCFG
表定义了ecam
的相关资源。acpi_pci_root_init()
,定义pcie host bridge device
的attach
函数, ACPI的Definition Block中使用PNP0A03
表示一个PCI Host Bridge
。acpi_pci_link_init()
, 注册pci_link_handler
, 主要和pcie IRQ
相关。acpi_bus_scan()
, 会通过acpi_walk_namespace()
会遍历system
中所有的device
,并为这些acpi device
创建数据结构,执行对应device的attach
函数。根据ACPI spec定义,pcie host bridge device
定义在DSDT表中,acpi在扫描过程中扫描DSDT,如果发现了pcie host bridge
, 就会执行device对应的attach
函数,调用到acpi_pci_root_add()
。
2.5.1 acpi_scan_init
drivers/acpi/scan.c
/*
 * Register the built-in ACPI scan handlers (PCI root, PCI link, ...)
 * and then enumerate the entire ACPI namespace, creating struct
 * acpi_device objects and attaching matching handlers/drivers.
 */
int __init acpi_scan_init(void)
{
	int result;
	acpi_status status;
	struct acpi_table_stao *stao_ptr;

	/* pci_root_handler: its ->attach (acpi_pci_root_add) starts PCI enumeration. */
	acpi_pci_root_init();
	acpi_pci_link_init();
	acpi_processor_init();
	acpi_platform_init();
	acpi_lpss_init();
	acpi_apd_init();
	acpi_cmos_rtc_init();
	acpi_container_init();
	acpi_memory_hotplug_init();
	acpi_watchdog_init();
	acpi_pnp_init();
	acpi_int340x_thermal_init();
	acpi_amba_init();
	acpi_init_lpit();

	acpi_scan_add_handler(&generic_device_handler);

	/*
	 * If there is STAO table, check whether it needs to ignore the UART
	 * device in SPCR table.
	 */
	status = acpi_get_table(ACPI_SIG_STAO, 0,
				(struct acpi_table_header **)&stao_ptr);
	if (ACPI_SUCCESS(status)) {
		if (stao_ptr->header.length > sizeof(struct acpi_table_stao))
			pr_info(PREFIX "STAO Name List not yet supported.\n");

		if (stao_ptr->ignore_uart)
			acpi_get_spcr_uart_addr();

		acpi_put_table((struct acpi_table_header *)stao_ptr);
	}

	acpi_gpe_apply_masked_gpes();
	acpi_update_all_gpes();

	/*
	 * Although we call __add_memory() that is documented to require the
	 * device_hotplug_lock, it is not necessary here because this is an
	 * early code when userspace or any other code path cannot trigger
	 * hotplug/hotunplug operations.
	 */
	mutex_lock(&acpi_scan_lock);
	/*
	 * Enumerate devices in the ACPI namespace.
	 */
	result = acpi_bus_scan(ACPI_ROOT_OBJECT);
	if (result)
		goto out;

	result = acpi_bus_get_device(ACPI_ROOT_OBJECT, &acpi_root);
	if (result)
		goto out;

	/* Fixed feature devices do not exist on HW-reduced platform */
	if (!acpi_gbl_reduced_hardware) {
		result = acpi_bus_scan_fixed();
		if (result) {
			/* Roll back the root device on failure. */
			acpi_detach_data(acpi_root->handle,
					 acpi_scan_drop_device);
			acpi_device_del(acpi_root);
			put_device(&acpi_root->dev);
			goto out;
		}
	}

	acpi_scan_initialized = true;

out:
	mutex_unlock(&acpi_scan_lock);
	return result;
}
2.5.1.1 acpi_pci_root_init
drivers/acpi/pci_root.c
/* Scan handler matched against PNP0A03/PNP0A08 host bridge devices. */
static struct acpi_scan_handler pci_root_handler = {
	.ids = root_device_ids,
	.attach = acpi_pci_root_add,	/* invoked when a host bridge is found */
	.detach = acpi_pci_root_remove,
	.hotplug = {
		.enabled = true,
		.scan_dependent = acpi_pci_root_scan_dependent,
	},
};

/* Register the host-bridge scan handler (hotplug profile "pci_root"). */
void __init acpi_pci_root_init(void)
{
	acpi_hest_init();

	if (acpi_pci_disabled)
		return;

	pci_acpi_crs_quirks();
	acpi_scan_add_handler_with_hotplug(&pci_root_handler, "pci_root");
}
2.5.2 acpi_bus_scan
drivers/acpi/scan.c
/**
 * acpi_bus_scan - Add ACPI device node objects in a given namespace scope.
 * @handle: Root of the namespace scope to scan.
 *
 * Scan a given ACPI tree (probably recently hot-plugged) and create and add
 * found devices.
 *
 * If no devices were found, -ENODEV is returned, but it does not mean that
 * there has been a real error. There just have been no suitable ACPI objects
 * in the table trunk from which the kernel could create a device and add an
 * appropriate driver.
 *
 * Must be called under acpi_scan_lock.
 *
 * NOTE: "..." below marks lines elided from the original kernel source.
 */
int acpi_bus_scan(acpi_handle handle)
{
	struct acpi_device *device = NULL;

	acpi_bus_scan_second_pass = false;

	/* Pass 1: Avoid enumerating devices with missing dependencies. */
	if (ACPI_SUCCESS(acpi_bus_check_add(handle, true, &device)))
		acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX,
				    acpi_bus_check_add_1, NULL, NULL,
				    (void **)&device);
	...
	/* Attach scan handlers / drivers to everything found in pass 1. */
	acpi_bus_attach(device, true);

	if (!acpi_bus_scan_second_pass)
		return 0;

	/* Pass 2: Enumerate all of the remaining devices. */
	device = NULL;

	if (ACPI_SUCCESS(acpi_bus_check_add(handle, false, &device)))
		acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX,
				    acpi_bus_check_add_2, NULL, NULL,
				    (void **)&device);

	acpi_bus_attach(device, false);

	return 0;
}
EXPORT_SYMBOL(acpi_bus_scan);
2.5.2.1 acpi_bus_attach
drivers/acpi/scan.c
/*
 * Attach a scan handler or driver to @device and recurse into its
 * children. For a PCI host bridge the matched handler's ->attach is
 * acpi_pci_root_add(). "..." marks elided kernel code.
 */
static void acpi_bus_attach(struct acpi_device *device, bool first_pass)
{
	struct acpi_device *child;
	bool skip = !first_pass && device->flags.visited;
	acpi_handle ejd;
	int ret;
	...
	if (ACPI_SUCCESS(acpi_bus_get_ejd(device->handle, &ejd)))
		register_dock_dependent_device(device, ejd);

	acpi_bus_get_status(device);
	/* Skip devices that are not present. */
	if (!acpi_device_is_present(device)) {
		device->flags.initialized = false;
		acpi_device_clear_enumerated(device);
		device->flags.power_manageable = 0;
		return;
	}
	...
	...
	/* Try the registered scan handlers first (e.g. pci_root_handler). */
	ret = acpi_scan_attach_handler(device);
	if (ret < 0)
		return;

	device->flags.match_driver = true;
	if (ret > 0 && !device->flags.enumeration_by_parent) {
		acpi_device_set_enumerated(device);
		goto ok;
	}

	/* No handler claimed it: fall back to regular driver binding. */
	ret = device_attach(&device->dev);
	if (ret < 0)
		return;

	if (device->pnp.type.platform_id || device->flags.enumeration_by_parent)
		acpi_default_enumeration(device);
	else
		acpi_device_set_enumerated(device);

ok:
	/* Depth-first recursion into the child ACPI devices. */
	list_for_each_entry(child, &device->children, node)
		acpi_bus_attach(child, first_pass);

	if (!skip && device->handler && device->handler->hotplug.notify_online)
		device->handler->hotplug.notify_online(device);
}
2.5.2.2 acpi_scan_attach_handler
drivers/acpi/scan.c
/*
 * Walk the device's hardware IDs and try each matching scan handler's
 * ->attach callback until one claims the device (returns > 0).
 */
static int acpi_scan_attach_handler(struct acpi_device *device)
{
	struct acpi_hardware_id *hwid;
	int ret = 0;

	list_for_each_entry(hwid, &device->pnp.ids, list) {
		const struct acpi_device_id *devid;
		struct acpi_scan_handler *handler;

		handler = acpi_scan_match_handler(hwid->id, &devid);
		if (handler) {
			if (!handler->attach) {
				device->pnp.type.platform_id = 0;
				continue;
			}
			device->handler = handler;
			ret = handler->attach(device, devid);
			if (ret > 0)	/* handler claimed the device */
				break;

			device->handler = NULL;
			if (ret < 0)
				break;
		}
	}

	return ret;
}
这里遍历的链表就是上面注册的,可以看到先acpi_scan_match_handler
,就是分别通过match
和id
进行匹配,如果匹配成功了就执行handler
的attach
函数,这就对应到了上面的acpi_pci_root_add
函数。
2.5.2.3 acpi_pci_root_add
该函数会分配一个acpi_pci_root
,并对其进行初始化,一般情况下仅含有一个HOST
桥。
/*
 * ->attach callback of pci_root_handler: called for each PNP0A03/PNP0A08
 * host bridge found in the ACPI namespace. Reads _SEG/_CRS/_BBN, builds
 * an acpi_pci_root and then enumerates the hierarchy below it via
 * pci_acpi_scan_root(). "..." marks elided kernel code.
 */
static int acpi_pci_root_add(struct acpi_device *device,
			     const struct acpi_device_id *not_used)
{
	unsigned long long segment, bus;
	acpi_status status;
	int result;
	struct acpi_pci_root *root;
	acpi_handle handle = device->handle;
	int no_aspm = 0;
	bool hotadd = system_state == SYSTEM_RUNNING;
	bool is_pcie;

	root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
	if (!root)
		return -ENOMEM;

	segment = 0;
	/* _SEG: PCI domain (segment) number; defaults to 0 if absent. */
	status = acpi_evaluate_integer(handle, METHOD_NAME__SEG, NULL,
				       &segment);
	if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
		dev_err(&device->dev, "can't evaluate _SEG\n");
		result = -ENODEV;
		goto end;
	}

	/* Check _CRS first, then _BBN. If no _BBN, default to zero. */
	root->secondary.flags = IORESOURCE_BUS;
	status = try_get_root_bridge_busnr(handle, &root->secondary);
	if (ACPI_FAILURE(status)) {
		/*
		 * We need both the start and end of the downstream bus range
		 * to interpret _CBA (MMCONFIG base address), so it really is
		 * supposed to be in _CRS. If we don't find it there, all we
		 * can do is assume [_BBN-0xFF] or [0-0xFF].
		 */
		root->secondary.end = 0xFF;
		dev_warn(&device->dev,
			 FW_BUG "no secondary bus range in _CRS\n");
		status = acpi_evaluate_integer(handle, METHOD_NAME__BBN,
					       NULL, &bus);
		...
	}

	root->device = device;
	root->segment = segment & 0xFFFF;
	strcpy(acpi_device_name(device), ACPI_PCI_ROOT_DEVICE_NAME);
	strcpy(acpi_device_class(device), ACPI_PCI_ROOT_CLASS);
	device->driver_data = root;

	if (hotadd && dmar_device_add(handle)) {
		result = -ENXIO;
		goto end;
	}

	pr_info(PREFIX "%s [%s] (domain %04x %pR)\n",
		acpi_device_name(device), acpi_device_bid(device),
		root->segment, &root->secondary);

	root->mcfg_addr = acpi_pci_root_get_mcfg_addr(handle);

	/* PNP0A08 identifies a PCI Express root; PNP0A03 a plain PCI one. */
	is_pcie = strcmp(acpi_device_hid(device), "PNP0A08") == 0;
	negotiate_os_control(root, &no_aspm, is_pcie);
	/*
	 * Up to here one acpi_pci_root has been allocated and initialized;
	 * normally the system has a single host bridge.
	 */
	/*
	 * TBD: Need PCI interface for enumeration/configuration of roots.
	 */

	/*
	 * Scan the Root Bridge
	 * --------------------
	 * Must do this prior to any attempt to bind the root device, as the
	 * PCI namespace does not get created until this call is made (and
	 * thus the root bridge's pci_dev does not exist).
	 */
	root->bus = pci_acpi_scan_root(root);
	if (!root->bus) {
		dev_err(&device->dev,
			"Bus %04x:%02x not present in PCI namespace\n",
			root->segment, (unsigned int)root->secondary.start);
		device->driver_data = NULL;
		result = -ENODEV;
		goto remove_dmar;
	}

	if (no_aspm)
		pcie_no_aspm();

	pci_acpi_add_bus_pm_notifier(device);
	device_set_wakeup_capable(root->bus->bridge, device->wakeup.flags.valid);

	if (hotadd) {
		pcibios_resource_survey_bus(root->bus);
		pci_assign_unassigned_root_bus_resources(root->bus);
		/*
		 * This is only called for the hotadd case. For the boot-time
		 * case, we need to wait until after PCI initialization in
		 * order to deal with IOAPICs mapped in on a PCI BAR.
		 *
		 * This is currently x86-specific, because acpi_ioapic_add()
		 * is an empty function without CONFIG_ACPI_HOTPLUG_IOAPIC.
		 * And CONFIG_ACPI_HOTPLUG_IOAPIC depends on CONFIG_X86_IO_APIC
		 * (see drivers/acpi/Kconfig).
		 */
		acpi_ioapic_add(root->device->handle);
	}

	/* Make the scanned devices visible to drivers. */
	pci_lock_rescan_remove();
	pci_bus_add_devices(root->bus);
	pci_unlock_rescan_remove();
	return 1;

remove_dmar:
	if (hotadd)
		dmar_device_remove(handle);
end:
	kfree(root);
	return result;
}
acpi_pci_root_add
函数主要做了几个动作
- 通过ACPI的
_SEG
参数, 获取host bridge
使用的segment
号,segment
指的就是pcie domain
, 主要目的是为了突破pcie
最大256条bus的限制。 - 通过ACPI的
_CRS
里的BusRange
类型资源取得该Host Bridge
的Secondary
总线范围,保存在root->secondary
这个resource
中。 - 通过ACPI的
_BBN
参数获取host bridge
的根总线号。
/* Per-host-bridge state created in acpi_pci_root_add(). */
struct acpi_pci_root {
	struct acpi_device * device;
	struct pci_bus *bus;
	u16 segment;			/* PCI domain number from _SEG */
	struct resource secondary;	/* downstream bus range */

	u32 osc_support_set;		/* _OSC state of support bits */
	u32 osc_control_set;		/* _OSC state of control bits */
	phys_addr_t mcfg_addr;		/* ECAM base address from _CBA */
};
通过ACPI表获取HOST主桥的segment
和bus
号。
执行到这里如果没有返回失败,硬件设备上会有如下打印:
[ 0.017559] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
3. PCIe枚举设备
3.1 pci_acpi_scan_root枚举PCI设备
pci_acpi_scan_root
函数是 pcie枚举流程的入口函数
/*
 * Entry point of the PCIe enumeration flow: set up the ECAM mapping for
 * this host bridge and create/scan its root bus.
 * "...." marks elided kernel code.
 */
struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
{
	int node = acpi_get_node(root->device->handle);
	struct acpi_pci_generic_root_info *ri;
	struct pci_bus *bus, *child;
	struct acpi_pci_root_ops *root_ops;

	ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node);
	if (!ri)
		return NULL;

	root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node);
	if (!root_ops) {
		kfree(ri);
		return NULL;
	}

	/* Map the ECAM window described by the MCFG table. */
	ri->cfg = pci_acpi_setup_ecam_mapping(root);
	if (!ri->cfg) {
		kfree(ri);
		kfree(root_ops);
		return NULL;
	}

	root_ops->release_info = pci_acpi_generic_release_info;
	root_ops->prepare_resources = pci_acpi_root_prepare_resources;
	/* Config-space accessors come from the ECAM ops. */
	root_ops->pci_ops = &ri->cfg->ops->pci_ops;
	bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
	if (!bus)
		return NULL;
	....
	return bus;
}
pci_acpi_setup_ecam_mapping()
, 建立ecam映射。 arm64上访问pcie的配置空间都是通过ecam
机制进行访问,将ecam
的空间进行映射,这样cpu就可以通过访问内存访问到相应设备的配置空间。- 通过
pci_find_bus
查找HOST Bridge对应的segment
,bus num
有没有被注册,如果注册了就更新一下信息,没有注册则调用acpi_pci_root_create
创建,该函数中有两个比较重要,一个是pci_create_root_bus
,该函数将会分配一个主桥结构,就是一个device
,其parent
为NULL
,为PCI设备的顶级节点。
3.1.1 pci_acpi_setup_ecam_mapping
/*
 * Look up this root's ECAM region in the MCFG table and ioremap it, so
 * config space can be reached through plain memory accesses.
 * "..." marks elided kernel code.
 */
static struct pci_config_window *
pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
{
	struct device *dev = &root->device->dev;
	struct resource *bus_res = &root->secondary;
	u16 seg = root->segment;
	struct pci_ecam_ops *ecam_ops;
	struct resource cfgres;
	struct acpi_device *adev;
	struct pci_config_window *cfg;
	int ret;

	/* MCFG lookup yields both the ECAM window and the access ops. */
	ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops);
	if (ret) {
		dev_err(dev, "%04x:%pR ECAM region not found\n", seg, bus_res);
		return NULL;
	}

	adev = acpi_resource_consumer(&cfgres);
	...
	/* ioremap the window; runs ecam_ops->init if the vendor defined one. */
	cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
	if (IS_ERR(cfg)) {
		dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res,
			PTR_ERR(cfg));
		return NULL;
	}

	return cfg;
}
/*
 * Example of a vendor-specific ECAM ops structure (lives under
 * drivers/pci/controller/); the generic default is pci_generic_ecam_ops.
 */
struct pci_ecam_ops ali_pcie_ops = {
	.bus_shift	= 20,	/* 1 MiB of config space per bus */
	.init		= ali_pcie_init,
	.pci_ops	= {
		.map_bus	= ali_pcie_map_bus,
		.read		= ali_pcie_rd_conf,
		.write		= ali_pcie_wr_conf,
	}
};
pci_mcfg_lookup()
, 通过该接口可以获取ecam
的资源以及访问配置空间的操作ecam_ops
.ecam_ops
默认是pci_generic_ecam_ops
, 定义在drivers/pci/ecam.c
中,但也可以由厂商自定义,厂商自定义的ecam_ops
实现在drivers/pci/controller/
目录下, 比如hisi_pcie_ops
和ali_pcie_ops
,厂商会依据实际的硬件对ecam
进行限制。pci_ecam_create()
, 对ecam
的地址进行ioremap
,如果定义了ecam_ops->init
,还会执行到相应的初始化函数中.- 设置
root_ops
的pci_ops
, 这里的pci_ops
就是对应上面说的ecam_ops->pci_ops
, 即配置空间的访问接口。
3.1.2 acpi_pci_root_create
/*
 * Build the root pci_bus for @root: prepare host-bridge resources from
 * _CRS, create the root bus, apply the _OSC negotiation results, and
 * then scan all child buses.
 */
struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
				     struct acpi_pci_root_ops *ops,
				     struct acpi_pci_root_info *info,
				     void *sysdata)
{
	int ret, busnum = root->secondary.start;
	struct acpi_device *device = root->device;
	int node = acpi_get_node(device->handle);
	struct pci_bus *bus;
	struct pci_host_bridge *host_bridge;
	union acpi_object *obj;

	info->root = root;
	info->bridge = device;
	info->ops = ops;
	INIT_LIST_HEAD(&info->resources);
	snprintf(info->name, sizeof(info->name), "PCI Bus %04x:%02x",
		 root->segment, busnum);

	if (ops->init_info && ops->init_info(info))
		goto out_release_info;
	if (ops->prepare_resources)
		ret = ops->prepare_resources(info);
	else
		ret = acpi_pci_probe_root_resources(info);
	if (ret < 0)
		goto out_release_info;

	pci_acpi_root_add_resources(info);
	pci_add_resource(&info->resources, &root->secondary);
	/* parent == NULL: the host bridge is a top-level device. */
	bus = pci_create_root_bus(NULL, busnum, ops->pci_ops,
				  sysdata, &info->resources);
	if (!bus)
		goto out_release_info;

	host_bridge = to_pci_host_bridge(bus->bridge);
	/* Disable native services the firmware kept control of via _OSC. */
	if (!(root->osc_control_set & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL))
		host_bridge->native_pcie_hotplug = 0;
	if (!(root->osc_control_set & OSC_PCI_SHPC_NATIVE_HP_CONTROL))
		host_bridge->native_shpc_hotplug = 0;
	if (!(root->osc_control_set & OSC_PCI_EXPRESS_AER_CONTROL))
		host_bridge->native_aer = 0;
	if (!(root->osc_control_set & OSC_PCI_EXPRESS_PME_CONTROL))
		host_bridge->native_pme = 0;
	if (!(root->osc_control_set & OSC_PCI_EXPRESS_LTR_CONTROL))
		host_bridge->native_ltr = 0;
	if (!(root->osc_control_set & OSC_PCI_EXPRESS_DPC_CONTROL))
		host_bridge->native_dpc = 0;

	/*
	 * Evaluate the "PCI Boot Configuration" _DSM Function. If it
	 * exists and returns 0, we must preserve any PCI resource
	 * assignments made by firmware for this host bridge.
	 */
	obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), &pci_acpi_dsm_guid, 1,
				DSM_PCI_PRESERVE_BOOT_CONFIG, NULL);
	if (obj && obj->type == ACPI_TYPE_INTEGER && obj->integer.value == 0)
		host_bridge->preserve_config = 1;
	ACPI_FREE(obj);

	/* Depth-first enumeration of everything below the root bus. */
	pci_scan_child_bus(bus);
	pci_set_host_bridge_release(host_bridge, acpi_pci_root_release_info,
				    info);
	if (node != NUMA_NO_NODE)
		dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node);
	return bus;

out_release_info:
	__acpi_pci_root_release_info(info);
	return NULL;
}
3.3.1 pci_create_root_bus
pci_create_root_bus()
用来创建该{segment: busnr}
下的根总线。传递的参数: NULL
是host bridge
设备的parent
节点; busnum
是总线号; ops->pci_ops
对应的是ecam->pci_ops
,即配置空间的操作接口; sysdata
私有数据,对应的是pcie_create_ecam()
所返回的pci_cfg_window
, 包括ecam
的地址范围,映射地址等; info->resource
是一个resource_list
, 用来保存总线号,I/O
空间,mem
空间等信息。
/*
 * Create the root bus for one {segment:busnr}. @parent is NULL for an
 * ACPI-discovered host bridge, making the bridge device the top of the
 * device hierarchy.
 */
struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
		struct pci_ops *ops, void *sysdata, struct list_head *resources)
{
	int error;
	struct pci_host_bridge *bridge;

	/* One pci_host_bridge object per PCI host bridge device. */
	bridge = pci_alloc_host_bridge(0);
	if (!bridge)
		return NULL;

	/* parent is NULL here: the host bridge device is a top-level device. */
	bridge->dev.parent = parent;

	list_splice_init(resources, &bridge->windows);
	bridge->sysdata = sysdata;
	bridge->busnr = bus;
	bridge->ops = ops;

	error = pci_register_host_bridge(bridge);
	if (error < 0)
		goto err_out;

	/* Return the root bus, i.e. the pci_host_bridge's bus member. */
	return bridge->bus;

err_out:
	put_device(&bridge->dev);
	return NULL;
}
EXPORT_SYMBOL_GPL(pci_create_root_bus);
- 分配一个主桥结构,就是一个
device
,其parent
为NULL
,为PCI设备的顶级节点 - 接下来注册主桥
3.3.1.1 pci_register_host_bridge
注册host bridge device
。
主要是为host bridge数据结构注册对应的设备,创建了一个根总线pci_bus
, 也为该pci_bus
数据结构注册一个设备并填充初始化的数据。
/*
 * Register the host bridge device and create + register the root
 * pci_bus below it; then insert the bridge's windows (bus/io/mem
 * resources) into the new bus. "..." marks elided kernel code (the
 * error path that jumps to the "unregister:" label is in the elision).
 */
static int pci_register_host_bridge(struct pci_host_bridge *bridge)
{
	struct device *parent = bridge->dev.parent;
	struct resource_entry *window, *n;
	struct pci_bus *bus, *b;
	resource_size_t offset;
	LIST_HEAD(resources);
	struct resource *res;
	char addr[64], *fmt;
	const char *name;
	int err;

	bus = pci_alloc_bus(NULL);
	if (!bus)
		return -ENOMEM;

	bridge->bus = bus;

	/* Temporarily move resources off the list */
	list_splice_init(&bridge->windows, &resources);
	bus->sysdata = bridge->sysdata;
	bus->msi = bridge->msi;
	bus->ops = bridge->ops;
	bus->number = bus->busn_res.start = bridge->busnr;
#ifdef CONFIG_PCI_DOMAINS_GENERIC
	bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
#endif

	b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
	if (b) {
		/* Ignore it if we already got here via a different bridge */
		dev_dbg(&b->dev, "bus already known\n");
		err = -EEXIST;
		goto free;
	}

	/* Bridge device name, e.g. "pci0000:00" for domain 0, bus 0. */
	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(bus),
		     bridge->busnr);

	err = pcibios_root_bridge_prepare(bridge);
	if (err)
		goto free;

	err = device_add(&bridge->dev);
	if (err) {
		put_device(&bridge->dev);
		goto free;
	}
	bus->bridge = get_device(&bridge->dev);
	device_enable_async_suspend(bus->bridge);
	pci_set_bus_of_node(bus);
	pci_set_bus_msi_domain(bus);
	if (!parent)
		set_dev_node(bus->bridge, pcibus_to_node(bus));

	/* The root bus is itself a pci_bus-class device, e.g. "0000:00". */
	bus->dev.class = &pcibus_class;
	bus->dev.parent = bus->bridge;

	dev_set_name(&bus->dev, "%04x:%02x", pci_domain_nr(bus), bus->number);
	name = dev_name(&bus->dev);

	err = device_register(&bus->dev);
	...
	pcibios_add_bus(bus);

	if (bus->ops->add_bus) {
		err = bus->ops->add_bus(bus);
		if (WARN_ON(err < 0))
			dev_err(&bus->dev, "failed to add bus: %d\n", err);
	}

	/* Create legacy_io and legacy_mem files for this bus */
	pci_create_legacy_files(bus);

	if (parent)
		dev_info(parent, "PCI host bridge to bus %s\n", name);
	else
		pr_info("PCI host bridge to bus %s\n", name);

	if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE)
		dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n");

	/* Add initial resources to the bus */
	resource_list_for_each_entry_safe(window, n, &resources) {
		list_move_tail(&window->node, &bridge->windows);
		offset = window->offset;
		res = window->res;

		if (res->flags & IORESOURCE_BUS)
			pci_bus_insert_busn_res(bus, bus->number, res->end);
		else
			pci_bus_add_resource(bus, res, 0);

		if (offset) {
			if (resource_type(res) == IORESOURCE_IO)
				fmt = " (bus address [%#06llx-%#06llx])";
			else
				fmt = " (bus address [%#010llx-%#010llx])";

			snprintf(addr, sizeof(addr), fmt,
				 (unsigned long long)(res->start - offset),
				 (unsigned long long)(res->end - offset));
		} else
			addr[0] = '\0';

		dev_info(&bus->dev, "root bus resource %pR%s\n", res, addr);
	}

	down_write(&pci_bus_sem);
	list_add_tail(&bus->node, &pci_root_buses);
	up_write(&pci_bus_sem);

	return 0;

unregister:
	put_device(&bridge->dev);
	device_del(&bridge->dev);

free:
	kfree(bus);
	return err;
}
一个主桥下面新建了一个pci_bus
, 其也对应一个设备,这两个设备都注册到系统中,注意他们的名字
dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(bus), bridge->busnr);
dev_set_name(&bus->dev, "%04x:%02x", pci_domain_nr(bus), bus->number);
对应串口log为:
[ 0.018264] PCI host bridge to bus 0000:00
[ 0.018267] pci_bus 0000:00: root bus resource [io 0x4000-0xffff window]
[ 0.018270] pci_bus 0000:00: root bus resource [mem 0xe0040000000-0xe007fffffff window] (bus address [0x40000000-0x7fffffff])
[ 0.018273] pci_bus 0000:00: root bus resource [bus 00-ff]
下图显示了注册后的PCI设备分级情况,其他的PCI设备都挂在主桥下
uos@uos-PC:~$ ls /sys/devices/pci0000\:00/
0000:00:00.0 0000:00:05.0 0000:00:08.0 0000:00:0a.0 0000:00:0f.0 0000:00:17.0 power
0000:00:04.0 0000:00:05.1 0000:00:08.1 0000:00:0b.0 0000:00:13.0 firmware_node uevent
0000:00:04.1 0000:00:07.0 0000:00:08.2 0000:00:0d.0 0000:00:16.0 pci_bus
3.3.2 pci_scan_child_bus
开始遍历host bridge
主桥下的所有pci设备
pci_scan_child_bus
包含整个的枚举过程,简单而言就是一个DFS
,这个过程最终确定了每级PCI桥的bus范围:[secondary, subordinate]
/**
 * pci_scan_child_bus() - Scan devices below a bus
 * @bus: Bus to scan for devices
 *
 * Scans devices below @bus including subordinate buses. Returns new
 * subordinate number including all the found devices.
 */
unsigned int pci_scan_child_bus(struct pci_bus *bus)
{
	/* 0 extra buses: do not extend beyond the minimal bus range. */
	return pci_scan_child_bus_extend(bus, 0);
}
EXPORT_SYMBOL_GPL(pci_scan_child_bus);
pci_scan_child_bus()
+-> pci_scan_child_bus_extend()
+-> for dev range(0, 256)
pci_scan_slot()
+-> pci_scan_single_device()
+-> pci_scan_device()
+-> pci_bus_read_dev_vendor_id()
+-> pci_alloc_dev()
+-> pci_setup_device()
+-> pci_add_device()
+-> for each pci bridge
+-> pci_scan_bridge_extend()
pci_scan_slot()
: 一条pcie总线最多32个设备,每个设备最多8个function
, 所以这里pci_scan_child_bus
枚举了所有的pcie function
, 调用了pci_scan_slot
256次,pci_scan_slot
调用pci_scan_single_device()
配置当前总线下的所有pci设备。pci_scan_single_device()
: 进一步调用pci_scan_device()
和pci_add_device()
。pci_scan_device
先去通过配置空间访问接口读取设备的vendor id
, 如果60s
没读到,说明没有找到该设备。 如果找到该设备,则通过pci_alloc_dev
创建pci_dev
数据结构,并对pci的配置空间进行一些配置。pci_setup_device()
: 获取pci设备信息,中断号,BAR地址和大小(使用pci_read_bases
, 就是往BAR地址写1来计算的),并保存到pci_dev->resources
中。pci_add_device
,软件将pci dev
添加到设备list
中。
3.3.3 pci_scan_child_bus_extend
现在我们已经扫描完了host bridge
下的bus
和dev
, 现在开始扫描bridge
, 一个bridge
也对应一个pci_dev
。比如switch
中的每一个port
对应一个pci bridge
。
pci_scan_bridge_extend()
就是用于扫描pci桥和pci桥下的所有设备, 这个函数会被调用2次,第一次是处理BIOS已经配置好的pci桥, 这个是为了兼容各个架构所做的妥协。通过2次调用pci_scan_bridge_extend
函数,完成所有的pci桥的处理。
/**
 * pci_scan_child_bus_extend() - Scan devices below a bus
 * @bus: Bus to scan for devices
 * @available_buses: Total number of buses available (%0 does not try to
 * extend beyond the minimal)
 *
 * Scans devices below @bus including subordinate buses. Returns new
 * subordinate number including all the found devices. Passing
 * @available_buses causes the remaining bus space to be distributed
 * equally between hotplug-capable bridges to allow future extension of the
 * hierarchy.
 */
static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,
					      unsigned int available_buses)
{
	unsigned int used_buses, normal_bridges = 0, hotplug_bridges = 0;
	unsigned int start = bus->busn_res.start;
	unsigned int devfn, fn, cmax, max = start;
	struct pci_dev *dev;
	int nr_devs;

	dev_dbg(&bus->dev, "scanning bus\n");

	/* Go find them, Rover! */
	/* 256 devfns per bus: 32 devices x 8 functions each. */
	for (devfn = 0; devfn < 256; devfn += 8) {
		nr_devs = pci_scan_slot(bus, devfn);

		/*
		 * The Jailhouse hypervisor may pass individual functions of a
		 * multi-function device to a guest without passing function 0.
		 * Look for them as well.
		 */
		if (jailhouse_paravirt() && nr_devs == 0) {
			for (fn = 1; fn < 8; fn++) {
				dev = pci_scan_single_device(bus, devfn + fn);
				if (dev)
					dev->multifunction = 1;
			}
		}
	}

	/* Reserve buses for SR-IOV capability */
	used_buses = pci_iov_bus_range(bus);
	max += used_buses;

	/*
	 * After performing arch-dependent fixup of the bus, look behind
	 * all PCI-to-PCI bridges on this bus.
	 */
	if (!bus->is_added) {
		dev_dbg(&bus->dev, "fixups for bus\n");
		pcibios_fixup_bus(bus);
		bus->is_added = 1;
	}

	/*
	 * Calculate how many hotplug bridges and normal bridges there
	 * are on this bus. We will distribute the additional available
	 * buses between hotplug bridges.
	 */
	for_each_pci_bridge(dev, bus) {
		if (dev->is_hotplug_bridge)
			hotplug_bridges++;
		else
			normal_bridges++;
	}

	/*
	 * Scan bridges that are already configured. We don't touch them
	 * unless they are misconfigured (which will be done in the second
	 * scan below).
	 */
	for_each_pci_bridge(dev, bus) {
		cmax = max;
		/* pass 0: keep the firmware-assigned bus numbers. */
		max = pci_scan_bridge_extend(bus, dev, max, 0, 0);

		/*
		 * Reserve one bus for each bridge now to avoid extending
		 * hotplug bridges too much during the second scan below.
		 */
		used_buses++;
		if (cmax - max > 1)
			used_buses += cmax - max - 1;
	}

	/* Scan bridges that need to be reconfigured */
	for_each_pci_bridge(dev, bus) {
		unsigned int buses = 0;

		if (!hotplug_bridges && normal_bridges == 1) {
			/*
			 * There is only one bridge on the bus (upstream
			 * port) so it gets all available buses which it
			 * can then distribute to the possible hotplug
			 * bridges below.
			 */
			buses = available_buses;
		} else if (dev->is_hotplug_bridge) {
			/*
			 * Distribute the extra buses between hotplug
			 * bridges if any.
			 */
			buses = available_buses / hotplug_bridges;
			buses = min(buses, available_buses - used_buses + 1);
		}

		cmax = max;
		/* pass 1: (re)assign bus numbers for unconfigured bridges. */
		max = pci_scan_bridge_extend(bus, dev, cmax, buses, 1);
		/* One bus is already accounted so don't add it again */
		if (max - cmax > 1)
			used_buses += max - cmax - 1;
	}

	/*
	 * Make sure a hotplug bridge has at least the minimum requested
	 * number of buses but allow it to grow up to the maximum available
	 * bus number of there is room.
	 */
	if (bus->self && bus->self->is_hotplug_bridge) {
		used_buses = max_t(unsigned int, available_buses,
				   pci_hotplug_bus_size - 1);
		if (max - start < used_buses) {
			max = start + used_buses;

			/* Do not allocate more buses than we have room left */
			if (max > bus->busn_res.end)
				max = bus->busn_res.end;

			dev_dbg(&bus->dev, "%pR extended by %#02x\n",
				&bus->busn_res, max - start);
		}
	}

	/*
	 * We've scanned the bus and so we know all about what's on
	 * the other side of any bridges that may be on this bus plus
	 * any devices.
	 *
	 * Return how far we've got finding sub-buses.
	 */
	dev_dbg(&bus->dev, "bus scan returning with max=%02x\n", max);
	return max;
}
3.3.3.1 pci_scan_slot
pci_scan_slot->pci_scan_single_device
/**
 * pci_scan_slot - Scan a PCI slot on a bus for devices
 * @bus: PCI bus to scan
 * @devfn: slot number to scan (must have zero function)
 *
 * Scan a PCI slot on the specified PCI bus for devices, adding
 * discovered devices to the @bus->devices list. New devices
 * will not have is_added set.
 *
 * Returns the number of new devices found.
 */
int pci_scan_slot(struct pci_bus *bus, int devfn)
{
	unsigned fn, nr = 0;
	struct pci_dev *dev;

	if (only_one_child(bus) && (devfn > 0))
		return 0; /* Already scanned the entire slot */

	/* Function 0 must respond for the slot to be considered populated. */
	dev = pci_scan_single_device(bus, devfn);
	if (!dev)
		return 0;
	if (!pci_dev_is_added(dev))
		nr++;

	/* Probe the remaining functions of a multi-function device. */
	for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) {
		dev = pci_scan_single_device(bus, devfn + fn);
		if (dev) {
			if (!pci_dev_is_added(dev))
				nr++;
			dev->multifunction = 1;
		}
	}

	/* Only one slot has PCIe device */
	if (bus->self && nr)
		pcie_aspm_init_link_state(bus->self);

	return nr;
}
EXPORT_SYMBOL(pci_scan_slot);
3.3.3.2 pci_scan_single_device
/*
 * Probe a single devfn: if the device object already exists (rescan or a
 * second path to the same bus), return it; otherwise read its config
 * space, allocate a pci_dev and add it to the device model.
 */
struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn)
{
	struct pci_dev *dev = pci_get_slot(bus, devfn);

	if (dev) {
		/* Already known: drop the reference pci_get_slot() took. */
		pci_dev_put(dev);
		return dev;
	}

	dev = pci_scan_device(bus, devfn);
	if (!dev)
		return NULL;

	pci_device_add(dev, bus);
	return dev;
}
EXPORT_SYMBOL(pci_scan_single_device);
- 这个函数就是分配、配置 pci_dev,并将其加入设备模型。
接下来就是扫描PCI桥的递归过程了:
3.3.3.3 pci_scan_bridge_extend
现在我们已经扫描完了host bridge
下的bus
和dev
, 现在开始扫描bridge
, 一个bridge
也对应一个pci_dev
。比如switch
中的每一个port
对应一个pci bridge
。
pci_scan_bridge_extend()
就是用于扫描pci桥和pci桥下的所有设备, 这个函数会被调用2次:第一次(pass=0)处理BIOS已经配置好的pci桥,这是为了兼容各个架构所做的妥协;第二次(pass=1)为尚未配置的pci桥分配新的总线号,并递归扫描其下游总线。通过2次调用pci_scan_bridge_extend
函数,完成所有的pci桥的处理。
/*
* pci_scan_bridge_extend() - Scan buses behind a bridge
* @bus: Parent bus the bridge is on
* @dev: Bridge itself
* @max: Starting subordinate number of buses behind this bridge
* @available_buses: Total number of buses available for this bridge and
* the devices below. After the minimal bus space has
* been allocated the remaining buses will be
* distributed equally between hotplug-capable bridges.
* @pass: Either %0 (scan already configured bridges) or %1 (scan bridges
* that need to be reconfigured.
*
* If it's a bridge, configure it and scan the bus behind it.
* For CardBus bridges, we don't scan behind as the devices will
* be handled by the bridge driver itself.
*
* We need to process bridges in two passes -- first we scan those
* already configured by the BIOS and after we are done with all of
* them, we proceed to assigning numbers to the remaining buses in
* order to avoid overlaps between old and new bus numbers.
*
* Return: New subordinate number covering all buses behind this bridge.
*/
static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev,
int max, unsigned int available_buses,
int pass)
{
...
/*
* Make sure the bridge is powered on to be able to access config
* space of devices below it.
*/
pm_runtime_get_sync(&dev->dev);
/* Read the bridge's current primary/secondary/subordinate bus numbers
 * in one dword; firmware may have programmed them already. */
pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
primary = buses & 0xFF;
secondary = (buses >> 8) & 0xFF;
subordinate = (buses >> 16) & 0xFF;
...
/*
* Disable Master-Abort Mode during probing to avoid reporting of
* bus errors in some architectures.
*/
pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl);
pci_write_config_word(dev, PCI_BRIDGE_CONTROL,
bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
pci_enable_crs(dev);
if ((secondary || subordinate) && !pcibios_assign_all_busses() &&
!is_cardbus && !broken) {
unsigned int cmax;
/*
* Bus already configured by firmware, process it in the
* first pass and just note the configuration.
*/
if (pass)
goto out;
/*
* The bus might already exist for two reasons: Either we
* are rescanning the bus or the bus is reachable through
* more than one bridge. The second case can happen with
* the i450NX chipset.
*/
child = pci_find_bus(pci_domain_nr(bus), secondary);
if (!child) {
child = pci_add_new_bus(bus, dev, secondary);
if (!child)
goto out;
child->primary = primary;
pci_bus_insert_busn_res(child, secondary, subordinate);
child->bridge_ctl = bctl;
}
cmax = pci_scan_child_bus(child);
if (cmax > subordinate)
pci_warn(dev, "bridge has subordinate %02x but max busn %02x\n",
subordinate, cmax);
/* Subordinate should equal child->busn_res.end */
if (subordinate > max)
max = subordinate;
} else {
/*
* We need to assign a number to this bus which we always
* do in the second pass.
*/
if (!pass) { // On the first call of pci_scan_bridge the pass argument is 0, so we return here directly.
if (pcibios_assign_all_busses() || broken || is_cardbus)
/*
* Temporarily disable forwarding of the
* configuration cycles on all bridges in
* this bus segment to avoid possible
* conflicts in the second pass between two
* bridges programmed with overlapping bus
* ranges.
*/
pci_write_config_dword(dev, PCI_PRIMARY_BUS,
buses & ~0xffffff);
goto out;
}
/* Clear errors */
pci_write_config_word(dev, PCI_STATUS, 0xffff);
/* Read bus numbers from EA Capability (if present) */
fixed_buses = pci_ea_fixed_busnrs(dev, &fixed_sec, &fixed_sub);
if (fixed_buses)
next_busnr = fixed_sec;
else
next_busnr = max + 1;
/*
* Prevent assigning a bus number that already exists.
* This can happen when a bridge is hot-plugged, so in this
* case we only re-scan this bus.
*/
child = pci_find_bus(pci_domain_nr(bus), next_busnr);
if (!child) {
child = pci_add_new_bus(bus, dev, next_busnr);
if (!child)
goto out;
pci_bus_insert_busn_res(child, next_busnr,
bus->busn_res.end);
}
max++;
if (available_buses)
available_buses--;
buses = (buses & 0xff000000)// Compose the new bus numbers, to be written into the PCI config space below.
| ((unsigned int)(child->primary) << 0)
| ((unsigned int)(child->busn_res.start) << 8)
| ((unsigned int)(child->busn_res.end) << 16);
/*
* yenta.c forces a secondary latency timer of 176.
* Copy that behaviour here.
*/
if (is_cardbus) {
buses &= ~0xff000000;
buses |= CARDBUS_LATENCY_TIMER << 24;
}
/* We need to blast all three values with a single write */
pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses);// Write this bridge's primary, secondary and subordinate bus numbers into its config space.
if (!is_cardbus) {
child->bridge_ctl = bctl;
max = pci_scan_child_bus_extend(child, available_buses);
// Recurse back into pci_scan_child_bus_extend() to scan every device behind this secondary bus.
} else {
/*
* For CardBus bridges, we leave 4 bus numbers as
* cards with a PCI-to-PCI bridge can be inserted
* later.
*/
for (i = 0; i < CARDBUS_RESERVE_BUSNR; i++) {
struct pci_bus *parent = bus;
if (pci_find_bus(pci_domain_nr(bus),
max+i+1))
break;
while (parent->parent) {
if ((!pcibios_assign_all_busses()) &&
(parent->busn_res.end > max) &&
(parent->busn_res.end <= max+i)) {
j = 1;
}
parent = parent->parent;
}
if (j) {
/*
* Often, there are two CardBus
* bridges -- try to leave one
* valid bus number for each one.
*/
i /= 2;
break;
}
}
max += i;
}
/*
* Set subordinate bus number to its real value.
* If fixed subordinate bus number exists from EA
* capability then use it.
*/
if (fixed_buses)
max = fixed_sub;
pci_bus_update_busn_res_end(child, max);// Key step: as each recursion unwinds, write the actual subordinate bus number back into the bridge's config space. The subordinate bus is the highest bus number below this bridge.
pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
}
sprintf(child->name,
(is_cardbus ? "PCI CardBus %04x:%02x" : "PCI Bus %04x:%02x"),
pci_domain_nr(bus), child->number);
/* Check that all devices are accessible */
while (bus->parent) {
if ((child->busn_res.end > bus->busn_res.end) ||
(child->number > bus->busn_res.end) ||
(child->number < bus->number) ||
(child->busn_res.end < bus->number)) {
dev_info(&dev->dev, "devices behind bridge are unusable because %pR cannot be assigned for them\n",
&child->busn_res);
break;
}
bus = bus->parent;
}
out:
/* Restore the original bridge control value and drop the PM reference. */
pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl);
pm_runtime_put(&dev->dev);
return max;
}
pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
一开始读取pci bridge
的主bus号,是因为有的体系结构可能已经在BIOS中对这些做过配置。如果需要在kernel中进行scan bridge
,就不会进if ((secondary || subordinate) && !pcibios_assign_all_busses()
的那个if
分支。- 最后,在PCIe总线树枚举完成后,返回PCIe总线树中的最后一个pci总线号,PCIe的枚举流程至此结束。
3.2 枚举总结
总的来说,枚举流程分为3步:
- 发现主桥设备和根总线;
- 发现主桥设备下的所有pci设备
- 如果主桥下面的设备是
pci bridge
, 那么再次遍历这个pci bridge
桥下的所有pci设备,并以此递归,直到将当前的pci总线树遍历完毕,并且返回host bridge
的subordinate
总线号。
pcie的资源分配
pcie枚举完成后,pci总线号已经分配,pcie ecam
的映射、 pcie设备信息、BAR的个数及大小等也已经ready, 但此时并没有给各个pci device
的BAR
, pci bridge
的mem
, I/O
, prefetch mem
的base/limit
寄存器分配资源。
这时就需要走到pcie的资源分配流程,整个资源分配的过程就是从系统的总资源里给每个pci device
的bar分配资源,给每个pci桥的base
, limit
的寄存器分配资源。
pcie的资源分配流程整体比较复杂,主要介绍下总体的流程,对关键的函数再做展开。
pcie资源分配的入口在pci_acpi_scan_root()->pci_bus_assign_resources()
在调用pci_bus_assign_resources()
之前,先调用pci_bus_size_bridges()-->__pci_bus_size_bridges()
: 用深度优先递归确定各级pci桥上base/limit
的大小,会记录在pci_dev->resource[PCI_BRIDGE_RESOURCES]
中。
再进行资源分配pci_bus_assign_resources()
:
1376 void __pci_bus_assign_resources(const struct pci_bus *bus,
1377 struct list_head *realloc_head,
1378 struct list_head *fail_head)
1379 {
1380 struct pci_bus *b;
1381 struct pci_dev *dev;
1382
1383 pbus_assign_resources_sorted(bus, realloc_head, fail_head);// First sort the resources requested by devices on this bus, then assign them.
1384
1385 list_for_each_entry(dev, &bus->devices, bus_list) {
1386 pdev_assign_fixed_resources(dev);
1387
1388 b = dev->subordinate;
1389 if (!b)
1390 continue;
1391
1392 __pci_bus_assign_resources(b, realloc_head, fail_head);// Like the enumeration flow, this is a depth-first traversal that assigns BAR resources to each PCIe endpoint device in turn.
1393
1394 switch (dev->class >> 8) {
1395 case PCI_CLASS_BRIDGE_PCI:
1396 if (!pci_is_enabled(dev))
1397 pci_setup_bridge(b);
1398 break;
1399
1400 case PCI_CLASS_BRIDGE_CARDBUS:
1401 pci_setup_cardbus(b);
1402 break;
1403
1404 default:
1405 pci_info(dev, "not setting up bridge for bus %04x:%02x\n",
1406 pci_domain_nr(b), b->number);
1407 break;
1408 }
1409 }
1410 }
pci_setup_bridge()
,某个总线下所有设备BAR空间分配之后,将初始化该总线桥的配置空间中的memory base
寄存器(该总线子树下所有设备使用的PCI总线域地址空间的基地址)和memory limit
寄存器(总线子树使用的总地址空间的大小)。
+-> pbus_assign_resources_sorted()
+-> list_for_each_entry(dev, &bus->devices, bus_list)
__dev_sort_resources(dev, &head);
+-> __assign_resources_sorted(&head, realloc_head, fail_head);
+-> assign_requested_resources_sorted(head, fail_head);
+-> list_for_each_entry(dev_res, head, list)
pci_assign_resource(dev_res->dev, idx)
+-> pci_bus_alloc_resource()
+-> allocate_resource()
+-> find_resource()
+-> request_resource()
+->pci_update_resource()
__dev_sort_resources
将pci设备使用的资源进行对齐和排序,然后加入到head
流程中。__assign_resources_sorted
中先调用find_resource()
获取上游pci bridge
所管理的空间资源范围。再调用request_resource()
为当前pci设备分配pcie地址空间,最后调用pci_update_resource()
初始化pcie bar
寄存器,将更新的资源区间写到寄存器。
最后pcie的资源分配过程可以概括如下:
- 获取上游pci 桥设备所管理的系统资源范围
- 使用DFS对所有的pci ep device进行bar资源的分配
- 使用DFS确定当前pci桥设备的base和limit的值,并对这些寄存器进行更新。
至此,pci树中所有pci设备的BAR寄存器,以及pci桥的base
、limit
寄存器都已经初始化完毕。
refer to
4. 理解linux pci 扫描流程
5. Linux设备驱动——PCI总线的初始化
6. linux下PCI设备的注册及初始化流程分析
7. mips处理器linux内核pci初始化和设备枚举详解
8. 慢慢欣赏linux PCI-PCIE初始化总结
9. Linux pci总线初始化流程
10. 总线注册(platform,PCI)
11. PCI驱动框架简单分析
12. Linux 各种 initcall 的调用原理
13. PCIe初始化枚举和资源分配流程分析
14. 《PCI Express体系结构导读》