1. 先来看下数据结构
/*
 * Describes one PCI bus: its position in the bus hierarchy, the devices and
 * slots attached to it, and the bridge device that leads to it.
 */
struct pci_bus {
// a root bus is linked into the global root-bus list; a non-root bus is linked into its parent's list of child buses
struct list_head node; /* node in list of buses */
// parent bus of this PCI bus
struct pci_bus *parent; /* parent bus this bridge is on */
// head of the list of child buses of this PCI bus
struct list_head children; /* list of child buses */
// head of the list of PCI devices on this PCI bus
struct list_head devices; /* list of devices on this bus */
// descriptor of the bridge device that leads to this PCI bus
struct pci_dev *self; /* bridge device as seen by parent */
// head of the list of slots on this PCI bus
struct list_head slots; /* list of slots on this bus */
struct resource *resource[PCI_BRIDGE_RESOURCE_NUM];
struct list_head resources; /* address space routed to this bus */
// configuration-space access functions used by this PCI bus
struct pci_ops *ops; /* configuration access functions */
void *sysdata; /* hook for sys-specific extension */
struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */
unsigned char number; /* bus number */
unsigned char primary; /* number of primary bridge */ // primary number of the pci_dev that leads to this bus
unsigned char secondary; /* number of secondary bridge */ // secondary number of the pci_dev that leads to this bus
unsigned char subordinate; /* max number of subordinate buses */ // highest bus number among the buses subordinate to this one
unsigned char max_bus_speed; /* enum pci_bus_speed */
unsigned char cur_bus_speed; /* enum pci_bus_speed */
char name[48];
unsigned short bridge_ctl; /* manage NO_ISA/FBB/et al behaviors */
pci_bus_flags_t bus_flags; /* Inherited by child busses */
struct device *bridge; // points to the struct device embedded in the pci_dev that leads to this bus
struct device dev; // embedded class-device object
struct bin_attribute *legacy_io; /* legacy I/O for this bus */
struct bin_attribute *legacy_mem; /* legacy mem */
unsigned int is_added:1;
};
/*
 * Describes one PCI (logical) device: where it sits in the bus hierarchy,
 * its identification values, power-management and state flags, resources,
 * and the driver bound to it.
 */
struct pci_dev {
// links this device into the device list of the PCI bus it belongs to
struct list_head bus_list; /* node in per-bus list */
// the PCI bus this device belongs to
struct pci_bus *bus; /* bus this device is on */
// the downstream bus this pci_dev bridges to; only meaningful for bridge devices
struct pci_bus *subordinate; /* bus this device bridges to */
void *sysdata; /* hook for sys-specific extension */
struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */
// the physical slot this device sits in
struct pci_slot *slot; /* Physical slot this device is in */
unsigned int devfn; /* encoded device & function index */
unsigned short vendor; // vendor ID
unsigned short device; // device ID
unsigned short subsystem_vendor; // subsystem vendor ID
unsigned short subsystem_device; // subsystem device ID
unsigned int class; /* 3 bytes: (base,sub,prog-if) */
u8 revision; /* PCI revision, low byte of class word */
u8 hdr_type; /* PCI header type (`multi' flag masked out) */
u8 pcie_cap; /* PCI-E capability offset */
u8 pcie_type:4; /* PCI-E device/port type */
u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */
// location of the ROM base address register in PCI configuration space
u8 rom_base_reg; /* which config register controls the ROM */
u8 pin; /* which interrupt pin this device uses */
// the pci_driver associated with this pci_dev
struct pci_driver *driver; /* which driver has allocated this device */
u64 dma_mask; /* Mask of the bits of bus address this
device implements. Normally this is
0xffffffff. You only need to change
this if your device has broken DMA
or supports 64-bit transfers. */
struct device_dma_parameters dma_parms;
pci_power_t current_state; /* Current operating state. In ACPI-speak,
this is D0-D3, D0 being fully functional,
and D3 being off. */
int pm_cap; /* PM capability offset in the
configuration space */
unsigned int pme_support:5; /* Bitmask of states from which PME#
can be generated */
unsigned int pme_interrupt:1;
unsigned int d1_support:1; /* Low power state D1 is supported */
unsigned int d2_support:1; /* Low power state D2 is supported */
unsigned int no_d1d2:1; /* Only allow D0 and D3 */
unsigned int mmio_always_on:1; /* disallow turning off io/mem
decoding during bar sizing */
unsigned int wakeup_prepared:1;
unsigned int d3_delay; /* D3->D0 transition time in ms */
#ifdef CONFIG_PCIEASPM
struct pcie_link_state *link_state; /* ASPM link state. */
#endif
pci_channel_state_t error_state; /* current connectivity state */
// embedded generic device object
struct device dev; /* Generic device interface */
int cfg_size; /* Size of configuration space */
/*
* Instead of touching interrupt line and base address registers
* directly, use the values stored here. They might be different!
*/
unsigned int irq;
struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */
/* These fields are used by common fixups */
unsigned int transparent:1; /* Transparent PCI bridge */
unsigned int multifunction:1;/* Part of multi-function device */
/* keep track of device state */
unsigned int is_added:1;
unsigned int is_busmaster:1; /* device is busmaster */
unsigned int no_msi:1; /* device may not use msi */
unsigned int block_ucfg_access:1; /* userspace config space access is blocked */
unsigned int broken_parity_status:1; /* Device generates false positive parity */
unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */
unsigned int msi_enabled:1;
unsigned int msix_enabled:1;
unsigned int ari_enabled:1; /* ARI forwarding */
unsigned int is_managed:1;
unsigned int is_pcie:1; /* Obsolete. Will be removed.
Use pci_is_pcie() instead */
unsigned int needs_freset:1; /* Dev requires fundamental reset */
unsigned int state_saved:1;
unsigned int is_physfn:1;
unsigned int is_virtfn:1;
unsigned int reset_fn:1;
unsigned int is_hotplug_bridge:1;
unsigned int __aer_firmware_first_valid:1;
unsigned int __aer_firmware_first:1;
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
u32 saved_config_space[16]; /* config space saved at suspend time */
struct hlist_head saved_cap_space;
struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
int rom_attr_enabled; /* has display of the rom attribute been enabled? */
struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
#ifdef CONFIG_PCI_MSI
struct list_head msi_list;
#endif
struct pci_vpd *vpd;
#ifdef CONFIG_PCI_IOV
union {
struct pci_sriov *sriov; /* SR-IOV capability related */
struct pci_dev *physfn; /* the PF this VF is associated with */
};
struct pci_ats *ats; /* Address Translation Service */
#endif
};
pci_dev描述的是逻辑设备,跟usb系统的(usb_device和usb_interface)概念有点类似。无论是配置还是扫描,都是基于pci逻辑设备。不过热插拔必然要针对pci物理设备,描述pci物理设备的结构体是pci_slot。
/* Describes one slot on a PCI bus, including its hotplug information. */
struct pci_slot {
struct pci_bus *bus; /* The bus this slot is on */
struct list_head list; /* node in list of slots on this bus */ // links this slot into the bus's slot list
struct hotplug_slot *hotplug; /* Hotplug info (migrate over time) */
unsigned char number; /* PCI_SLOT(pci_dev->devfn) */
struct kobject kobj;
};
2. x86架构的pci系统的初始化
(1)pci_bus类的初始化(initcall2)
/*
 * Register pcibus_class with the driver core.
 *
 * Runs as a postcore initcall and hands back the result of
 * class_register() unchanged.
 */
static int __init pcibus_class_init(void)
{
	int rc;

	rc = class_register(&pcibus_class);

	return rc;
}
postcore_initcall(pcibus_class_init);
(2)pci总线的初始化(initcall2)
/*
 * Register pci_bus_type with the driver core.
 *
 * Runs as a postcore initcall and hands back the result of
 * bus_register() unchanged.
 */
static int __init pci_driver_init(void)
{
	int rc;

	rc = bus_register(&pci_bus_type);

	return rc;
}
postcore_initcall(pci_driver_init);
(3)pci配置访问方法的初始化(initcall3)
pci_arch_init的主要目的是为raw_pci_ops或raw_pci_ext_ops赋值,将它们指向用于访问pci配置空间的操作表,所以必须在pci扫描之前完成。
这个函数的逻辑是:先调用pci_direct_probe函数检查机制#1(或机制#2),再调用pci_pcbios_init函数检查pci bios方式,最后调用pci_direct_init函数回过头看是否要使用机制#1(或机制#2),这种调用顺序确保优先使用机制#1(或机制#2)。
/*
 * Arch-level PCI setup, run as an arch initcall.
 *
 * Probes the direct access method, runs the early MMCONFIG init unless
 * PCI_PROBE_NOEARLY is set, gives the platform hook a chance to stop here,
 * then runs the PCI BIOS init followed by the direct init. Always returns 0.
 */
static __init int pci_arch_init(void)
{
#ifdef CONFIG_PCI_DIRECT
	int type = pci_direct_probe();
#endif

	if ((pci_probe & PCI_PROBE_NOEARLY) == 0)
		pci_mmcfg_early_init();

	/* A platform hook that returns zero ends the setup right here. */
	if (x86_init.pci.arch_init) {
		if (!x86_init.pci.arch_init())
			return 0;
	}

#ifdef CONFIG_PCI_BIOS
	pci_pcbios_init();
#endif

	/*
	 * raw_pci_ops is deliberately not tested before this point: pcbios
	 * is meant to be the last fallback, but it has to run first so that
	 * pcibios_last_bus is set when legacy PCI probing is used; without
	 * that, peer busses are not detected.
	 */
#ifdef CONFIG_PCI_DIRECT
	pci_direct_init(type);
#endif

	if (!(raw_pci_ops || raw_pci_ext_ops)) {
		printk(KERN_ERR
		       "PCI: Fatal: No config space access function found\n");
	}

	dmi_check_pciprobe();
	dmi_check_skip_isa_align();

	return 0;
}
arch_initcall(pci_arch_init);
(4)pci总线的扫描(initcall4)
pci总线扫描流程从pci_subsys_init函数开始,主要解决3个问题:
通过pci总线扫描发现系统中各级总线上的pci设备,并对每个pci设备进行配置
在内存中为每个pci设备构建对应的数据结构,以便后续对它们进行操作
在sysfs文件系统中构建pci目录树,向用户空间导出信息,并且提供控制接口
/*
 * PCI subsystem bring-up, run as a subsys initcall.
 *
 * Calls the platform init hook, falls back to legacy probing when the hook
 * asks for it, then fixes up peer bridges, initializes IRQ routing and
 * finally the resource allocation. Always returns 0.
 */
int __init pci_subsys_init(void)
{
	/* A non-zero result from the init hook requests pci_legacy_init(). */
	if (x86_init.pci.init() != 0) {
		/* scans the PCI buses -- the important step */
		pci_legacy_init();
	}

	pcibios_fixup_peer_bridges();

	/* PCI interrupt routing initialization */
	x86_init.pci.init_irq();

	/* PCI resource allocation initialization */
	pcibios_init();

	return 0;
}
subsys_initcall(pci_subsys_init);
/*
 * Legacy PCI probing: scan root bus 0 and add what was found.
 *
 * Does nothing beyond printing a message when raw_pci_ops is unset.
 * Always returns 0.
 */
int __init pci_legacy_init(void)
{
	if (raw_pci_ops == NULL) {
		printk("PCI: System does not support PCI\n");
		return 0;
	}

	printk("PCI: Probing PCI hardware\n");

	/* build the PCI subtree in memory */
	pci_root_bus = pcibios_scan_root(0);
	if (pci_root_bus == NULL)
		return 0;

	/* add the child buses and PCI devices of this tree to sysfs */
	pci_bus_add_devices(pci_root_bus);

	return 0;
}
pci_legacy_init
    pcibios_scan_root
        pci_scan_bus_parented
            pci_create_bus
            pci_scan_child_bus
                pci_scan_slot
                    pci_scan_single_device
                        pci_scan_device
                            alloc_pci_dev
                            pci_setup_device
                        pci_device_add
                pci_scan_bridge
                    pci_add_new_bus
                        pci_alloc_child_bus
                    pci_scan_child_bus
pci中断路由的初始化
/*
 * PCI interrupt routing initialization.
 *
 * Locates the IRQ routing table, sets up the interrupt router and IRQ
 * penalties from it, runs the platform IRQ fixups and, when both
 * io_apic_assign_pci_irqs and pci_routeirq are set, enables the IRQ of
 * every PCI device. Returns immediately when raw_pci_ops is unset.
 */
void __init pcibios_irq_init(void)
{
	DBG(KERN_DEBUG "PCI: IRQ init\n");

	if (!raw_pci_ops)
		return;

	dmi_check_system(pciirq_dmi_table);

	/* look up the interrupt routing table */
	pirq_table = pirq_find_routing_table();

#ifdef CONFIG_PCI_BIOS
	/* none found: ask the PCI BIOS if PCI_BIOS_IRQ_SCAN is set */
	if (pirq_table == NULL && (pci_probe & PCI_BIOS_IRQ_SCAN) != 0)
		pirq_table = pcibios_get_irq_routing_table();
#endif

	if (pirq_table != NULL) {
		/*
		 * check that the bus of every PCI slot has been scanned,
		 * and scan the ones that were missed
		 */
		pirq_peer_trick();

		/* find the programmable interrupt router driver, kept in pirq_router */
		pirq_find_router(&pirq_router);

		if (pirq_table->exclusive_irqs) {
			int irq;

			/* penalize each of IRQ 0..15 whose bit is clear in exclusive_irqs */
			for (irq = 0; irq < 16; irq++) {
				if (pirq_table->exclusive_irqs & (1 << irq))
					continue;
				pirq_penalty[irq] += 100;
			}
		}

		/* With the I/O APIC in use, the PCI IRQ routing table is not used. */
		if (io_apic_assign_pci_irqs)
			pirq_table = NULL;
	}

	/* assign ISA IRQ numbers */
	x86_init.pci.fixup_irqs();

	if (!io_apic_assign_pci_irqs || !pci_routeirq)
		return;

	{
		struct pci_dev *dev = NULL;

		/*
		 * pci_enable_device() normally sets up PCI IRQ routing; it is
		 * repeated here for broken drivers that never call
		 * pci_enable_device().
		 */
		printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
		for_each_pci_dev(dev)
			pirq_enable_irq(dev);
	}
}
pci资源分配流程初始化
/*
 * PCI resource allocation initialization.
 *
 * Sets the cache line size, surveys the resources and, when
 * pci_bf_sort >= pci_force_bf, sorts the devices breadth-first.
 * Prints a warning and does nothing else when raw_pci_ops is unset.
 * Always returns 0.
 */
int __init pcibios_init(void)
{
	if (raw_pci_ops == NULL) {
		printk(KERN_WARNING "PCI: System does not support PCI\n");
		return 0;
	}

	pcibios_set_cache_line_size();
	pcibios_resource_survey();

	if (pci_force_bf <= pci_bf_sort) {
		pci_sort_breadthfirst();
	}

	return 0;
}
/*
 * Survey PCI resources: first the bus resources of the root buses, then the
 * device resources in two passes (0, then 1), then the late e820 reservations
 * and the IO APIC resources. The calls are order-dependent; see the comment
 * before ioapic_insert_resources().
 */
void __init pcibios_resource_survey(void)
{
DBG("PCI: Allocating resources\n");
pcibios_allocate_bus_resources(&pci_root_buses);
/* pass 0 and pass 1 over the device resources */
pcibios_allocate_resources(0);
pcibios_allocate_resources(1);
e820_reserve_resources_late();
/*
* Insert the IO APIC resources after PCI initialization has
* occurred to handle IO APICS that are mapped in on a BAR in
* PCI space, but before trying to assign unassigned pci res.
*/
ioapic_insert_resources();
}
pcibios_allocate_bus_resources函数递归为每条子总线保留资源窗口,若可以保留,则将资源挂到父资源的链表中。
pcibios_allocate_resources(0)处理被启用的pci设备
pcibios_allocate_resources(1)处理被禁用的pci设备
3. pci设备驱动的介绍
/*
 * Describes a PCI device driver: its name, the table of device IDs it
 * handles, its probe/remove, power-management and shutdown callbacks, its
 * error handlers, and the embedded generic device_driver.
 */
struct pci_driver {
struct list_head node;
const char *name;
const struct pci_device_id *id_table; /* must be non-NULL for probe to be called */
int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */
void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */
int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */
int (*suspend_late) (struct pci_dev *dev, pm_message_t state);
int (*resume_early) (struct pci_dev *dev);
int (*resume) (struct pci_dev *dev); /* Device woken up */
void (*shutdown) (struct pci_dev *dev);
struct pci_error_handlers *err_handler;
struct device_driver driver;
struct pci_dynids dynids;
};
pci_register_driver注册pci驱动
pci_register_driver
    driver_register
        bus_add_driver
            driver_attach
                bus_for_each_dev(drv->bus, NULL, drv, __driver_attach)
                    __driver_attach
                        driver_match_device
                            pci_bus_match
                                pci_match_device //先在动态ID链表中匹配,再到静态ID表中匹配
                        driver_probe_device
                            really_probe
                                dev->bus->probe(dev)
                                    pci_device_probe
                                        __pci_device_probe
                                            pci_match_device
                                            pci_call_probe
                                                local_pci_probe
                                                    ddi->drv->probe(ddi->dev, ddi->id)
到这里,pci框架已经大致介绍完了,具体的扫描过程可以参照《存储技术原理分析》。