一、PCI总线结构
PCI总线结构是一种树形、层次式的体系结构。其中负责连接树形结构父节点与子节点的重要部件称为:PCI桥。PCI桥在PCI总线结构中占据着重要的地位,它将父总线与子总线连接在一起。PCI桥包括以下几种:
Host/PCI桥:用于连接CPU与PCI根总线,第1个根总线的编号为0。它是一个特殊的PCI桥设备,将树顶端的CPU与根PCI总线连接起来。
PCI-to-PCI桥:用于连接PCI主总线与次总线。PCI桥所处的PCI总线称为“主总线”(即次总线的父总线),桥设备所连接的PCI总线称为“次总线”(即主总线的子总线)。
PCI/ISA桥:用于连接旧的ISA总线。
下图展示了CPU、PCI总线和PCI设备的连接关系,本文图均出自《Linux设备驱动开发详解》 。PCI桥的引入极大增加了PCI总线的复杂性,但其引入也方便了PCI总线进行设备的扩展。
二、PCI总线的Linux描述
在Linux系统中,PCI总线用pci_bus描述。这个结构体记录了本PCI总线的信息以及本PCI总线的父总线、子总线、桥设备信息。这个结构体的定义如下所示:
struct pci_bus {
struct list_head node; /* 链表元素node */
struct pci_bus *parent; /* 指向该pci总线的父总线,即pci桥所在的总线 */
struct list_head children; /* 该条pci总线的子总线链表的表头 */
struct list_head devices; /* 该条pci总线的逻辑设备链表的表头 */
struct pci_dev *self; /* 指向引出这条pci总线的桥设备的pci_dev结构 */
struct list_head slots; /* 受pci_slot_mutex保护的此总线上的插槽列表 */
struct resource *resource[PCI_BRIDGE_RESOURCE_NUM];
struct list_head resources; /* 指向应路由到这条总线的地址空间资源 */
struct resource busn_res; /* 路由到此总线的总线资源 */
struct pci_ops *ops; /* 这条pci总线所使用的配置空间访问函数 */
void *sysdata; /* 指向系统特定的扩展函数 */
struct proc_dir_entry *procdir; /* 该总线在/proc/bus/pci中对应的目录项 */
unsigned char number; /* 这条pci总线的编号 */
unsigned char primary; /* 桥设备的主总线 */
unsigned char max_bus_speed;
unsigned char cur_bus_speed;
#ifdef CONFIG_PCI_DOMAINS_GENERIC
int domain_nr;
#endif
char name[48];
unsigned short bridge_ctl;
pci_bus_flags_t bus_flags;
struct device *bridge;
struct device dev;
struct bin_attribute *legacy_io;
struct bin_attribute *legacy_mem;
unsigned int is_added:1;
unsigned int unsafe_warn:1;
};
系统中当前存在的所有根总线都通过其pci_bus结构体中的node成员变量链接成 一条全局的根总线链表,其表头由list_head类型的全局变量pci_root_buses来描述。而根总线下面的所有下级总线都通过其pci_bus结构体中的node成员链接到其父总线的children链表中。这样,通过这两种PCI总线链表,Linux内核就将所有的pci_bus结构体以一种倒置树的方式组织起来。在访问所有总线时,以DFS的方式进行访问。链表连接结构如下图所示。
三、PCI设备的Linux描述
在Linux系统中所有的PCI设备都可以用pci_dev结构体来描述,由于一个PCI接口卡上可能包含多个功能模块,每个功能都可当作一个独立的逻辑设备,也唯一的对应一个pci_dev设备描述符。该结构体定义如下。
struct pci_dev {
struct list_head bus_list; /* 总线设备链表元素 */
struct pci_bus *bus; /* 此设备所在的总线pci_bus结构体 */
struct pci_bus *subordinate; /* 指向这个pci设备所桥接的下级总线 */
void *sysdata; /* 指向一片特定于系统的扩展数据 */
struct proc_dir_entry *procent; /* 改pci设备在/proc/bus/pci中对应的目录项 */
struct pci_slot *slot; /* 指向此pci设备所占用的插槽 */
unsigned int devfn; /* 这个pci设备的设备功能号 */
unsigned short vendor;
unsigned short device;
unsigned short subsystem_vendor;
unsigned short subsystem_device;
unsigned int class; /* 3 bytes: (base,sub,prog-if) */
u8 revision; /* PCI revision, low byte of class word */
u8 hdr_type; /* PCI header type (`multi' flag masked out) */
#ifdef CONFIG_PCIEAER
u16 aer_cap; /* AER capability offset */
struct aer_stats *aer_stats; /* AER stats for this device */
#endif
#ifdef CONFIG_PCIEPORTBUS
struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */
struct pci_dev *rcec; /* Associated RCEC device */
#endif
u32 devcap; /* PCIe Device Capabilities */
u8 pcie_cap; /* PCIe capability offset */
u8 msi_cap; /* MSI capability offset */
u8 msix_cap; /* MSI-X capability offset */
u8 pcie_mpss:3; /* PCIe Max Payload Size Supported */
u8 rom_base_reg; /* Config register controlling ROM */
u8 pin; /* Interrupt pin this device uses */
u16 pcie_flags_reg; /* Cached PCIe Capabilities Register */
unsigned long *dma_alias_mask;/* Mask of enabled devfn aliases */
struct pci_driver *driver; /* Driver bound to this device */
u64 dma_mask; /* Mask of the bits of bus address this
device implements. Normally this is
0xffffffff. You only need to change
this if your device has broken DMA
or supports 64-bit transfers. */
struct device_dma_parameters dma_parms;
pci_power_t current_state; /* Current operating state. In ACPI,
this is D0-D3, D0 being fully
functional, and D3 being off. */
u8 pm_cap; /* PM capability offset */
unsigned int imm_ready:1; /* Supports Immediate Readiness */
unsigned int pme_support:5; /* Bitmask of states from which PME#
can be generated */
unsigned int pme_poll:1; /* Poll device's PME status bit */
unsigned int d1_support:1; /* Low power state D1 is supported */
unsigned int d2_support:1; /* Low power state D2 is supported */
unsigned int no_d1d2:1; /* D1 and D2 are forbidden */
unsigned int no_d3cold:1; /* D3cold is forbidden */
unsigned int bridge_d3:1; /* Allow D3 for bridge */
unsigned int d3cold_allowed:1; /* D3cold is allowed by user */
unsigned int mmio_always_on:1; /* Disallow turning off io/mem
decoding during BAR sizing */
unsigned int wakeup_prepared:1;
unsigned int skip_bus_pm:1; /* Internal: Skip bus-level PM */
unsigned int ignore_hotplug:1; /* Ignore hotplug events */
unsigned int hotplug_user_indicators:1; /* SlotCtl indicators
controlled exclusively by
user sysfs */
unsigned int clear_retrain_link:1; /* Need to clear Retrain Link
bit manually */
unsigned int d3hot_delay; /* D3hot->D0 transition time in ms */
unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */
#ifdef CONFIG_PCIEASPM
struct pcie_link_state *link_state; /* ASPM link state */
u16 l1ss; /* L1SS Capability pointer */
unsigned int ltr_path:1; /* Latency Tolerance Reporting
supported from root to here */
#endif
unsigned int pasid_no_tlp:1; /* PASID works without TLP Prefix */
unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */
pci_channel_state_t error_state; /* Current connectivity state */
struct device dev; /* Generic device interface */
int cfg_size; /* Size of config space */
/*
* Instead of touching interrupt line and base address registers
* directly, use the values stored here. They might be different!
*/
unsigned int irq;
struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
struct resource driver_exclusive_resource; /* driver exclusive resource ranges */
bool match_driver; /* Skip attaching driver */
unsigned int transparent:1; /* Subtractive decode bridge */
unsigned int io_window:1; /* Bridge has I/O window */
unsigned int pref_window:1; /* Bridge has pref mem window */
unsigned int pref_64_window:1; /* Pref mem window is 64-bit */
unsigned int multifunction:1; /* Multi-function device */
unsigned int is_busmaster:1; /* Is busmaster */
unsigned int no_msi:1; /* May not use MSI */
unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */
unsigned int block_cfg_access:1; /* Config space access blocked */
unsigned int broken_parity_status:1; /* Generates false positive parity */
unsigned int irq_reroute_variant:2; /* Needs IRQ rerouting variant */
unsigned int msi_enabled:1;
unsigned int msix_enabled:1;
unsigned int ari_enabled:1; /* ARI forwarding */
unsigned int ats_enabled:1; /* Address Translation Svc */
unsigned int pasid_enabled:1; /* Process Address Space ID */
unsigned int pri_enabled:1; /* Page Request Interface */
unsigned int is_managed:1; /* Managed via devres */
unsigned int is_msi_managed:1; /* MSI release via devres installed */
unsigned int needs_freset:1; /* Requires fundamental reset */
unsigned int state_saved:1;
unsigned int is_physfn:1;
unsigned int is_virtfn:1;
unsigned int is_hotplug_bridge:1;
unsigned int shpc_managed:1; /* SHPC owned by shpchp */
unsigned int is_thunderbolt:1; /* Thunderbolt controller */
/*
* Devices marked being untrusted are the ones that can potentially
* execute DMA attacks and similar. They are typically connected
* through external ports such as Thunderbolt but not limited to
* that. When an IOMMU is enabled they should be getting full
* mappings to make sure they cannot access arbitrary memory.
*/
unsigned int untrusted:1;
/*
* Info from the platform, e.g., ACPI or device tree, may mark a
* device as "external-facing". An external-facing device is
* itself internal but devices downstream from it are external.
*/
unsigned int external_facing:1;
unsigned int broken_intx_masking:1; /* INTx masking can't be used */
unsigned int io_window_1k:1; /* Intel bridge 1K I/O windows */
unsigned int irq_managed:1;
unsigned int non_compliant_bars:1; /* Broken BARs; ignore them */
unsigned int is_probed:1; /* Device probing in progress */
unsigned int link_active_reporting:1;/* Device capable of reporting link active */
unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */
unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */
unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */
unsigned int rom_attr_enabled:1; /* Display of ROM attribute enabled? */
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
spinlock_t pcie_cap_lock; /* Protects RMW ops in capability accessors */
u32 saved_config_space[16]; /* Config space saved at suspend time */
struct hlist_head saved_cap_space;
struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
#ifdef CONFIG_HOTPLUG_PCI_PCIE
unsigned int broken_cmd_compl:1; /* No compl for some cmds */
#endif
#ifdef CONFIG_PCIE_PTM
u16 ptm_cap; /* PTM Capability */
unsigned int ptm_root:1;
unsigned int ptm_enabled:1;
u8 ptm_granularity;
#endif
#ifdef CONFIG_PCI_MSI
void __iomem *msix_base;
raw_spinlock_t msi_lock;
#endif
struct pci_vpd vpd;
#ifdef CONFIG_PCIE_DPC
u16 dpc_cap;
unsigned int dpc_rp_extensions:1;
u8 dpc_rp_log_size;
#endif
#ifdef CONFIG_PCI_ATS
union {
struct pci_sriov *sriov; /* PF: SR-IOV info */
struct pci_dev *physfn; /* VF: related PF */
};
u16 ats_cap; /* ATS Capability offset */
u8 ats_stu; /* ATS Smallest Translation Unit */
#endif
#ifdef CONFIG_PCI_PRI
u16 pri_cap; /* PRI Capability offset */
u32 pri_reqs_alloc; /* Number of PRI requests allocated */
unsigned int pasid_required:1; /* PRG Response PASID Required */
#endif
#ifdef CONFIG_PCI_PASID
u16 pasid_cap; /* PASID Capability offset */
u16 pasid_features;
#endif
#ifdef CONFIG_PCI_P2PDMA
struct pci_p2pdma __rcu *p2pdma;
#endif
#ifdef CONFIG_PCI_DOE
struct xarray doe_mbs; /* Data Object Exchange mailboxes */
#endif
u16 acs_cap; /* ACS Capability offset */
phys_addr_t rom; /* Physical address if not from BAR */
size_t romlen; /* Length if not from BAR */
/*
* Driver name to force a match. Do not set directly, because core
* frees it. Use driver_set_override() to set or clear it.
*/
const char *driver_override;
unsigned long priv_flags; /* Private flags for the PCI driver */
/* These methods index pci_reset_fn_methods[] */
u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */
};
在linux系统中,同属一条PCI总线上的所有PCI设备通过其pci_dev结构体中的bus_list成员链接成一个属于这条PCI总线的总线设备链表,表头由该PCI总线的pci_bus结构中的devices成员所定义。下图为PCI设备链表的典型例子。