Solaris Source Insight: PCI bus driver modules - pcieb driver module

我们接下来看 pcieb

 

[root@blu-nhm-ep:~]modinfo | grep PCI

15 fffffffffba46ce0 bfb0 - 1 pci_autoconfig (PCI BIOS interface)

37 fffffffffbacd3f0 ce28 183 1 npe (Host to PCIe nexus driver)

38 fffffffffbad95c8 5f50 - 1 pcihp (PCI nexus hotplug support)

40 fffffffffbae14f0 bb00 - 1 pcie (PCIE: PCI framework)

89 fffffffff7bff000 4c68 184 1 pcieb (PCIe to PCI nexus driver)

90 fffffffff7999000 1d68 84 1 pci_pci (PCI to PCI bridge nexus driver)

 

pcieb PCI-E to PCI bus bridge nexus driver 。通过下面的方法可以在 onnv-gate 中找到实现的文件。

 

[allen@blu-xvm-osol:uts]find . -name Makefile\* | xargs grep pcieb

./common/Makefile.files:PCIEB_OBJS += pcieb.o

./sparc/pcieb/Makefile:# uts/sparc/pcieb/Makefile

./sparc/pcieb/Makefile:# This makefile drives the production of the pcieb driver kernel module

./sparc/pcieb/Makefile:MODULE = pcieb

./sparc/pcieb_bcm/Makefile:# uts/sparc/pcieb_bcm/Makefile

./sparc/pcieb_bcm/Makefile:# This makefile drives the production of the pcieb_bcm driver kernel module

./sparc/pcieb_bcm/Makefile:MODULE = pcieb_bcm

./sparc/pcieb_bcm/Makefile:CPPFLAGS += -DPCIEB_BCM -DPX_MOD_NAME=pcieb_bcm

./sparc/Makefile.sparc.shared:DRV_KMODS += pci_pci pcieb pcieb_bcm pcie

./sparc/Makefile.files:PCIEB_OBJS += pcieb_sparc.o

./intel/Makefile.intel.shared:DRV_KMODS += pcieb

./intel/Makefile.files:PCIEB_OBJS += pcieb_x86.o

./intel/pcieb/Makefile:# uts/intel/pcieb/Makefile

./intel/pcieb/Makefile:# This makefile drives the production of the pcieb driver kernel

./intel/pcieb/Makefile:MODULE = pcieb

 

[allen@blu-xvm-osol:uts]find . -name pcieb.c

./common/io/pciex/pcieb.c

[allen@blu-xvm-osol:uts]find . -name pcieb_x86.c

./intel/io/pciex/pcieb_x86.c

 

注: 以下对源代码的引用,除非特殊说明,都引自 common/io/pciex/pcieb.c

这是一个 nexus driver , 它的 modlinkage 定义是:

 

185 /*

186 * Module linkage information for the kernel.

187 */

188

189 static struct modldrv modldrv = {

190 |_______&mod_driverops, /* Type of module */

191 |_______"PCIe bridge/switch driver",

192 |_______&pcieb_ops,|____/* driver ops */

193 };

194

195 static struct modlinkage modlinkage = {

196 |_______MODREV_1,

197 |_______(void *)&modldrv,

198 |_______NULL

199 };

 

其中, pcieb_ops 定义为:

 

170 static struct dev_ops pcieb_ops = {

171 |_______DEVO_REV,|______|_______/* devo_rev */

172 |_______0,|_____|_______|_______/* refcnt */

173 |_______pcieb_info,|____|_______/* info */

174 |_______nulldev,|_______|_______/* identify */

175 |_______pcieb_probe,|___|_______/* probe */

176 |_______pcieb_attach,|__|_______/* attach */

177 |_______pcieb_detach,|__|_______/* detach */

178 |_______nulldev,|_______|_______/* reset */

179 |_______&pcieb_cb_ops,|_|_______/* driver operations */

180 |_______&pcieb_bus_ops,||_______/* bus operations */

181 |_______pcie_power,|____|_______/* power */

182 |_______ddi_quiesce_not_needed,||_______/* quiesce */

183 };

 

Let's check the _init() implementation.

 

215 int

216 _init(void)

217 {

218 |_______int e;

219

220 |_______if ((e = ddi_soft_state_init(&pcieb_state, sizeof (pcieb_devstate_t),

221 |_______ 1)) == 0 && (e = mod_install(&modlinkage)) != 0)

222 |_______|_______ddi_soft_state_fini(&pcieb_state);

223 |_______return (e);

224 }

 

It initializes the driver's soft state list and then installs the module; if mod_install() fails, the soft state is torn down again. Next is the attach function, pcieb_attach(). If it's called with the DDI_RESUME command, pcie_pwr_resume() in the pcie module is called. Otherwise, it's a normal attach command (DDI_ATTACH). A question came to me when I read the first lines of the attach function: the bus private data for this devinfo node is used directly in this function, but where is it allocated and initialized? The common interface to allocate and initialize a pcie_bus_t data structure is provided by the pcie module.

 

[common/io/pciex/pcie.c]

739 /*

740 * Initialize PCIe Bus Private Data

741 *

742 * PCIe Bus Private Data contains commonly used PCI/PCIe information and offsets

743 * to key registers.

744 */

745 pcie_bus_t *

746 pcie_init_bus(dev_info_t *cdip)

 

The following functions call this function, according to cscope back tracing.

 

Cscope tag: pcie_init_bus

# line filename / context / line

1 451 common/sys/pcie_impl.h <<GLOBAL>>

extern pcie_bus_t *pcie_init_bus(dev_info_t *cdip);

2 746 common/io/pciex/pcie.c <<pcie_init_bus>>

pcie_init_bus(dev_info_t *cdip)

3 777 common/io/pciex/pcieb.c <<pcieb_initchild>>

if (!pcie_init_bus(child) || pcie_initchild(child) != DDI_SUCCESS) {

4 942 i86pc/io/pciex/npe.c <<npe_initchild>>

bus_p = pcie_init_bus(child);

5 677 intel/io/pci/pci_pci.c <<ppb_initchild>>

if (pcie_init_bus(child) == NULL)

6 528 sun4/io/px/px_util.c <<px_init_child>>

if (pcie_init_bus(child))

7 966 sun4u/io/pci/pci_pci.c <<ppb_initchild>>

if (pcie_init_bus(child) == NULL) {

Type number and <Enter> (empty cancels):

 

We can see that the _initchild() function of each nexus bus driver calls this interface to create the pcie private data for its children. So the question becomes “in which path is the initchild function called?” The answer is ndi_devi_bind_driver(), which is called to bind a devinfo node to a specific driver module.

 

ndi_devi_bind_driver

-->i_ndi_config_node(DS_BOUND)

   -->init_node

      -->pdev->devi_ops->devo_bus_ops->bus_ctl(DDI_CTLOPS_INITCHILD)

        -->npe_ctlops / pepb_ctlops / ppb_ctlops / pci_ctlops /

        isa_ctlops / rootnex_ctlops / ata_disk_bus_ctl /

        cpunex_bus_ctl / ...

           -->npe_initchild / ppb_initchild / pcieb_initchild / …

 

When the devinfo node state is being changed from DS_BOUND to DS_INITIALIZED, init_node() is called:

 

[common/os/devcfg.c]

1537 |_______|_______case DS_BOUND:

1538 |_______|_______|_______/*

1539 |_______|_______|_______ * The following transitions synchronizes on the

1540 |_______|_______|_______ * per-driver busy changing flag, since we already

1541 |_______|_______|_______ * have a driver.

1542 |_______|_______|_______ */

1543 |_______|_______|_______if ((rv = init_node(dip)) == DDI_SUCCESS)

1544 |_______|_______|_______|_______i_ddi_set_node_state(dip, DS_INITIALIZED);

1545 |_______|_______|_______break;

 

In init_node(), the initchild function of the parent bus is called to initialize the child node.

 

[common/os/devcfg.c]

871 |_______/*

872 |_______ * Invoke the parent's bus_ctl operation with the DDI_CTLOPS_INITCHILD

873 |_______ * command to transform the child to canonical form 1. If there

874 |_______ * is an error, ddi_remove_child should be called, to clean up.

875 |_______ */

876 |_______error = (*f)(pdip, pdip, DDI_CTLOPS_INITCHILD, dip, NULL);

877 |_______if (error != DDI_SUCCESS) {

878 |_______|_______NDI_CONFIG_DEBUG((CE_CONT, "init_node: %s 0x%p failed\n",

879 |_______|_______ path, (void *)dip));

880 |_______|_______remove_global_props(dip);

881 |_______|_______/* in case nexus driver didn't clear this field */

882 |_______|_______ddi_set_name_addr(dip, NULL);

883 |_______|_______error = DDI_FAILURE;

884 |_______|_______goto out;

885 |_______}

 

Next, allocate and get the soft state structure. The state structure is defined as below.

 

[common/io/pciex/pcieb.h]

84 typedef struct {

85 |_______dev_info_t|_____|_______*pcieb_dip;

86

87 |_______/* Interrupt support */

88 |_______ddi_intr_handle_t|______*pcieb_htable;|_|_______/* Intr Handlers */

89 |_______int|____|_______|_______pcieb_htable_size;|_____/* htable size */

90 |_______int|____|_______|_______pcieb_intr_count;|______/* Num of Intr */

91 |_______uint_t|_|_______|_______pcieb_intr_priority;|___/* Intr Priority */

92 |_______int|____|_______|_______pcieb_intr_type;|_______/* (MSI | FIXED) */

93 |_______int|____|_______|_______pcieb_isr_tab[4];|______/* MSI source offset */

94

95 |_______int|____|_______|_______pcieb_init_flags;

96 |_______kmutex_t|_______|_______pcieb_mutex;|___|_______/* Soft state mutex */

97 |_______kmutex_t|_______|_______pcieb_intr_mutex;|______/* Intr handler mutex */

98 |_______kmutex_t|_______|_______pcieb_err_mutex;|_______/* Error mutex */

99 |_______kmutex_t|_______|_______pcieb_peek_poke_mutex; /* Peekpoke mutex */

100

101 |_______/* FMA */

102 |_______boolean_t|______|_______pcieb_no_aer_msi;

103 |_______ddi_iblock_cookie_t|____pcieb_fm_ibc;

104 } pcieb_devstate_t;

 

  1. Next >>>

  2. Fault management initialization. Initialize the mutex locks. And then create special properties for device identification. “first-in-chassis” property: set if “First In Chassis bit” of “Expansion Slot Register”, the first byte of “PCI Slot Id Capabilities” is set. "serialid#" property: set according to PCI-Express Device Serial Number Capability register.

  3. Next comes to “Power management setup”. This also makes sure that switch/bridge is at D0 during attach. The common pcie power management interfaces are implemented in pcie module.

  4. Make sure the devinfo node has “device_type” and “range” property. Set if not. “pciex” or “pci” according to the type of bridge. For PCI and PCI-X devices including PCIe2PCI bridge, initialize cache-line-size and latency timer configuration registers.

  5. Initialize the bridge itself by calling pcie_init(). pcie_init() is a common interface for PCIe devices provided by the pcie module.

  6. Initialize interrupt handlers. Before interrupts are initialized, _OSC initialization needs to be done. The _OSC object is a control method that is used by OSPM to communicate to the platform the feature support or capabilities provided by a device’s driver. This object is a child object of a device and may also exist in the /_SB scope, where it can be used to convey platform-wide OSPM capabilities. The driver needs to evaluate _OSC to notify the platform that it can handle advanced error reporting (AER). When initializing interrupt handlers, if both MSI and FIXED are supported, try to attach MSI first. If MSI fails for any reason, then try FIXED, but only allow one type to be attached. For a bridge, interrupts are allocated and initialized for hotplug, PME and errors. PME stands for power management event: components may wake up the system using a wakeup mechanism followed by a power management event (PME) message.

  7. Do any platform specific workarounds needed. x86 specific workarounds needed at the end of pcieb attach. Must apply workaround only after all initialization is done.

  8. If this is a root port, determine and set the max payload size. Since this will involve scanning the fabric, all error enabling and sw workarounds should be in place before doing this.

 

pcieb_detach() does the opposite operations.

 

Driver Operations:

 

For nexus bus driver, driver operations are mainly designed for bus/device control. pcieb_open() is called when the device special file is being opened by an application. It uses a mutex lock to keep exclusive open and calls pcie_open(). pcie_open() is a common interface provided by misc/pcie module. pcieb_close() does the opposite operations. pcieb_ioctl() relies on pcie_ioctl() to handle devctl and hotplug related ioctls.

 

Bus Operations:

 

Bus operations are the core of a nexus driver. The pcieb module defines its bus operations as below.

 

92 static struct bus_ops pcieb_bus_ops = {

93 |_______BUSO_REV,

94 |_______pcieb_bus_map,

95 |_______0,

96 |_______0,

97 |_______0,

98 |_______i_ddi_map_fault,

99 |_______ddi_dma_map,

100 |_______pcieb_dma_allochdl,

101 |_______ddi_dma_freehdl,

102 |_______ddi_dma_bindhdl,

103 |_______ddi_dma_unbindhdl,

104 |_______ddi_dma_flush,

105 |_______ddi_dma_win,

106 |_______pcieb_dma_mctl,

107 |_______pcieb_ctlops,

108 |_______ddi_bus_prop_op,

109 |_______ndi_busop_get_eventcookie,|_____/* (*bus_get_eventcookie)();|___*/

110 |_______ndi_busop_add_eventcall,|_______/* (*bus_add_eventcall)();|_____*/

111 |_______ndi_busop_remove_eventcall,|____/* (*bus_remove_eventcall)();|__*/

112 |_______ndi_post_event,||_______|_______/* (*bus_post_event)();||_______*/

113 |_______NULL,|__|_______|_______|_______/* (*bus_intr_ctl)();|__|_______*/

114 |_______NULL,|__|_______|_______|_______/* (*bus_config)(); |___|_______*/

115 |_______NULL,|__|_______|_______|_______/* (*bus_unconfig)(); |_|_______*/

116 |_______pcieb_fm_init_child,|___|_______/* (*bus_fm_init)(); |__|_______*/

117 |_______NULL,|__|_______|_______|_______/* (*bus_fm_fini)(); |__|_______*/

118 |_______i_ndi_busop_access_enter,|______/* (*bus_fm_access_enter)(); |__*/

119 |_______i_ndi_busop_access_exit,|_______/* (*bus_fm_access_exit)(); |___*/

120 |_______pcie_bus_power,||_______|_______/* (*bus_power)(); |____*/

121 |_______pcieb_intr_ops,||_______|_______/* (*bus_intr_op)(); |__|_______*/

122 |_______pcie_hp_common_ops|_____|_______/* (*bus_hp_op)(); |____|_______*/

123 };

 

As you can see, it implements bus_map(), dma_allochdl(), dma_mctl(), ctlops(), fm_initchild(), bus_power(), intr_ops() and hp_common_ops(). Let's check them out one by one.

 

  • pcieb_bus_map()

Call parent's bus_map() function.

  • pcieb_dma_allochdl()

A software workaround for PCI-X to PCI-E bridges.

 

1323 /*

1324 * Some PCI-X to PCI-E bridges do not support full 64-bit addressing on the

1325 * PCI-X side of the bridge. We build a special version of this driver for

1326 * those bridges, which uses PCIEB_ADDR_LIMIT_LO and/or PCIEB_ADDR_LIMIT_HI

1327 * to define the range of values which the chip can handle. The code below

1328 * then clamps the DMA address range supplied by the driver, preventing the

1329 * PCI-E nexus driver from allocating any memory the bridge can't deal

1330 * with.

1331 */

 

  • pcieb_dma_mctl()

FDVMA feature is not supported for any child device of Broadcom 5714/5715 PCIe-PCI bridge due to prefetch bug. Return failure immediately, so that these drivers will switch to regular DVMA path.

  • pcie_hp_common_ops()

  • pcieb_fm_init_child()

/* PASSTHROUGH */

  • pcieb_intr_ops()

No significant platform level operation, calling i_ddi_intr_ops() at last.

  • pcieb_ctlops()

How should the following comment be understood?

 

[common/io/pciex/pcieb.c]

704 |_______/*

705 |_______ * Pseudo nodes indicate a prototype node with per-instance

706 |_______ * properties to be merged into the real h/w device node.

707 |_______ * The interpretation of the unit-address is DD[,F]

708 |_______ * where DD is the device id and F is the function.

709 |_______ */

 

Answer:

This is a trick for setting per-instance device properties by using the Solaris driver.conf file. You can create pseudo device nodes by adding rules to driver.conf in the following format.

 

name="node name" class="class name" [property-name=value ...];

 

When loading driver.conf, which happens before hardware device enumeration, Solaris will create those pseudo device nodes with the given "node name" and properties. Later, when enumerating hardware device nodes, it will try to merge each real hardware device node with the pseudo device node that has the same "node name". In that way, you can assign per-instance properties by using the driver.conf file.

 

For code path, when a devinfo node is being initialized after binding to a driver, init_node() is called.

 

[common/os/devcfg.c]

1537 |_______|_______case DS_BOUND:

1538 |_______|_______|_______/*

1539 |_______|_______|_______ * The following transitions synchronizes on the

1540 |_______|_______|_______ * per-driver busy changing flag, since we already

1541 |_______|_______|_______ * have a driver.

1542 |_______|_______|_______ */

1543 |_______|_______|_______if ((rv = init_node(dip)) == DDI_SUCCESS)

1544 |_______|_______|_______|_______i_ddi_set_node_state(dip, DS_INITIALIZED);

1545 |_______|_______|_______break;

 

If the dip is a persistent node, then i_ndi_make_spec_children() is called to create and attach a dev_info node from a .conf file spec.

 

[common/os/devcfg.c]

983 |_______|_______/*

984 |_______|_______ * If the node is not a driver.conf node then merge

985 |_______|_______ * driver.conf properties from new path-bound driver.conf.

986 |_______|_______ */

987 |_______|_______if (ndi_dev_is_persistent_node(dip))

988 |_______|_______|_______(void) i_ndi_make_spec_children(pdip, 0);

 

In this function, the specs are fetched from the .conf file and init_spec_child() is called for each spec. A devi_flags bit, DEVI_MADE_CHILDREN, is reserved to mark whether the spec children have already been created.

 

[common/os/devcfg.c]

4501 /*

4502 * Lookup hwc specs from hash tables and make children from the spec

4503 * Because some .conf children are "merge" nodes, we also initialize

4504 * .conf children to merge properties onto hardware nodes.

4505 *

4506 * The pdip must be held busy.

4507 */

4508 int

4509 i_ndi_make_spec_children(dev_info_t *pdip, uint_t flags)

4510 {

4511 |_______extern struct hwc_spec *hwc_get_child_spec(dev_info_t *, major_t);

4512 |_______int|____|_______|_______circ;

4513 |_______struct hwc_spec||_______*list, *spec;

4514

4515 |_______ndi_devi_enter(pdip, &circ);

4516 |_______if (DEVI(pdip)->devi_flags & DEVI_MADE_CHILDREN) {

4517 |_______|_______ndi_devi_exit(pdip, circ);

4518 |_______|_______return (DDI_SUCCESS);

4519 |_______}

4520

4521 |_______list = hwc_get_child_spec(pdip, DDI_MAJOR_T_NONE);

4522 |_______for (spec = list; spec != NULL; spec = spec->hwc_next) {

4523 |_______|_______init_spec_child(pdip, spec, flags);

4524 |_______}

4525 |_______hwc_free_spec_list(list);

4526

4527 |_______mutex_enter(&DEVI(pdip)->devi_lock);

4528 |_______DEVI(pdip)->devi_flags |= DEVI_MADE_CHILDREN;

4529 |_______mutex_exit(&DEVI(pdip)->devi_lock);

4530 |_______ndi_devi_exit(pdip, circ);

4531 |_______return (DDI_SUCCESS);

4532 }

 

[common/os/devcfg.c]

4470 /*

4471 * create and attach a dev_info node from a .conf file spec

4472 */

4473 static void

4474 init_spec_child(dev_info_t *pdip, struct hwc_spec *specp, uint_t flags)

4475 {

4476 |________NOTE(ARGUNUSED(flags))

4477 |_______dev_info_t *dip;

4478 |_______char *node_name;

4479

4480 |_______if (((node_name = specp->hwc_devi_name) == NULL) ||

4481 |_______ (ddi_name_to_major(node_name) == DDI_MAJOR_T_NONE)) {

4482 |_______|_______char *tmp = node_name;

4483 |_______|_______if (tmp == NULL)

4484 |_______|_______|_______tmp = "<none>";

4485 |_______|_______cmn_err(CE_CONT,

4486 |_______|_______ "init_spec_child: parent=%s, bad spec (%s)\n",

4487 |_______|_______ ddi_node_name(pdip), tmp);

4488 |_______|_______return;

4489 |_______}

4490

4491 |_______dip = i_ddi_alloc_node(pdip, node_name, (pnode_t)DEVI_PSEUDO_NODEID,

4492 |_______ -1, specp->hwc_devi_sys_prop_ptr, KM_SLEEP);

4493

4494 |_______if (dip == NULL)

4495 |_______|_______return;

4496

4497 |_______if (ddi_initchild(pdip, dip) != DDI_SUCCESS)

4498 |_______|_______(void) ddi_remove_child(dip, 0);

4499 }

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值