ixgbe网卡驱动Ⅱ---- 驱动注册

目录

1 ixgbe 网卡注册驱动

1.1 ixgbe_driver 类

1.2 ixgbe_driver 注册/注销

2 ixgbe 的 PCI 注册驱动流程 pci_register_driver() 

3 ixgbe 网卡探测 ixgbe_probe()【核心】

3.1  ixgbe_info 选取

3.2 net_device/ixgbe_adapter  分配

3.3 读取eeprom中的mac地址,写入hw->mac.perm_addr

3.4 ixgbe_init_interrupt_scheme()

3.4.1 设置收发队列 ixgbe_set_num_queues() 

3.4.2 向PCI子系统请求中断 ixgbe_set_interrupt_capability()

3.4.3 申请中断向量表 ixgbe_alloc_q_vectors

3.4.4 rx/tx 中的描述符 fd 分配注册 ixgbe_cache_ring_register


1 ixgbe 网卡注册驱动

和大部分设备驱动一样,网卡驱动是作为一个 module 注册到 kernel 的
通过 module_init() -> ixgbe_init_module() -> pci_register_driver() 注册 ixgbe_driver
通过 module_exit() -> ixgbe_exit_module() -> pci_unregister_driver() 注销 ixgbe_driver

1.1 ixgbe_driver 类

/*
 * PCI driver descriptor for ixgbe. This is the object handed to
 * pci_register_driver()/pci_unregister_driver() by the module init/exit
 * routines; it names the driver, lists the supported device IDs and
 * supplies the callbacks invoked by the PCI core.
 */
static struct pci_driver ixgbe_driver = {
	.name     = ixgbe_driver_name,
	.id_table = ixgbe_pci_tbl,
	.probe    = ixgbe_probe, /* called by the PCI core once a matching ixgbe NIC is detected */
	.remove   = ixgbe_remove,
#ifdef CONFIG_PM
	/* power-management callbacks are only built when CONFIG_PM is set */
	.suspend  = ixgbe_suspend,
	.resume   = ixgbe_resume,
#endif
	.shutdown = ixgbe_shutdown,
	.sriov_configure = ixgbe_pci_sriov_configure,
	.err_handler = &ixgbe_err_handler
};

1.2 ixgbe_driver 注册/注销

/**
 * ixgbe_init_module - Driver Registration Routine
 *
 * Entry point run when the module is loaded. It prints the driver banner,
 * creates the driver's single-threaded workqueue, initialises the debug
 * support and finally registers ixgbe_driver with the PCI subsystem.
 *
 * Returns 0 on success, -ENOMEM when the workqueue cannot be created, or
 * the error reported by pci_register_driver().
 **/
static int __init ixgbe_init_module(void)
{
	int err;

	pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version);
	pr_info("%s\n", ixgbe_copyright);

	ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name);
	if (ixgbe_wq == NULL) {
		pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name);
		return -ENOMEM;
	}

	ixgbe_dbg_init();

	/* register ixgbe_driver with the PCI core */
	err = pci_register_driver(&ixgbe_driver);
	if (err != 0)
		goto err_unwind;

#ifdef CONFIG_IXGBE_DCA
	dca_register_notify(&dca_notifier);
#endif

	return 0;

err_unwind:
	/* registration failed: drop the workqueue and the debug support */
	destroy_workqueue(ixgbe_wq);
	ixgbe_dbg_exit();
	return err;
}

module_init(ixgbe_init_module);

/**
 * ixgbe_exit_module - Driver Exit Cleanup Routine
 *
 * Runs just before the module is removed from memory. It unregisters the
 * DCA notifier (when built with CONFIG_IXGBE_DCA), unregisters ixgbe_driver
 * from the PCI subsystem, tears down the debug support and destroys the
 * driver workqueue if one exists.
 **/
static void __exit ixgbe_exit_module(void)
{
#ifdef CONFIG_IXGBE_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	/* unregister ixgbe_driver from the PCI core */
	pci_unregister_driver(&ixgbe_driver);

	ixgbe_dbg_exit();

	/* nothing left to do when no workqueue was created */
	if (ixgbe_wq == NULL)
		return;

	destroy_workqueue(ixgbe_wq);
	ixgbe_wq = NULL;
}

module_exit(ixgbe_exit_module);

2 ixgbe 的 PCI 注册驱动流程 pci_register_driver() 

pci_register_driver() ->
__pci_register_driver() ->
driver_register() ->
bus_add_driver() ->
driver_attach() ->
bus_for_each_dev() ->
__driver_attach() ->
driver_probe_device() ->
really_probe() ->
pci_device_probe() ->
__pci_device_probe() ->
pci_call_probe() ->
local_pci_probe()

/*
 * Abridged excerpt of the PCI core's local_pci_probe(); the "..." lines
 * stand for code omitted by the article. The line shown is where the
 * driver's own probe callback (ixgbe_probe for ixgbe_driver) gets invoked.
 */
static long local_pci_probe(void *_ddi)
{
	...
	rc = pci_drv->probe(pci_dev, ddi->id); /* once the device is detected, call the driver's probe callback */
	...
}

3 ixgbe 网卡探测 ixgbe_probe()【核心】

/**
 * ixgbe_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in ixgbe_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * ixgbe_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 *
 * Order of work as visible in the body: enable the PCI device's memory
 * space, choose a 64-bit or 32-bit DMA mask, request the memory regions,
 * allocate the net_device/ixgbe_adapter pair, ioremap BAR0, copy the
 * per-board ops from the selected ixgbe_info, run ixgbe_sw_init(), reset
 * the hardware, fill in the netdev feature flags, validate the EEPROM
 * checksum and the MAC address, set up the service timer/task, build the
 * interrupt scheme, start the hardware and register the net_device.
 * Failures unwind through the err_* labels at the bottom.
 **/
static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct ixgbe_adapter *adapter = NULL;
	struct ixgbe_hw *hw;
	const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];/* per-board info: ent->driver_data indexes ixgbe_info_tbl */
	int i, err, pci_using_dac, expected_gts;
	unsigned int indices = MAX_TX_QUEUES;
	u8 part_str[IXGBE_PBANUM_LENGTH];
	bool disable_dev = false;
#ifdef IXGBE_FCOE
	u16 device_caps;
#endif
	u32 eec;

	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
		     pci_name(pdev), pdev->vendor, pdev->device);
		return -EINVAL;
	}

	/* Author's note on the call chain (not visible in this file):
	 * pci_enable_device_mem() -> __pci_enable_device_flags() -> do_pci_enable_device()
	 * -> pcibios_enable_device() -> pci_enable_resources() -> pci_write_config_word()
	 * writes PCI_COMMAND_MEMORY (0x2) into the Command register (0x04) of
	 * config space so the driver may access the NIC's memory space.
	 */
	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	/* Check and set the DMA address width: try a 64-bit mask first and
	 * fall back to 32-bit; pci_using_dac records which one succeeded.
	 * (Author's call-chain note: pci_set_dma_mask() -> dma_set_mask()
	 * -> dma_supported().)
	 */
	if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
		pci_using_dac = 1;
	} else {
		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev,
				"No usable DMA configuration, aborting\n");
			goto err_dma;
		}
		pci_using_dac = 0;
	}


	/* Author's note on the call chain (not visible in this file):
	 * pci_request_mem_regions() -> pci_request_selected_regions()
	 * -> __pci_request_selected_regions() -> __pci_request_region()
	 * -> request_region()/__request_mem_region() -> __request_region()
	 * -> __request_resource()
	 * Claims the bus addresses described by the BARs (inserts the
	 * resource into the iomem_resource tree).
	 */
	err = pci_request_mem_regions(pdev, ixgbe_driver_name);
	if (err) {
		dev_err(&pdev->dev,
			"pci_request_selected_regions failed 0x%x\n", err);
		goto err_pci_reg;
	}

	pci_enable_pcie_error_reporting(pdev);
	
	/* Author's note: pci_set_master() -> __pci_set_master() -> pci_write_config_word()
	 * writes PCI_COMMAND_MASTER (0x4) into the Command register (0x04),
	 * allowing the NIC to become a bus master.
	 */
	pci_set_master(pdev);
	

	/* Author's note: pci_save_state() -> pci_read_config_dword()
	 * reads the config space and saves it in dev->saved_config_space.
	 */
	pci_save_state(pdev);

	if (ii->mac == ixgbe_mac_82598EB) {
#ifdef CONFIG_IXGBE_DCB
		/* 8 TC w/ 4 queues per TC */
		indices = 4 * MAX_TRAFFIC_CLASS;
#else
		indices = IXGBE_MAX_RSS_INDICES;
#endif
	}

	/* allocate the net_device with an ixgbe_adapter as its private area; TX queue count is indices */
	netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
	if (!netdev) {
		err = -ENOMEM;
		goto err_alloc_etherdev;
	}

	SET_NETDEV_DEV(netdev, &pdev->dev);

	adapter = netdev_priv(netdev); /* the ixgbe_adapter lives in the netdev private area */

	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;	/* ixgbe_hw is embedded in the adapter */
	hw->back = adapter;
	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);


	/* Map the bus address range of BAR0 into kernel virtual address space
	 * and store it in hw->hw_addr; register accesses to the NIC's BAR0
	 * memory space go through this pointer.
	 */
	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
			      pci_resource_len(pdev, 0));
	adapter->io_addr = hw->hw_addr;
	if (!hw->hw_addr) {
		err = -EIO;
		goto err_ioremap;
	}

	netdev->netdev_ops = &ixgbe_netdev_ops;/* install ixgbe_netdev_ops */
	ixgbe_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;
	strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name));

	/* Setup hw api */
	hw->mac.ops   = *ii->mac_ops;
	hw->mac.type  = ii->mac;
	hw->mvals     = ii->mvals;
	if (ii->link_ops)
		hw->link.ops  = *ii->link_ops;

	/* EEPROM */
	hw->eeprom.ops = *ii->eeprom_ops;
	eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));/* read the IXGBE_EEC register through the BAR0 mapping */
	if (ixgbe_removed(hw->hw_addr)) {
		err = -EIO;
		goto err_ioremap;
	}
	/* If EEPROM is valid (bit 8 = 1), use default otherwise use bit bang */
	if (!(eec & BIT(8)))
		hw->eeprom.ops.read = &ixgbe_read_eeprom_bit_bang_generic;

	/* PHY */
	hw->phy.ops = *ii->phy_ops;
	hw->phy.sfp_type = ixgbe_sfp_type_unknown;
	/* ixgbe_identify_phy_generic will set prtad and mmds properly */
	hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
	hw->phy.mdio.mmds = 0;
	hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
	hw->phy.mdio.dev = netdev;
	hw->phy.mdio.mdio_read = ixgbe_mdio_read;
	hw->phy.mdio.mdio_write = ixgbe_mdio_write;

	/* setup the private structure */
	/* Author's summary of ixgbe_sw_init() (its body is not shown here):
	 *  - adapter->tx/rx_ring_count set to 1024 (default 1024, min 64, max 4096)
	 *  - adapter->ring_feature[RING_F_RSS].indices set to
	 *    min(number of CPUs, IXGBE_MAX_RSS_INDICES (16))
	 *  - adapter->ring_feature[RING_F_FDIR].indices set to
	 *    IXGBE_MAX_FDIR_INDICES (64)
	 *  - IXGBE_FLAG_RSS_ENABLED and IXGBE_FLAG_FDIR_HASH_CAPABLE set in
	 *    adapter->flags
	 */
	err = ixgbe_sw_init(adapter, ii);
	if (err)
		goto err_sw_init;

	/* Make sure the SWFW semaphore is in a valid state */
	if (hw->mac.ops.init_swfw_sync)
		hw->mac.ops.init_swfw_sync(hw);

	/* Make it possible the adapter to be woken up via WOL */
	switch (adapter->hw.mac.type) {
	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
	case ixgbe_mac_x550em_a:
		IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
		break;
	default:
		break;
	}

	/*
	 * If there is a fan on this device and it has failed log the
	 * failure.
	 */
	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
		u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
		if (esdp & IXGBE_ESDP_SDP1)
			e_crit(probe, "Fan has stopped, replace the adapter\n");
	}

	if (allow_unsupported_sfp)
		hw->allow_unsupported_sfp = allow_unsupported_sfp;

	/* reset_hw fills in the perm_addr as well */
	hw->phy.reset_if_overtemp = true;

	/* For the 82599 reset_hw is ixgbe_reset_hw_82599(), which calls
	 * get_mac_addr (ixgbe_get_mac_addr_generic()) to store the permanent
	 * MAC address in hw->mac.perm_addr.
	 */
	err = hw->mac.ops.reset_hw(hw);

	hw->phy.reset_if_overtemp = false;
	ixgbe_set_eee_capable(adapter);
	if (err == IXGBE_ERR_SFP_NOT_PRESENT) {
		err = 0;
	} else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
		e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n");
		e_dev_err("Reload the driver after installing a supported module.\n");
		goto err_sw_init;
	} else if (err) {
		e_dev_err("HW Init failed: %d\n", err);
		goto err_sw_init;
	}

#ifdef CONFIG_PCI_IOV
	/* SR-IOV not supported on the 82598 */
	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
		goto skip_sriov;
	/* Mailbox */
	ixgbe_init_mbx_params_pf(hw);
	hw->mbx.ops = ii->mbx_ops;
	pci_sriov_set_totalvfs(pdev, IXGBE_MAX_VFS_DRV_LIMIT);
	ixgbe_enable_sriov(adapter, max_vfs);
skip_sriov:

#endif
	netdev->features = NETIF_F_SG |
			   NETIF_F_TSO |
			   NETIF_F_TSO6 |
			   NETIF_F_RXHASH |
			   NETIF_F_RXCSUM |
			   NETIF_F_HW_CSUM;

#define IXGBE_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
				    NETIF_F_GSO_GRE_CSUM | \
				    NETIF_F_GSO_IPXIP4 | \
				    NETIF_F_GSO_IPXIP6 | \
				    NETIF_F_GSO_UDP_TUNNEL | \
				    NETIF_F_GSO_UDP_TUNNEL_CSUM)

	netdev->gso_partial_features = IXGBE_GSO_PARTIAL_FEATURES;
	netdev->features |= NETIF_F_GSO_PARTIAL |
			    IXGBE_GSO_PARTIAL_FEATURES;

	if (hw->mac.type >= ixgbe_mac_82599EB)
		netdev->features |= NETIF_F_SCTP_CRC;

	/* copy netdev features into list of user selectable features */
	netdev->hw_features |= netdev->features |
			       NETIF_F_HW_VLAN_CTAG_FILTER |
			       NETIF_F_HW_VLAN_CTAG_RX |
			       NETIF_F_HW_VLAN_CTAG_TX |
			       NETIF_F_RXALL |
			       NETIF_F_HW_L2FW_DOFFLOAD;

	if (hw->mac.type >= ixgbe_mac_82599EB)
		netdev->hw_features |= NETIF_F_NTUPLE |
				       NETIF_F_HW_TC;

	/* advertise high-memory DMA only when the 64-bit DMA mask was accepted */
	if (pci_using_dac)
		netdev->features |= NETIF_F_HIGHDMA;

	netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
	netdev->hw_enc_features |= netdev->vlan_features;
	netdev->mpls_features |= NETIF_F_SG |
				 NETIF_F_TSO |
				 NETIF_F_TSO6 |
				 NETIF_F_HW_CSUM;
	netdev->mpls_features |= IXGBE_GSO_PARTIAL_FEATURES;

	/* set this bit last since it cannot be part of vlan_features */
	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
			    NETIF_F_HW_VLAN_CTAG_RX |
			    NETIF_F_HW_VLAN_CTAG_TX;

	netdev->priv_flags |= IFF_UNICAST_FLT;
	netdev->priv_flags |= IFF_SUPP_NOFCS;

	/* MTU range: 68 - 9710 */
	netdev->min_mtu = ETH_MIN_MTU;
	netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);

#ifdef CONFIG_IXGBE_DCB
	if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE)
		netdev->dcbnl_ops = &ixgbe_dcbnl_ops;
#endif

#ifdef IXGBE_FCOE
	if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
		unsigned int fcoe_l;

		/* the FCoE-capable flag is cleared when the device caps have
		 * IXGBE_DEVICE_CAPS_FCOE_OFFLOADS set
		 */
		if (hw->mac.ops.get_device_caps) {
			hw->mac.ops.get_device_caps(hw, &device_caps);
			if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS)
				adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
		}


		fcoe_l = min_t(int, IXGBE_FCRETA_SIZE, num_online_cpus());
		adapter->ring_feature[RING_F_FCOE].limit = fcoe_l;

		netdev->features |= NETIF_F_FSO |
				    NETIF_F_FCOE_CRC;

		netdev->vlan_features |= NETIF_F_FSO |
					 NETIF_F_FCOE_CRC |
					 NETIF_F_FCOE_MTU;
	}
#endif /* IXGBE_FCOE */

	if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
		netdev->hw_features |= NETIF_F_LRO;
	if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
		netdev->features |= NETIF_F_LRO;

	/* make sure the EEPROM is good */
	if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
		e_dev_err("The EEPROM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_sw_init;
	}

	eth_platform_get_mac_address(&adapter->pdev->dev,
				     adapter->hw.mac.perm_addr);

	/* the permanent address becomes the netdev's current address */
	memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->dev_addr)) {
		e_dev_err("invalid MAC address\n");
		err = -EIO;
		goto err_sw_init;
	}

	/* Set hw->mac.addr to permanent MAC address */
	ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
	ixgbe_mac_set_default_filter(adapter);

	setup_timer(&adapter->service_timer, &ixgbe_service_timer,
		    (unsigned long) adapter);

	if (ixgbe_removed(hw->hw_addr)) {
		err = -EIO;
		goto err_sw_init;
	}
	INIT_WORK(&adapter->service_task, ixgbe_service_task);
	set_bit(__IXGBE_SERVICE_INITED, &adapter->state);
	clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);

	/* Author's summary of ixgbe_init_interrupt_scheme():
	 *   ixgbe_set_num_queues() -> ixgbe_set_fdir_queues()/ixgbe_set_rss_queues()
	 *   ixgbe_set_interrupt_capability() -> ixgbe_acquire_msix_vectors() -> pci_enable_msix()
	 *   ixgbe_alloc_q_vectors()
	 * i.e. set adapter->num_tx/rx_queues according to FDIR/RSS, request
	 * interrupts from the PCI subsystem, set the poll function, allocate
	 * the ixgbe_q_vector objects, initialise NAPI and add it to napi_list,
	 * and allocate the TX/RX ring arrays.
	 */
	err = ixgbe_init_interrupt_scheme(adapter);
	if (err)
		goto err_sw_init;

	for (i = 0; i < adapter->num_rx_queues; i++)
		u64_stats_init(&adapter->rx_ring[i]->syncp);
	for (i = 0; i < adapter->num_tx_queues; i++)
		u64_stats_init(&adapter->tx_ring[i]->syncp);
	for (i = 0; i < adapter->num_xdp_queues; i++)
		u64_stats_init(&adapter->xdp_ring[i]->syncp);

	/* WOL not supported for all devices */
	adapter->wol = 0;
	hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap);
	hw->wol_enabled = ixgbe_wol_supported(adapter, pdev->device,
						pdev->subsystem_device);
	if (hw->wol_enabled)
		adapter->wol = IXGBE_WUFC_MAG;

	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);

	/* save off EEPROM version number */
	hw->eeprom.ops.read(hw, 0x2e, &adapter->eeprom_verh);
	hw->eeprom.ops.read(hw, 0x2d, &adapter->eeprom_verl);

	/* pick up the PCI bus settings for reporting later */
	if (ixgbe_pcie_from_parent(hw))
		ixgbe_get_parent_bus_info(adapter);
	else
		 hw->mac.ops.get_bus_info(hw);

	/* calculate the expected PCIe bandwidth required for optimal
	 * performance. Note that some older parts will never have enough
	 * bandwidth due to being older generation PCIe parts. We clamp these
	 * parts to ensure no warning is displayed if it can't be fixed.
	 */
	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		expected_gts = min(ixgbe_enumerate_functions(adapter) * 10, 16);
		break;
	default:
		expected_gts = ixgbe_enumerate_functions(adapter) * 10;
		break;
	}

	/* don't check link if we failed to enumerate functions */
	if (expected_gts > 0)
		ixgbe_check_minimum_link(adapter, expected_gts);

	err = ixgbe_read_pba_string_generic(hw, part_str, sizeof(part_str));
	if (err)
		strlcpy(part_str, "Unknown", sizeof(part_str));
	if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
		e_dev_info("MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
			   hw->mac.type, hw->phy.type, hw->phy.sfp_type,
			   part_str);
	else
		e_dev_info("MAC: %d, PHY: %d, PBA No: %s\n",
			   hw->mac.type, hw->phy.type, part_str);

	e_dev_info("%pM\n", netdev->dev_addr);

	/* reset the hardware with the new settings */
	err = hw->mac.ops.start_hw(hw);
	if (err == IXGBE_ERR_EEPROM_VERSION) {
		/* We are running on a pre-production device, log a warning */
		e_dev_warn("This device is a pre-production adapter/LOM. "
			   "Please be aware there may be issues associated "
			   "with your hardware.  If you are experiencing "
			   "problems please contact your Intel or hardware "
			   "representative who provided you with this "
			   "hardware.\n");
	}
	/* replace the temporary PCI-name based name with the "eth%d" template before registering */
	strcpy(netdev->name, "eth%d");
	pci_set_drvdata(pdev, adapter);
	err = register_netdev(netdev);/* register the net_device */
	if (err)
		goto err_register;


	/* power down the optics for 82599 SFP+ fiber */
	if (hw->mac.ops.disable_tx_laser)
		hw->mac.ops.disable_tx_laser(hw);

	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);

#ifdef CONFIG_IXGBE_DCA
	if (dca_add_requester(&pdev->dev) == 0) {
		adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
		ixgbe_setup_dca(adapter);
	}
#endif
	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
		e_info(probe, "IOV is enabled with %d VFs\n", adapter->num_vfs);
		for (i = 0; i < adapter->num_vfs; i++)
			ixgbe_vf_configuration(pdev, (i | 0x10000000));
	}

	/* firmware requires driver version to be 0xFFFFFFFF
	 * since os does not support feature
	 */
	if (hw->mac.ops.set_fw_drv_ver)
		hw->mac.ops.set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF,
					   sizeof(ixgbe_driver_version) - 1,
					   ixgbe_driver_version);

	/* add san mac addr to netdev */
	ixgbe_add_sanmac_netdev(netdev);

	e_dev_info("%s\n", ixgbe_default_device_descr);

#ifdef CONFIG_IXGBE_HWMON
	if (ixgbe_sysfs_init(adapter))
		e_err(probe, "failed to allocate sysfs resources\n");
#endif /* CONFIG_IXGBE_HWMON */

	ixgbe_dbg_adapter_init(adapter);

	/* setup link for SFP devices with MNG FW, else wait for IXGBE_UP */
	if (ixgbe_mng_enabled(hw) && ixgbe_is_sfp(hw) && hw->mac.ops.setup_link)
		hw->mac.ops.setup_link(hw,
			IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL,
			true);

	return 0;

	/* Error unwinding: each label releases what was acquired before the
	 * corresponding failure point and then falls through to the next.
	 */
err_register:
	ixgbe_release_hw_control(adapter);
	ixgbe_clear_interrupt_scheme(adapter);
err_sw_init:
	ixgbe_disable_sriov(adapter);
	adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
	iounmap(adapter->io_addr);
	kfree(adapter->jump_tables[0]);
	kfree(adapter->mac_table);
	kfree(adapter->rss_key);
err_ioremap:
	/* disable the PCI device below only if __IXGBE_DISABLED was not already set */
	disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
	free_netdev(netdev);
err_alloc_etherdev:
	pci_release_mem_regions(pdev);
err_pci_reg:
err_dma:
	/* adapter is still NULL when we failed before the netdev allocation */
	if (!adapter || disable_dev)
		pci_disable_device(pdev);
	return err;
}

3.1  ixgbe_info 选取

根据网卡型号(82598/82599/X540/X550 等)在 ixgbe_info_tbl 列表中选择 ixgbe_info

const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];

/*
 * Lookup table from a board identifier (enum ixgbe_boards) to the matching
 * ixgbe_info descriptor. ixgbe_probe() indexes it with ent->driver_data.
 */
static const struct ixgbe_info *ixgbe_info_tbl[] = {
	[board_82598]		= &ixgbe_82598_info,
	[board_82599]		= &ixgbe_82599_info,
	[board_X540]		= &ixgbe_X540_info,
	[board_X550]		= &ixgbe_X550_info,
	[board_X550EM_x]	= &ixgbe_X550EM_x_info,
	[board_x550em_x_fw]	= &ixgbe_x550em_x_fw_info,
	[board_x550em_a]	= &ixgbe_x550em_a_info,
	[board_x550em_a_fw]	= &ixgbe_x550em_a_fw_info,
};
  
/*
 * Board identifiers; each value is used as a designated index into
 * ixgbe_info_tbl.
 */
enum ixgbe_boards {
	board_82598,
	board_82599,
	board_X540,
	board_X550,
	board_X550EM_x,
	board_x550em_x_fw,
	board_x550em_a,
	board_x550em_a_fw,
};

/*
 * Per-board descriptor for the 82599: MAC type plus the operation tables
 * (MAC, EEPROM, PHY, mailbox) and the mvals table. ixgbe_probe() copies
 * these into the adapter's ixgbe_hw (hw->mac.ops, hw->eeprom.ops,
 * hw->phy.ops, hw->mbx.ops, hw->mvals).
 */
const struct ixgbe_info ixgbe_82599_info = {
	.mac                    = ixgbe_mac_82599EB,
	.get_invariants         = &ixgbe_get_invariants_82599,
	.mac_ops                = &mac_ops_82599,
	.eeprom_ops             = &eeprom_ops_82599,
	.phy_ops                = &phy_ops_82599,
	.mbx_ops                = &mbx_ops_generic,
	.mvals                  = ixgbe_mvals_8259X,
};

3.2 net_device/ixgbe_adapter  分配

netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), MAX_TX_QUEUES);

/*
 * Allocate an Ethernet net_device with @sizeof_priv bytes of private data
 * and @queue_count TX queues. Thin wrapper around alloc_netdev_mq() using
 * the "eth%d" name template and ether_setup() as the setup callback.
 * Returns whatever alloc_netdev_mq() returns (NULL on failure).
 */
struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
{
   struct net_device *ndev;

   ndev = alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count);

   return ndev;
}
 
/*
 * Allocate a net_device together with its private area and TX queue array.
 *
 * @sizeof_priv: size of the driver-private area placed after the net_device
 * @name:        name (template) copied into dev->name; must fit dev->name
 * @setup:       callback that finishes type-specific initialisation
 * @queue_count: number of TX queues (netdev_queue entries) to allocate
 *
 * Returns the aligned net_device pointer, or NULL when any allocation or
 * dev_addr_init() fails (everything allocated so far is freed).
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
      void (*setup)(struct net_device *), unsigned int queue_count)
{
   struct netdev_queue *tx;
   struct net_device *dev;
   size_t alloc_size;
   struct net_device *p;
 
   /* sizeof is not evaluated, so using the uninitialised dev here is fine */
   BUG_ON(strlen(name) >= sizeof(dev->name));
 
   alloc_size = sizeof(struct net_device); /* size of the net_device itself */
   if (sizeof_priv) {
      /* ensure 32-byte alignment of private area */
      alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
      alloc_size += sizeof_priv; /* plus the size of the private data */
   }
   /* ensure 32-byte alignment of whole construct */
   alloc_size += NETDEV_ALIGN - 1;
 
   p = kzalloc(alloc_size, GFP_KERNEL); /* one zeroed allocation for net_device + private data */
   if (!p) {
      printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
      return NULL;
   }
 
   /* allocate queue_count netdev_queue entries (the TX queue array); one netdev_queue per TX queue */
   tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
   if (!tx) {
      printk(KERN_ERR "alloc_netdev: Unable to allocate "
             "tx qdiscs.\n");
      goto free_p;
   }
 
   /* dev is the aligned pointer inside p; padded records the offset between them */
   dev = PTR_ALIGN(p, NETDEV_ALIGN);
   dev->padded = (char *)dev - (char *)p;
 
   if (dev_addr_init(dev))
      goto free_tx;
 
   dev_unicast_init(dev);
 
   dev_net_set(dev, &init_net);
 
   dev->_tx = tx; /* remember the TX queue array */
   dev->num_tx_queues = queue_count; /* number of allocated TX queues */
   dev->real_num_tx_queues = queue_count; /* number of TX queues actually in use */
 
   dev->gso_max_size = GSO_MAX_SIZE;
 
   netdev_init_queues(dev); /* point dev->_tx[i].dev and dev->rx_queue.dev back at dev */
 
   INIT_LIST_HEAD(&dev->napi_list);
   dev->priv_flags = IFF_XMIT_DST_RELEASE;
   setup(dev); /* ether_setup() for Ethernet devices */
   strcpy(dev->name, name); /* length was checked by the BUG_ON above */
   return dev;
 
free_tx:
   kfree(tx);
 
free_p:
   kfree(p);
   return NULL;
}
 
static void netdev_init_queues(struct net_device *dev)
{
   netdev_init_one_queue(dev, &dev->rx_queue, NULL);
   netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
   spin_lock_init(&dev->tx_global_lock);
}
 
/*
 * Per-queue initialiser used as a netdev_for_each_tx_queue() callback:
 * records the owning device in the queue. @_unused is ignored.
 */
static void netdev_init_one_queue(struct net_device *dev,
              struct netdev_queue *queue,
              void *_unused)
{
   queue->dev = dev;
}
 
/*
 * Invoke @f once for every TX queue of @dev, in index order, passing the
 * device, the queue and the caller-supplied @arg.
 */
static inline void netdev_for_each_tx_queue(struct net_device *dev,
                   void (*f)(struct net_device *,
                        struct netdev_queue *,
                        void *),
                   void *arg)
{
   unsigned int idx = 0;

   /* the bound is re-read on every pass, as in a plain for loop */
   while (idx < dev->num_tx_queues) {
      f(dev, &dev->_tx[idx], arg);
      idx++;
   }
}
 
/*
 * Fill in the Ethernet-specific defaults of a freshly allocated net_device:
 * header ops, link type, header/address lengths, MTU, TX queue length,
 * interface flags and the all-ones broadcast address.
 */
void ether_setup(struct net_device *dev)
{
   /* link-layer identity */
   dev->type = ARPHRD_ETHER;            /* Ethernet framing */
   dev->header_ops = &eth_header_ops;

   /* sizes */
   dev->addr_len = ETH_ALEN;            /* 6 */
   dev->hard_header_len = ETH_HLEN;     /* 14 */
   dev->mtu = ETH_DATA_LEN;             /* 1500 */

   /* queueing and interface flags */
   dev->tx_queue_len = 1000;            /* Ethernet wants good queues */
   dev->flags = IFF_BROADCAST | IFF_MULTICAST;

   /* broadcast address is ff:ff:ff:ff:ff:ff */
   memset(dev->broadcast, 0xFF, ETH_ALEN);
}

3.3 读取eeprom中的mac地址,写入hw->mac.perm_addr

/*
 * 82599 board descriptor as quoted in this section: MAC type plus the MAC,
 * EEPROM and PHY operation tables. The .mac_ops entry (mac_ops_82599) is
 * what supplies reset_hw and get_mac_addr used further down.
 */
struct ixgbe_info ixgbe_82599_info = {
   .mac                    = ixgbe_mac_82599EB,
   .get_invariants         = &ixgbe_get_invariants_82599,
   .mac_ops                = &mac_ops_82599,
   .eeprom_ops             = &eeprom_ops_82599,
   .phy_ops                = &phy_ops_82599,
};
 
/*
 * MAC operation table for the 82599. Entries relevant to this section:
 * .reset_hw is ixgbe_reset_hw_82599() and .get_mac_addr is
 * ixgbe_get_mac_addr_generic(); the remaining callbacks mix 82599-specific
 * and generic implementations.
 */
static struct ixgbe_mac_operations mac_ops_82599 = {
   .init_hw                = &ixgbe_init_hw_generic,
   .reset_hw               = &ixgbe_reset_hw_82599,
   .start_hw               = &ixgbe_start_hw_82599,
   .clear_hw_cntrs         = &ixgbe_clear_hw_cntrs_generic,
   .get_media_type         = &ixgbe_get_media_type_82599,
   .get_supported_physical_layer = &ixgbe_get_supported_physical_layer_82599,
   .enable_rx_dma          = &ixgbe_enable_rx_dma_82599,
   .get_mac_addr           = &ixgbe_get_mac_addr_generic,
   .get_san_mac_addr       = &ixgbe_get_san_mac_addr_82599,
   .get_device_caps        = &ixgbe_get_device_caps_82599,
   .stop_adapter           = &ixgbe_stop_adapter_generic,
   .get_bus_info           = &ixgbe_get_bus_info_generic,
   .set_lan_id             = &ixgbe_set_lan_id_multi_port_pcie,
   .read_analog_reg8       = &ixgbe_read_analog_reg8_82599,
   .write_analog_reg8      = &ixgbe_write_analog_reg8_82599,
   .setup_link             = &ixgbe_setup_mac_link_82599,
   .check_link             = &ixgbe_check_mac_link_82599,
   .get_link_capabilities  = &ixgbe_get_link_capabilities_82599,
   .led_on                 = &ixgbe_led_on_generic,
   .led_off                = &ixgbe_led_off_generic,
   .blink_led_start        = &ixgbe_blink_led_start_generic,
   .blink_led_stop         = &ixgbe_blink_led_stop_generic,
   .set_rar                = &ixgbe_set_rar_generic,
   .clear_rar              = &ixgbe_clear_rar_generic,
   .set_vmdq               = &ixgbe_set_vmdq_82599,
   .clear_vmdq             = &ixgbe_clear_vmdq_82599,
   .init_rx_addrs          = &ixgbe_init_rx_addrs_generic,
   .update_uc_addr_list    = &ixgbe_update_uc_addr_list_generic,
   .update_mc_addr_list    = &ixgbe_update_mc_addr_list_generic,
   .enable_mc              = &ixgbe_enable_mc_generic,
   .disable_mc             = &ixgbe_disable_mc_generic,
   .clear_vfta             = &ixgbe_clear_vfta_82599,
   .set_vfta               = &ixgbe_set_vfta_82599,
   .fc_enable               = &ixgbe_fc_enable_generic,
   .init_uta_tables        = &ixgbe_init_uta_tables_82599,
   .setup_sfp              = &ixgbe_setup_sfp_modules_82599,
};
 
/*
 * ixgbe_reset_hw_82599 - reset the 82599 MAC and re-read its addresses
 * @hw: hardware structure
 *
 * Stops the adapter, initialises the PHY (and SFP module if needed),
 * disables PCIe master access, issues a software reset through
 * IXGBE_CTRL, saves or restores the AUTOC/AUTOC2 link settings,
 * re-initialises the receive address registers and finally stores the
 * permanent MAC and SAN MAC addresses in @hw.
 *
 * Returns 0 on success, or the last error status recorded:
 * IXGBE_ERR_SFP_NOT_SUPPORTED (early exit),
 * IXGBE_ERR_MASTER_REQUESTS_PENDING or IXGBE_ERR_RESET_FAILED.
 */
static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
{
   s32 status = 0;
   u32 ctrl, ctrl_ext;
   u32 i;
   u32 autoc;
   u32 autoc2;
 
   /* Call adapter stop to disable tx/rx and clear interrupts */
   hw->mac.ops.stop_adapter(hw);
 
   /* PHY ops must be identified and initialized prior to reset */
 
   /* Init PHY and function pointers, perform SFP setup */
   status = hw->phy.ops.init(hw);
 
   if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
      goto reset_hw_out;
 
   /* Setup SFP module if there is one present. */
   /* NOTE(review): this status is overwritten by ixgbe_disable_pcie_master() below */
   if (hw->phy.sfp_setup_needed) {
      status = hw->mac.ops.setup_sfp(hw);
      hw->phy.sfp_setup_needed = false;
   }
 
   /* Reset PHY */
   if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL)
      hw->phy.ops.reset(hw);
 
   /*
    * Prevent the PCI-E bus from hanging by disabling PCI-E master
    * access and verify no pending requests before reset
    */
   status = ixgbe_disable_pcie_master(hw);
   if (status != 0) {
      status = IXGBE_ERR_MASTER_REQUESTS_PENDING;
      hw_dbg(hw, "PCI-E Master disable polling has failed.\n");
   }
 
   /*
    * Issue global reset to the MAC.  This needs to be a SW reset.
    * If link reset is used, it might reset the MAC when mng is using it
    */
   ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
   IXGBE_WRITE_REG(hw, IXGBE_CTRL, (ctrl | IXGBE_CTRL_RST));
   IXGBE_WRITE_FLUSH(hw);
 
   /* Poll for reset bit to self-clear indicating reset is complete */
   for (i = 0; i < 10; i++) {
      udelay(1);
      ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
      if (!(ctrl & IXGBE_CTRL_RST))
         break;
   }
   if (ctrl & IXGBE_CTRL_RST) {
      status = IXGBE_ERR_RESET_FAILED;
      hw_dbg(hw, "Reset polling failed to complete.\n");
   }
   /* Clear PF Reset Done bit so PF/VF Mail Ops can work */
   /* NOTE(review): the code ORs IXGBE_CTRL_EXT_PFRSTD in, i.e. it sets the bit */
   ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
   ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD;
   IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
 
   msleep(50);
 
 
 
   /*
    * Store the original AUTOC/AUTOC2 values if they have not been
    * stored off yet.  Otherwise restore the stored original
    * values since the reset operation sets back to defaults.
    */
   autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
   autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
   if (hw->mac.orig_link_settings_stored == false) {
      hw->mac.orig_autoc = autoc;
      hw->mac.orig_autoc2 = autoc2;
      hw->mac.orig_link_settings_stored = true;
   } else {
      if (autoc != hw->mac.orig_autoc)
         IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc |
                         IXGBE_AUTOC_AN_RESTART));
 
      /* only the bits covered by IXGBE_AUTOC2_UPPER_MASK are restored */
      if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) !=
          (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) {
         autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK;
         autoc2 |= (hw->mac.orig_autoc2 &
                    IXGBE_AUTOC2_UPPER_MASK);
         IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
      }
   }
 
   /*
    * Store MAC address from RAR0, clear receive address registers, and
    * clear the multicast table.  Also reset num_rar_entries to 128,
    * since we modify this value when programming the SAN MAC address.
    */
   hw->mac.num_rar_entries = 128;
   hw->mac.ops.init_rx_addrs(hw);
 
   /* Store the permanent mac address */
   hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); /* fills hw->mac.perm_addr; ixgbe_get_mac_addr_generic() reads it from the RAL(0)/RAH(0) registers */
 
   /* Store the permanent SAN mac address */
   hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
 
   /* Add the SAN MAC address to the RAR only if it's a valid address */
   if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
      hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
                          hw->mac.san_addr, 0, IXGBE_RAH_AV);
 
      /* Reserve the last RAR for the SAN MAC address */
      hw->mac.num_rar_entries--;
   }
 
reset_hw_out:
   return status;
}
 
/*
 * ixgbe_get_mac_addr_generic - read the MAC address held in RAR entry 0
 * @hw: hardware structure
 * @mac_addr: output buffer; six bytes are written
 *
 * Reads RAH(0) and RAL(0) and unpacks them least-significant byte first:
 * bytes 0-3 come from RAL(0), bytes 4-5 from the low 16 bits of RAH(0).
 * Always returns 0.
 */
s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr)
{
   u32 high_word;
   u32 low_word;
   u16 pos;

   /* keep the register read order: high word first, then low word */
   high_word = IXGBE_READ_REG(hw, IXGBE_RAH(0));
   low_word = IXGBE_READ_REG(hw, IXGBE_RAL(0));

   for (pos = 0; pos < 6; pos++) {
      if (pos < 4)
         mac_addr[pos] = (u8)(low_word >> (pos * 8));
      else
         mac_addr[pos] = (u8)(high_word >> ((pos - 4) * 8));
   }

   return 0;
}
 
/*
 * Register offsets of the Receive Address Low/High pair for entry _i.
 * Entries 0..15 are based at 0x05400/0x05404, higher entries at
 * 0x0A200/0x0A204; in both ranges each entry is 8 bytes past the previous
 * one. Note that _i is evaluated twice.
 */
#define IXGBE_RAL(_i)   ((((_i) <= 15) ? 0x05400 : 0x0A200) + ((_i) * 8))
#define IXGBE_RAH(_i)   ((((_i) <= 15) ? 0x05404 : 0x0A204) + ((_i) * 8))

3.4 ixgbe_init_interrupt_scheme()

/**
 * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
 * @adapter: board private structure to initialize
 *
 * The interrupt scheme is chosen from:
 * - what the kernel offers (MSI-X, MSI), which the user may restrict via
 *   module parameters
 * - the hardware queue counts (num_*_queues), which in turn follow from
 *   the enabled hardware features (RSS, etc.)
 *
 * Returns 0 on success, or the error from ixgbe_alloc_q_vectors() after
 * the interrupt capability has been reset.
 **/
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
{
	int ret;

	/* Number of supported queues: fills adapter->num_rx/tx_queues */
	ixgbe_set_num_queues(adapter);

	/* Set interrupt mode: request interrupts from the PCI subsystem */
	ixgbe_set_interrupt_capability(adapter);

	/* allocate the queue vectors */
	ret = ixgbe_alloc_q_vectors(adapter);
	if (ret) {
		e_dev_err("Unable to allocate memory for queue vectors\n");
		ixgbe_reset_interrupt_capability(adapter);
		return ret;
	}

	/* NOTE(review): body not shown here; judging by the name this caches
	 * the ring register mapping - confirm against the implementation
	 */
	ixgbe_cache_ring_register(adapter);

	e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count = %u\n",
		   (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
		   adapter->num_rx_queues, adapter->num_tx_queues,
		   adapter->num_xdp_queues);

	/* the adapter is marked down until it is brought up */
	set_bit(__IXGBE_DOWN, &adapter->state);

	return 0;
}

3.4.1 设置收发队列 ixgbe_set_num_queues() 

/**
 * ixgbe_set_num_queues - Allocate queues for device, feature dependent
 * @adapter: board private structure to initialize
 *
 * Top level queue allocation routine. The helpers are tried from the
 * richest feature combination down to the plainest one, and the first
 * helper that returns non-zero ends the search; ordering therefore
 * matters. RSS is the final fallback.
 *
 **/
static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
{
	/* Base case: one RX queue, one TX queue, no XDP queues, one pool */
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_xdp_queues = 0;
	adapter->num_rx_pools = adapter->num_rx_queues;
	adapter->num_rx_queues_per_pool = 1;

#ifdef CONFIG_IXGBE_DCB
	/* DCB+SR-IOV first, then plain DCB; || short-circuits in that order */
	if (ixgbe_set_dcb_sriov_queues(adapter) ||
	    ixgbe_set_dcb_queues(adapter))
		return;

#endif
	if (!ixgbe_set_sriov_queues(adapter))
		ixgbe_set_rss_queues(adapter);
}

3.4.2 向PCI子系统请求中断 ixgbe_set_interrupt_capability()

/**
 * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
 * @adapter: board private structure to initialize
 *
 * Tries MSI-X first. When that is unavailable, the features that depend on
 * it (multi-TC DCB, SR-IOV, RSS) are disabled, the queue counts are
 * recomputed for a single vector and one MSI interrupt is requested; if
 * that fails too, only a warning is logged and IXGBE_FLAG_MSI_ENABLED
 * stays clear.
 **/
static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
{
	int msi_err;

	/* We will try to get MSI-X interrupts first */
	if (ixgbe_acquire_msix_vectors(adapter) == 0)
		return;

	/* At this point, we do not have MSI-X capabilities. We need to
	 * reconfigure or disable various features which require MSI-X
	 * capability.
	 */

	/* Disable DCB unless we only have a single traffic class */
	if (netdev_get_num_tc(adapter->netdev) > 1) {
		e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
		netdev_reset_tc(adapter->netdev);

		if (adapter->hw.mac.type == ixgbe_mac_82598EB)
			adapter->hw.fc.requested_mode = adapter->last_lfc_mode;

		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
		adapter->temp_dcb_cfg.pfc_mode_enable = false;
		adapter->dcb_cfg.pfc_mode_enable = false;
	}

	adapter->dcb_cfg.num_tcs.pg_tcs = 1;
	adapter->dcb_cfg.num_tcs.pfc_tcs = 1;

	/* Disable SR-IOV support */
	e_dev_warn("Disabling SR-IOV support\n");
	ixgbe_disable_sriov(adapter);

	/* Disable RSS */
	e_dev_warn("Disabling RSS support\n");
	adapter->ring_feature[RING_F_RSS].limit = 1;

	/* recalculate number of queues now that many features have been
	 * changed or disabled.
	 */
	ixgbe_set_num_queues(adapter);
	adapter->num_q_vectors = 1;

	/* ask the PCI subsystem for a single MSI interrupt */
	msi_err = pci_enable_msi(adapter->pdev);
	if (!msi_err) {
		adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
	} else {
		e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
			   msi_err);
	}
}

3.4.3 申请中断向量表 ixgbe_alloc_q_vectors

/**
 * ixgbe_alloc_q_vectors - allocate every interrupt vector of the adapter
 * @adapter: board private structure to initialize
 *
 * Distributes the adapter's Rx, Tx and XDP rings over its q_vectors and
 * allocates each vector through ixgbe_alloc_q_vector().  When there are at
 * least as many vectors as rings, every Rx ring first receives a vector of
 * its own; whatever rings are left are then spread evenly (rounding up)
 * over the remaining vectors.
 *
 * Return: 0 on success.  On any allocation failure the queue and vector
 * counts in @adapter are zeroed, the vectors allocated so far are freed
 * and -ENOMEM is returned.
 **/
static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
	int nvec = adapter->num_q_vectors;
	int rx_left = adapter->num_rx_queues;
	int tx_left = adapter->num_tx_queues;
	int xdp_left = adapter->num_xdp_queues;
	int rx_next = 0, tx_next = 0, xdp_next = 0;
	int vec = 0;

	/* Without MSI-X a single vector services every ring */
	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
		nvec = 1;

	/* Enough vectors for all rings: dedicate one vector per Rx ring */
	if (nvec >= rx_left + tx_left + xdp_left) {
		while (rx_left) {
			if (ixgbe_alloc_q_vector(adapter, nvec, vec,
						 0, 0, 0, 0, 1, rx_next))
				goto err_out;

			rx_left--;
			rx_next++;
			vec++;
		}
	}

	/* Share the rings that are still unassigned among the vectors left */
	while (vec < nvec) {
		int vecs_left = nvec - vec;
		int rx_here = DIV_ROUND_UP(rx_left, vecs_left);
		int tx_here = DIV_ROUND_UP(tx_left, vecs_left);
		int xdp_here = DIV_ROUND_UP(xdp_left, vecs_left);

		if (ixgbe_alloc_q_vector(adapter, nvec, vec,
					 tx_here, tx_next,
					 xdp_here, xdp_next,
					 rx_here, rx_next))
			goto err_out;

		rx_left -= rx_here;
		tx_left -= tx_here;
		xdp_left -= xdp_here;

		/* Rx/Tx start indices advance by one per vector, while the
		 * XDP start index advances by the number of rings consumed.
		 */
		rx_next++;
		tx_next++;
		xdp_next += xdp_here;
		vec++;
	}

	return 0;

err_out:
	adapter->num_q_vectors = 0;
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
	adapter->num_xdp_queues = 0;

	/* Release, newest first, every vector allocated before the failure */
	while (vec > 0)
		ixgbe_free_q_vector(adapter, --vec);

	return -ENOMEM;
}

ixgbe_alloc_q_vector 负责分配单个中断向量(q_vector),并为其注册 NAPI 模式的 poll 函数 ixgbe_poll(单次 poll 最多处理 64 个数据包)

/**
 * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
 * @adapter: board private structure to initialize
 * @v_count: q_vectors allocated on adapter, used for ring interleaving
 * @v_idx: index of vector in adapter struct
 * @txr_count: total number of Tx rings to allocate
 * @txr_idx: index of first Tx ring to allocate
 * @xdp_count: total number of XDP rings to allocate
 * @xdp_idx: index of first XDP ring to allocate
 * @rxr_count: total number of Rx rings to allocate
 * @rxr_idx: index of first Rx ring to allocate
 *
 * One allocation is sized to hold the q_vector plus
 * txr_count + xdp_count + rxr_count ring structures, which are reached
 * through q_vector->ring.  The rings are initialized in the order Tx, XDP,
 * Rx and each one is stored in adapter->tx_ring[], adapter->xdp_ring[] or
 * adapter->rx_ring[] once its fields have been set.  The vector's NAPI
 * context is registered with ixgbe_poll as its poll callback.
 *
 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
 **/
static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
				int v_count, int v_idx,
				int txr_count, int txr_idx,
				int xdp_count, int xdp_idx,
				int rxr_count, int rxr_idx)
{
	struct ixgbe_q_vector *q_vector;
	struct ixgbe_ring *ring;
	int node = NUMA_NO_NODE;
	int cpu = -1;
	int ring_count, size;
	u8 tcs = netdev_get_num_tc(adapter->netdev);

	/* q_vector header plus one ixgbe_ring per ring handled by it */
	ring_count = txr_count + rxr_count + xdp_count;
	size = sizeof(struct ixgbe_q_vector) +
	       (sizeof(struct ixgbe_ring) * ring_count);

	/* customize cpu for Flow Director mapping: only with at most one
	 * traffic class, SR-IOV disabled, more than one RSS index and a
	 * non-zero atr_sample_rate.  In that case, if the CPU numbered
	 * v_idx is online, it and its NUMA node are used for this vector.
	 */
	if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
		u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
		if (rss_i > 1 && adapter->atr_sample_rate) {
			if (cpu_online(v_idx)) {
				cpu = v_idx;
				node = cpu_to_node(cpu);
			}
		}
	}

	/* allocate q_vector and rings; if the node-specific allocation
	 * fails, retry without a node preference
	 */
	q_vector = kzalloc_node(size, GFP_KERNEL, node);
	if (!q_vector)
		q_vector = kzalloc(size, GFP_KERNEL);
	if (!q_vector)
		return -ENOMEM;

	/* setup affinity mask and node */
	if (cpu != -1)
		cpumask_set_cpu(cpu, &q_vector->affinity_mask);
	q_vector->numa_node = node;

#ifdef CONFIG_IXGBE_DCA
	/* initialize CPU for DCA */
	q_vector->cpu = -1;

#endif

	/* Register q_vector->napi with adapter->netdev.  ixgbe_poll is the
	 * poll callback and 64 is passed as the NAPI weight.
	 */

	/* initialize NAPI */
	netif_napi_add(adapter->netdev, &q_vector->napi,
		       ixgbe_poll, 64);



	/* tie q_vector and adapter together */
	adapter->q_vector[v_idx] = q_vector; /* store the new vector at adapter->q_vector[v_idx] */
	q_vector->adapter = adapter;
	q_vector->v_idx = v_idx;

	/* initialize work limits */
	q_vector->tx.work_limit = adapter->tx_work_limit;

	/* initialize pointer to rings */
	ring = q_vector->ring;

	/* initialize ITR: a setting of 1 selects the built-in rate
	 * (IXGBE_12K_ITR for Tx-only vectors, IXGBE_20K_ITR otherwise);
	 * any other setting is copied as is.
	 * NOTE(review): a setting of 1 presumably means "adaptive" - confirm.
	 */
	if (txr_count && !rxr_count) {
		/* tx only vector */
		if (adapter->tx_itr_setting == 1)
			q_vector->itr = IXGBE_12K_ITR;
		else
			q_vector->itr = adapter->tx_itr_setting;
	} else {
		/* rx or rx/tx vector */
		if (adapter->rx_itr_setting == 1)
			q_vector->itr = IXGBE_20K_ITR;
		else
			q_vector->itr = adapter->rx_itr_setting;
	}

	while (txr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Tx values */
		ixgbe_add_ring(ring, &q_vector->tx);

		/* apply Tx specific ring traits; with more than one Rx pool
		 * the queue index is taken modulo the queues per pool
		 */
		ring->count = adapter->tx_ring_count;
		if (adapter->num_rx_pools > 1)
			ring->queue_index =
				txr_idx % adapter->num_rx_queues_per_pool;
		else
			ring->queue_index = txr_idx;

		/* assign ring to adapter */
		WRITE_ONCE(adapter->tx_ring[txr_idx], ring);

		/* update count and index; this vector's next Tx ring sits
		 * v_count slots further on (rings interleave across vectors)
		 */
		txr_count--;
		txr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	while (xdp_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Tx values (XDP rings are added to the
		 * vector's Tx container)
		 */
		ixgbe_add_ring(ring, &q_vector->tx);

		/* apply Tx specific ring traits */
		ring->count = adapter->tx_ring_count;
		ring->queue_index = xdp_idx;
		set_ring_xdp(ring);

		/* assign ring to adapter */
		WRITE_ONCE(adapter->xdp_ring[xdp_idx], ring);

		/* update count and index; XDP rings of one vector are
		 * consecutive, not interleaved
		 */
		xdp_count--;
		xdp_idx++;

		/* push pointer to next ring */
		ring++;
	}

	while (rxr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Rx values */
		ixgbe_add_ring(ring, &q_vector->rx);

		/*
		 * 82599 errata, UDP frames with a 0 checksum
		 * can be marked as checksum errors.
		 */
		if (adapter->hw.mac.type == ixgbe_mac_82599EB)
			set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state);

#ifdef IXGBE_FCOE
		/* flag the ring as FCoE when its index lies inside the
		 * FCoE ring feature's [offset, offset + indices) window
		 */
		if (adapter->netdev->features & NETIF_F_FCOE_MTU) {
			struct ixgbe_ring_feature *f;
			f = &adapter->ring_feature[RING_F_FCOE];
			if ((rxr_idx >= f->offset) &&
			    (rxr_idx < f->offset + f->indices))
				set_bit(__IXGBE_RX_FCOE, &ring->state);
		}

#endif /* IXGBE_FCOE */
		/* apply Rx specific ring traits; with more than one Rx pool
		 * the queue index is taken modulo the queues per pool
		 */
		ring->count = adapter->rx_ring_count;
		if (adapter->num_rx_pools > 1)
			ring->queue_index =
				rxr_idx % adapter->num_rx_queues_per_pool;
		else
			ring->queue_index = rxr_idx;

		/* assign ring to adapter */
		WRITE_ONCE(adapter->rx_ring[rxr_idx], ring);

		/* update count and index; this vector's next Rx ring sits
		 * v_count slots further on (rings interleave across vectors)
		 */
		rxr_count--;
		rxr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	return 0;
}

3.4.4 缓存 rx/tx 描述符 ring 对应的寄存器索引 ixgbe_cache_ring_register

/**
 * ixgbe_cache_ring_register - record which register each ring maps to
 * @adapter: board private structure to initialize
 *
 * With the enabled feature set known, the register index assigned to each
 * descriptor ring is cached in the ring itself.  Ring 0 of both directions
 * defaults to register 0; the feature-specific helpers are then tried from
 * the richest combination (DCB + SR-IOV) down to plain RSS, and the first
 * one that reports success ends the search.  The call order therefore must
 * not be changed.
 **/
static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
{
	/* Default mapping for the first Tx and Rx ring */
	adapter->tx_ring[0]->reg_idx = 0;
	adapter->rx_ring[0]->reg_idx = 0;

#ifdef CONFIG_IXGBE_DCB
	/* DCB combined with SR-IOV is tried before DCB alone */
	if (ixgbe_cache_ring_dcb_sriov(adapter) ||
	    ixgbe_cache_ring_dcb(adapter))
		return;

#endif
	/* RSS is the last resort when the SR-IOV mapping does not apply */
	if (!ixgbe_cache_ring_sriov(adapter))
		ixgbe_cache_ring_rss(adapter);
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值