ixgbe网卡驱动(一)

注册网卡驱动

和大部分设备驱动一样,网卡驱动是作为一个module注册到kernel的
通过module_init() -> ixgbe_init_module() -> pci_register_driver()注册ixgbe_driver
通过module_exit() -> ixgbe_exit_module() -> pci_unregister_driver()注销ixgbe_driver

/*
 * PCI driver descriptor for ixgbe: names the driver, supplies the device-ID
 * table used for matching, and lists the callbacks for probe/remove, power
 * management (only with CONFIG_PM), shutdown, SR-IOV configuration and
 * PCI error handling.
 */
static struct pci_driver ixgbe_driver = {
	.name     = ixgbe_driver_name,
	.id_table = ixgbe_pci_tbl,
	.probe    = ixgbe_probe, // ixgbe_probe() is called once the system detects an ixgbe NIC
	.remove   = ixgbe_remove,
#ifdef CONFIG_PM
	.suspend  = ixgbe_suspend,
	.resume   = ixgbe_resume,
#endif
	.shutdown = ixgbe_shutdown,
	.sriov_configure = ixgbe_pci_sriov_configure,
	.err_handler = &ixgbe_err_handler
};

/*
 * Module entry point (excerpt: "..." marks code elided by the article).
 * Registers ixgbe_driver with the PCI core.
 */
static int __init ixgbe_init_module(void)
{
	...
	ret = pci_register_driver(&ixgbe_driver); // register ixgbe_driver
	...
}

module_init(ixgbe_init_module);

/*
 * Module exit point (excerpt: "..." marks code elided by the article).
 * Unregisters ixgbe_driver from the PCI core.
 */
static void __exit ixgbe_exit_module(void)
{
	...
	pci_unregister_driver(&ixgbe_driver); // unregister ixgbe_driver
	...
}

module_exit(ixgbe_exit_module);

pci_register_driver()

pci_register_driver() ->
__pci_register_driver() ->
driver_register() ->
bus_add_driver() ->
driver_attach() ->
bus_for_each_dev() ->
__driver_attach() ->
driver_probe_device() ->
really_probe() ->
pci_device_probe() ->
__pci_device_probe() ->
pci_call_probe() ->
local_pci_probe()

/*
 * Last step of the PCI core's probe chain (excerpt: "..." marks code elided
 * by the article): invokes the matched driver's probe callback.
 */
static long local_pci_probe(void *_ddi)
{
	...
	rc = pci_drv->probe(pci_dev, ddi->id); // call the device driver's probe once the device has been detected
	...
}

ixgbe_probe()

/*
 * ixgbe_probe - PCI probe callback: bring up one ixgbe adapter.
 * @pdev: PCI device being probed
 * @ent:  matching entry of the driver's ID table; ent->driver_data indexes
 *        ixgbe_info_tbl
 *
 * Enables the PCI device, configures DMA masks, claims and maps BAR0,
 * allocates the net_device/ixgbe_adapter pair, copies the per-MAC operation
 * tables, resets the hardware, sets netdev feature flags, validates the
 * EEPROM checksum and MAC address, sets up the interrupt scheme and finally
 * registers the net_device.
 *
 * Returns 0 on success, otherwise an error code after unwinding through the
 * err_* labels at the bottom.
 */
static int __devinit ixgbe_probe(struct pci_dev *pdev,
                                 const struct pci_device_id *ent)
{
   struct net_device *netdev;
   struct ixgbe_adapter *adapter = NULL;
   struct ixgbe_hw *hw;
   const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data]; // pick the ixgbe_info entry for this NIC model (82598/82599)
   static int cards_found;
   int i, err, pci_using_dac;
#ifdef IXGBE_FCOE
   u16 device_caps;
#endif
   u32 part_num, eec;
 
   /* pci_enable_device_mem() -> __pci_enable_device_flags() -> do_pci_enable_device()
      -> pcibios_enable_device() -> pci_enable_resources() -> pci_write_config_word()
      Writes PCI_COMMAND_MEMORY (0x2) into the Command configuration register (0x04),
      allowing the driver to access the NIC's memory space */
   err = pci_enable_device_mem(pdev);
   if (err)
      return err;
 
   /* pci_set_dma_mask() -> dma_set_mask() -> dma_supported()
      Check and set the DMA address width: try 64-bit first, otherwise 32-bit.
      NOTE(review): in the 32-bit fallback the consistent mask is only
      attempted when setting the streaming mask failed - confirm intent */
   if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) &&
       !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
      pci_using_dac = 1;
   } else {
      err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
      if (err) {
         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
         if (err) {
            dev_err(&pdev->dev, "No usable DMA "
                    "configuration, aborting\n");
            goto err_dma;
         }
      }
      pci_using_dac = 0;
   }
 
   /* pci_request_selected_regions() -> __pci_request_selected_regions() -> __pci_request_region()
      -> request_region()/__request_mem_region() -> __request_region() -> __request_resource()
      Claim the bus addresses described by the memory BARs (the resource is
      inserted into the iomem_resource tree) */
   err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
                                      IORESOURCE_MEM), ixgbe_driver_name);
   if (err) {
      dev_err(&pdev->dev,
              "pci_request_selected_regions failed 0x%x\n", err);
      goto err_pci_reg;
   }
 
   pci_enable_pcie_error_reporting(pdev);
 
   /* pci_set_master() -> __pci_set_master() -> pci_write_config_word()
      Writes PCI_COMMAND_MASTER (0x4) into the Command configuration register (0x04),
      allowing the NIC to request bus mastership */
   pci_set_master(pdev);
   /* pci_save_state() -> pci_read_config_dword()
      Read the configuration space and save it in dev->saved_config_space */
   pci_save_state(pdev);

   // allocate the net_device together with the ixgbe_adapter private area; Tx queue count is MAX_TX_QUEUES (128)
   netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), MAX_TX_QUEUES);
   if (!netdev) {
      err = -ENOMEM;
      goto err_alloc_etherdev;
   }
 
   SET_NETDEV_DEV(netdev, &pdev->dev);
 
   pci_set_drvdata(pdev, netdev);
   adapter = netdev_priv(netdev); // pointer to the ixgbe_adapter (netdev private area)
 
   adapter->netdev = netdev;
   adapter->pdev = pdev;
   hw = &adapter->hw; // pointer to the embedded ixgbe_hw
   hw->back = adapter;
   adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
 
   // map the bus address in BAR0 to a kernel address stored in hw->hw_addr; the driver reaches the BAR0 memory space through hw->hw_addr
   hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
                         pci_resource_len(pdev, 0));
   if (!hw->hw_addr) {
      err = -EIO;
      goto err_ioremap;
   }
 
   /* NOTE(review): this loop has no effect - its body only continues */
   for (i = 1; i <= 5; i++) {
      if (pci_resource_len(pdev, i) == 0)
         continue;
   }
 
   netdev->netdev_ops = &ixgbe_netdev_ops; // install ixgbe_netdev_ops
   ixgbe_set_ethtool_ops(netdev);
   netdev->watchdog_timeo = 5 * HZ;
   strcpy(netdev->name, pci_name(pdev));
 
   adapter->bd_number = cards_found; // board index = cards successfully probed so far (0 for the first card)
 
   /* Setup hw api */
   memcpy(&hw->mac.ops, ii->mac_ops, sizeof(hw->mac.ops));
   hw->mac.type  = ii->mac;
 
   /* EEPROM */
   memcpy(&hw->eeprom.ops, ii->eeprom_ops, sizeof(hw->eeprom.ops));
   eec = IXGBE_READ_REG(hw, IXGBE_EEC); // read the IXGBE_EEC register from the BAR0 memory space
   /* If EEPROM is valid (bit 8 = 1), use default otherwise use bit bang */
   if (!(eec & (1 << 8)))
      hw->eeprom.ops.read = &ixgbe_read_eeprom_bit_bang_generic;
 
   /* PHY */
   memcpy(&hw->phy.ops, ii->phy_ops, sizeof(hw->phy.ops));
   hw->phy.sfp_type = ixgbe_sfp_type_unknown;
   /* ixgbe_identify_phy_generic will set prtad and mmds properly */
   hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
   hw->phy.mdio.mmds = 0;
   hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
   hw->phy.mdio.dev = netdev;
   hw->phy.mdio.mdio_read = ixgbe_mdio_read;
   hw->phy.mdio.mdio_write = ixgbe_mdio_write;
 
   /* set up this timer and work struct before calling get_invariants
    * which might start the timer
    */
   init_timer(&adapter->sfp_timer);
   adapter->sfp_timer.function = &ixgbe_sfp_timer;
   adapter->sfp_timer.data = (unsigned long) adapter;
 
   INIT_WORK(&adapter->sfp_task, ixgbe_sfp_task);
 
   /* multispeed fiber has its own tasklet, called from GPI SDP1 context */
   INIT_WORK(&adapter->multispeed_fiber_task, ixgbe_multispeed_fiber_task);
 
   /* a new SFP+ module arrival, called from GPI SDP2 context */
   INIT_WORK(&adapter->sfp_config_module_task,
             ixgbe_sfp_config_module_task);
   /* Article's trace for 82599 (callees not shown here):
      ixgbe_get_invariants_82599() -> ixgbe_get_pcie_msix_count_82599()
      sets hw->mac.max_tx/rx_queues to IXGBE_82599_MAX_TX/RX_QUEUES (128);
      reads MSI_X_N (0x3F = 63) from the EEPROM and stores it in hw->mac.max_msix_vectors */
   ii->get_invariants(hw);
 
   /* setup the private structure */
   /* Article's summary of ixgbe_sw_init() (callee not shown here):
      sets adapter->tx/rx_ring_count to 1024 (default 1024, min 64, max 4096)
      sets adapter->ring_feature[RING_F_RSS].indices to min(#CPUs, IXGBE_MAX_RSS_INDICES(16))
      sets adapter->ring_feature[RING_F_FDIR].indices to IXGBE_MAX_FDIR_INDICES(64)
      sets IXGBE_FLAG_RSS_ENABLED and IXGBE_FLAG_FDIR_HASH_CAPABLE in adapter->flags */
   err = ixgbe_sw_init(adapter);
   if (err)
      goto err_sw_init;
 
   /*
    * If there is a fan on this device and it has failed log the
    * failure.
    */
   if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
      u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
      if (esdp & IXGBE_ESDP_SDP1)
         DPRINTK(PROBE, CRIT,
            "Fan has stopped, replace the adapter\n");
   }
 
   /* reset_hw fills in the perm_addr as well */
   /* For 82599: ixgbe_reset_hw_82599() -> ixgbe_get_mac_addr_generic()
      reads the MAC address from receive-address register 0 (RAL(0)/RAH(0))
      and stores it in hw->mac.perm_addr */
   err = hw->mac.ops.reset_hw(hw);
   if (err == IXGBE_ERR_SFP_NOT_PRESENT &&
       hw->mac.type == ixgbe_mac_82598EB) {
      /*
       * Start a kernel thread to watch for a module to arrive.
       * Only do this for 82598, since 82599 will generate
       * interrupts on module arrival.
       */
      set_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state);
      mod_timer(&adapter->sfp_timer,
           round_jiffies(jiffies + (2 * HZ)));
      err = 0;
   } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
      dev_err(&adapter->pdev->dev, "failed to initialize because "
         "an unsupported SFP+ module type was detected.\n"
         "Reload the driver after installing a supported "
         "module.\n");
      goto err_sw_init;
   } else if (err) {
      dev_err(&adapter->pdev->dev, "HW Init failed: %d\n", err);
      goto err_sw_init;
   }
 
   netdev->features = NETIF_F_SG |
                      NETIF_F_IP_CSUM |
                      NETIF_F_HW_VLAN_TX |
                      NETIF_F_HW_VLAN_RX |
                      NETIF_F_HW_VLAN_FILTER;
 
   netdev->features |= NETIF_F_IPV6_CSUM;
   netdev->features |= NETIF_F_TSO;
   netdev->features |= NETIF_F_TSO6;
   netdev->features |= NETIF_F_GRO;
 
   if (adapter->hw.mac.type == ixgbe_mac_82599EB)
      netdev->features |= NETIF_F_SCTP_CSUM;
 
   netdev->vlan_features |= NETIF_F_TSO;
   netdev->vlan_features |= NETIF_F_TSO6;
   netdev->vlan_features |= NETIF_F_IP_CSUM;
   netdev->vlan_features |= NETIF_F_IPV6_CSUM;
   netdev->vlan_features |= NETIF_F_SG;
 
   /* RSS is switched off when DCB is enabled */
   if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
      adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
 
#ifdef CONFIG_IXGBE_DCB
   netdev->dcbnl_ops = &dcbnl_ops;
#endif
 
#ifdef IXGBE_FCOE
   if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
      if (hw->mac.ops.get_device_caps) {
         hw->mac.ops.get_device_caps(hw, &device_caps);
         if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS)
            adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
      }
   }
#endif /* IXGBE_FCOE */
   if (pci_using_dac)
      netdev->features |= NETIF_F_HIGHDMA;
 
   if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
      netdev->features |= NETIF_F_LRO;
 
   /* make sure the EEPROM is good */
   if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
      dev_err(&pdev->dev, "The EEPROM Checksum Is Not Valid\n");
      err = -EIO;
      goto err_eeprom;
   }
 
   memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len); // copy the MAC address into netdev->dev_addr
   memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len);
 
   if (ixgbe_validate_mac_addr(netdev->perm_addr)) {
      dev_err(&pdev->dev, "invalid MAC address\n");
      err = -EIO;
      goto err_eeprom;
   }
 
   init_timer(&adapter->watchdog_timer);
   adapter->watchdog_timer.function = &ixgbe_watchdog;
   adapter->watchdog_timer.data = (unsigned long)adapter;
 
   INIT_WORK(&adapter->reset_task, ixgbe_reset_task);
   INIT_WORK(&adapter->watchdog_task, ixgbe_watchdog_task);
 
   /* ixgbe_init_interrupt_scheme() -> ixgbe_set_num_queues() -> ixgbe_set_fdir_queues()/ixgbe_set_rss_queues()
                                       ixgbe_set_interrupt_capability() -> ixgbe_acquire_msix_vectors() -> pci_enable_msix()
                                       ixgbe_alloc_q_vectors()
                                       ixgbe_alloc_queues()
      Sets adapter->num_tx/rx_queues according to FDIR/RSS
      Requests interrupts from the PCI subsystem
      Sets the poll function, allocates the ixgbe_q_vectors, initialises NAPI and adds it to napi_list
      Allocates the Tx/Rx ring arrays */
   err = ixgbe_init_interrupt_scheme(adapter);
   if (err)
      goto err_sw_init;
 
   switch (pdev->device) {
   case IXGBE_DEV_ID_82599_KX4:
      adapter->wol = (IXGBE_WUFC_MAG | IXGBE_WUFC_EX |
                      IXGBE_WUFC_MC | IXGBE_WUFC_BC);
      /* Enable ACPI wakeup in GRC */
      /* NOTE(review): the write below clears IXGBE_GRC_APME (& ~) although
         the comment above says "enable" - confirm against the datasheet */
      IXGBE_WRITE_REG(hw, IXGBE_GRC,
                   (IXGBE_READ_REG(hw, IXGBE_GRC) & ~IXGBE_GRC_APME));
      break;
   default:
      adapter->wol = 0;
      break;
   }
   device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
 
   /* pick up the PCI bus settings for reporting later */
   hw->mac.ops.get_bus_info(hw);
 
   /* print bus type/speed/width info */
   dev_info(&pdev->dev, "(PCI Express:%s:%s) %pM\n",
           ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
            (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
           ((hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
            (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
            (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
            "Unknown"),
           netdev->dev_addr);
   ixgbe_read_pba_num_generic(hw, &part_num);
   if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
      dev_info(&pdev->dev, "MAC: %d, PHY: %d, SFP+: %d, PBA No: %06x-%03x\n",
               hw->mac.type, hw->phy.type, hw->phy.sfp_type,
               (part_num >> 8), (part_num & 0xff));
   else
      dev_info(&pdev->dev, "MAC: %d, PHY: %d, PBA No: %06x-%03x\n",
               hw->mac.type, hw->phy.type,
               (part_num >> 8), (part_num & 0xff));
 
   if (hw->bus.width <= ixgbe_bus_width_pcie_x4) {
      dev_warn(&pdev->dev, "PCI-Express bandwidth available for "
               "this card is not sufficient for optimal "
               "performance.\n");
      dev_warn(&pdev->dev, "For optimal performance a x8 "
               "PCI-Express slot is required.\n");
   }
 
   /* save off EEPROM version number */
   hw->eeprom.ops.read(hw, 0x29, &adapter->eeprom_version);
 
   /* reset the hardware with the new settings */
   /* NOTE(review): only IXGBE_ERR_EEPROM_VERSION is examined; any other
      start_hw error is overwritten by register_netdev() below */
   err = hw->mac.ops.start_hw(hw);
 
   if (err == IXGBE_ERR_EEPROM_VERSION) {
      /* We are running on a pre-production device, log a warning */
      dev_warn(&pdev->dev, "This device is a pre-production "
               "adapter/LOM.  Please be aware there may be issues "
               "associated with your hardware.  If you are "
               "experiencing problems please contact your Intel or "
               "hardware representative who provided you with this "
               "hardware.\n");
   }
   strcpy(netdev->name, "eth%d");
   err = register_netdev(netdev); // register the netdev
   if (err)
      goto err_register;
 
   /* carrier off reporting is important to ethtool even BEFORE open */
   netif_carrier_off(netdev);
 
   if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE ||
       adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
      INIT_WORK(&adapter->fdir_reinit_task, ixgbe_fdir_reinit_task);
 
#ifdef CONFIG_IXGBE_DCA
   if (dca_add_requester(&pdev->dev) == 0) {
      adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
      ixgbe_setup_dca(adapter);
   }
#endif
   /* add san mac addr to netdev */
   ixgbe_add_sanmac_netdev(netdev);
 
   dev_info(&pdev->dev, "Intel(R) 10 Gigabit Network Connection\n");
   cards_found++;
   return 0;
 
   /* Error unwinding: each label undoes the steps completed before the
      corresponding failure point, in reverse order */
err_register:
   ixgbe_release_hw_control(adapter);
   ixgbe_clear_interrupt_scheme(adapter);
err_sw_init:
err_eeprom:
   clear_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state);
   del_timer_sync(&adapter->sfp_timer);
   cancel_work_sync(&adapter->sfp_task);
   cancel_work_sync(&adapter->multispeed_fiber_task);
   cancel_work_sync(&adapter->sfp_config_module_task);
   iounmap(hw->hw_addr);
err_ioremap:
   free_netdev(netdev);
err_alloc_etherdev:
   pci_release_selected_regions(pdev, pci_select_bars(pdev,
                                IORESOURCE_MEM));
err_pci_reg:
err_dma:
   pci_disable_device(pdev);
   return err;
}

主要步骤

根据网卡型号(82598/82599/X540/X550)选择ixgbe_info

const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];

/*
 * Lookup table from board type (enum ixgbe_boards) to the per-board
 * ixgbe_info descriptor. ixgbe_probe() indexes it with ent->driver_data.
 */
static const struct ixgbe_info *ixgbe_info_tbl[] = {
	[board_82598]		= &ixgbe_82598_info,
	[board_82599]		= &ixgbe_82599_info,
	[board_X540]		= &ixgbe_X540_info,
	[board_X550]		= &ixgbe_X550_info,
	[board_X550EM_x]	= &ixgbe_X550EM_x_info,
	[board_x550em_x_fw]	= &ixgbe_x550em_x_fw_info,
	[board_x550em_a]	= &ixgbe_x550em_a_info,
	[board_x550em_a_fw]	= &ixgbe_x550em_a_fw_info,
};
  
/* Board types supported by the driver; the values index ixgbe_info_tbl. */
enum ixgbe_boards {
	board_82598,
	board_82599,
	board_X540,
	board_X550,
	board_X550EM_x,
	board_x550em_x_fw,
	board_x550em_a,
	board_x550em_a_fw,
};

/*
 * Board descriptor for 82599: MAC type, the get_invariants hook and the
 * MAC/EEPROM/PHY/mailbox operation tables plus the register-value table
 * (mvals) used for this board.
 */
const struct ixgbe_info ixgbe_82599_info = {
	.mac                    = ixgbe_mac_82599EB,
	.get_invariants         = &ixgbe_get_invariants_82599,
	.mac_ops                = &mac_ops_82599,
	.eeprom_ops             = &eeprom_ops_82599,
	.phy_ops                = &phy_ops_82599,
	.mbx_ops                = &mbx_ops_generic,
	.mvals                  = ixgbe_mvals_8259X,
};

分配net_device和ixgbe_adapter

netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), MAX_TX_QUEUES);

/*
 * alloc_etherdev_mq - allocate an Ethernet net_device with multiple Tx queues.
 * @sizeof_priv: size of the driver-private area to reserve
 * @queue_count: number of Tx queues to allocate
 *
 * Thin wrapper around alloc_netdev_mq() that supplies the "eth%d" name
 * template and ether_setup() as the setup callback. Returns whatever
 * alloc_netdev_mq() returns (NULL on allocation failure).
 */
struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
{
   struct net_device *ndev;

   ndev = alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count);

   return ndev;
}
 
/*
 * alloc_netdev_mq - allocate a net_device with private data and Tx queues.
 * @sizeof_priv: size of the driver-private area placed after the net_device
 * @name:        interface name (template) copied into dev->name
 * @setup:       callback that initialises link-type specific fields
 * @queue_count: number of Tx queues (netdev_queue entries) to allocate
 *
 * Allocates one zeroed buffer holding the NETDEV_ALIGN-aligned net_device
 * followed by the private area, plus a separate array of Tx queues.
 * Returns the aligned net_device pointer, or NULL on failure.
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
      void (*setup)(struct net_device *), unsigned int queue_count)
{
   struct netdev_queue *tx;
   struct net_device *dev;
   size_t alloc_size;
   struct net_device *p;
 
   BUG_ON(strlen(name) >= sizeof(dev->name));
 
   alloc_size = sizeof(struct net_device); // size of the net_device itself
   if (sizeof_priv) {
      /* ensure 32-byte alignment of private area */
      alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
      alloc_size += sizeof_priv; // plus the size of the private data
   }
   /* ensure 32-byte alignment of whole construct */
   alloc_size += NETDEV_ALIGN - 1;
 
   p = kzalloc(alloc_size, GFP_KERNEL); // allocate net_device and private data in one buffer
   if (!p) {
      printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
      return NULL;
   }
 
   // allocate queue_count netdev_queue entries (the Tx queue array); one netdev_queue per Tx queue
   tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
   if (!tx) {
      printk(KERN_ERR "alloc_netdev: Unable to allocate "
             "tx qdiscs.\n");
      goto free_p;
   }
 
   /* dev is the aligned start inside p; padded records the offset between them */
   dev = PTR_ALIGN(p, NETDEV_ALIGN);
   dev->padded = (char *)dev - (char *)p;
 
   if (dev_addr_init(dev))
      goto free_tx;
 
   dev_unicast_init(dev);
 
   dev_net_set(dev, &init_net);
 
   dev->_tx = tx; // keep the Tx queue array
   dev->num_tx_queues = queue_count; // number of allocated Tx queues
   dev->real_num_tx_queues = queue_count; // number of Tx queues actually in use
 
   dev->gso_max_size = GSO_MAX_SIZE;
 
   netdev_init_queues(dev); // point dev->_tx[i].dev and dev->rx_queue.dev back at dev
 
   INIT_LIST_HEAD(&dev->napi_list);
   dev->priv_flags = IFF_XMIT_DST_RELEASE;
   setup(dev); // ether_setup() for Ethernet devices
   strcpy(dev->name, name);
   return dev;
 
free_tx:
   kfree(tx);
 
free_p:
   kfree(p);
   return NULL;
}
 
/*
 * Initialise the queues of @dev: the single Rx queue first, then every Tx
 * queue in index order, and finally the global Tx lock.
 */
static void netdev_init_queues(struct net_device *dev)
{
   unsigned int txq;

   netdev_init_one_queue(dev, &dev->rx_queue, NULL);

   /* same walk that netdev_for_each_tx_queue() performs */
   for (txq = 0; txq < dev->num_tx_queues; txq++)
      netdev_init_one_queue(dev, &dev->_tx[txq], NULL);

   spin_lock_init(&dev->tx_global_lock);
}
 
/*
 * Per-queue initialiser used by netdev_init_queues(): records the owning
 * device in the queue. @_unused is ignored; it exists so the function
 * matches the callback signature of netdev_for_each_tx_queue().
 */
static void netdev_init_one_queue(struct net_device *dev,
              struct netdev_queue *queue,
              void *_unused)
{
   queue->dev = dev;
}
 
/*
 * Invoke @f once for every Tx queue of @dev, in ascending index order,
 * passing the device, the queue and the opaque @arg.
 */
static inline void netdev_for_each_tx_queue(struct net_device *dev,
                   void (*f)(struct net_device *,
                        struct netdev_queue *,
                        void *),
                   void *arg)
{
   unsigned int idx = 0;

   while (idx < dev->num_tx_queues) {
      struct netdev_queue *txq = &dev->_tx[idx];

      f(dev, txq, arg);
      idx++;
   }
}
 
/*
 * ether_setup - fill in the Ethernet-specific defaults of a net_device:
 * header operations, link type, header/address lengths, MTU, Tx queue
 * length, interface flags and the all-ones broadcast address.
 */
void ether_setup(struct net_device *dev)
{
   /* link-layer identity */
   dev->type = ARPHRD_ETHER;              /* Ethernet framing */
   dev->header_ops = &eth_header_ops;

   /* sizes */
   dev->addr_len = ETH_ALEN;              /* 6 */
   dev->hard_header_len = ETH_HLEN;       /* 14 */
   dev->mtu = ETH_DATA_LEN;               /* 1500 */
   dev->tx_queue_len = 1000;              /* Ethernet wants good queues */

   dev->flags = IFF_BROADCAST | IFF_MULTICAST;

   /* broadcast address is ff:ff:ff:ff:ff:ff */
   memset(dev->broadcast, 0xFF, ETH_ALEN);
}

读取mac地址(ixgbe_get_mac_addr_generic()实际读取的是RAL(0)/RAH(0)寄存器,而不是直接访问eeprom),写入hw->mac.perm_addr

err = hw->mac.ops.reset_hw(hw);

/*
 * Board descriptor for 82599 as used by the probe code walked through in
 * this section: MAC type, the get_invariants hook and the MAC/EEPROM/PHY
 * operation tables that ixgbe_probe() copies into the ixgbe_hw.
 */
struct ixgbe_info ixgbe_82599_info = {
   .mac                    = ixgbe_mac_82599EB,
   .get_invariants         = &ixgbe_get_invariants_82599,
   .mac_ops                = &mac_ops_82599,
   .eeprom_ops             = &eeprom_ops_82599,
   .phy_ops                = &phy_ops_82599,
};
 
/*
 * MAC operation table for 82599. ixgbe_probe() copies it into hw->mac.ops,
 * so e.g. hw->mac.ops.reset_hw resolves to ixgbe_reset_hw_82599 and
 * hw->mac.ops.get_mac_addr to ixgbe_get_mac_addr_generic.
 */
static struct ixgbe_mac_operations mac_ops_82599 = {
   .init_hw                = &ixgbe_init_hw_generic,
   .reset_hw               = &ixgbe_reset_hw_82599,
   .start_hw               = &ixgbe_start_hw_82599,
   .clear_hw_cntrs         = &ixgbe_clear_hw_cntrs_generic,
   .get_media_type         = &ixgbe_get_media_type_82599,
   .get_supported_physical_layer = &ixgbe_get_supported_physical_layer_82599,
   .enable_rx_dma          = &ixgbe_enable_rx_dma_82599,
   .get_mac_addr           = &ixgbe_get_mac_addr_generic,
   .get_san_mac_addr       = &ixgbe_get_san_mac_addr_82599,
   .get_device_caps        = &ixgbe_get_device_caps_82599,
   .stop_adapter           = &ixgbe_stop_adapter_generic,
   .get_bus_info           = &ixgbe_get_bus_info_generic,
   .set_lan_id             = &ixgbe_set_lan_id_multi_port_pcie,
   .read_analog_reg8       = &ixgbe_read_analog_reg8_82599,
   .write_analog_reg8      = &ixgbe_write_analog_reg8_82599,
   .setup_link             = &ixgbe_setup_mac_link_82599,
   .check_link             = &ixgbe_check_mac_link_82599,
   .get_link_capabilities  = &ixgbe_get_link_capabilities_82599,
   .led_on                 = &ixgbe_led_on_generic,
   .led_off                = &ixgbe_led_off_generic,
   .blink_led_start        = &ixgbe_blink_led_start_generic,
   .blink_led_stop         = &ixgbe_blink_led_stop_generic,
   .set_rar                = &ixgbe_set_rar_generic,
   .clear_rar              = &ixgbe_clear_rar_generic,
   .set_vmdq               = &ixgbe_set_vmdq_82599,
   .clear_vmdq             = &ixgbe_clear_vmdq_82599,
   .init_rx_addrs          = &ixgbe_init_rx_addrs_generic,
   .update_uc_addr_list    = &ixgbe_update_uc_addr_list_generic,
   .update_mc_addr_list    = &ixgbe_update_mc_addr_list_generic,
   .enable_mc              = &ixgbe_enable_mc_generic,
   .disable_mc             = &ixgbe_disable_mc_generic,
   .clear_vfta             = &ixgbe_clear_vfta_82599,
   .set_vfta               = &ixgbe_set_vfta_82599,
   .fc_enable               = &ixgbe_fc_enable_generic,
   .init_uta_tables        = &ixgbe_init_uta_tables_82599,
   .setup_sfp              = &ixgbe_setup_sfp_modules_82599,
};
 
/*
 * ixgbe_reset_hw_82599 - reset the 82599 MAC and capture its addresses.
 * @hw: hardware structure
 *
 * Stops the adapter, initialises the PHY (and SFP module if needed),
 * disables PCIe master access, issues a software reset and polls for its
 * completion, saves or restores the original AUTOC/AUTOC2 link settings,
 * re-initialises the receive address registers and stores the permanent
 * MAC and SAN MAC addresses in @hw.
 *
 * Returns 0 on success or an IXGBE_ERR_* status. Note that status is
 * reassigned several times, so the returned value reflects the last step
 * that set it.
 */
static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
{
   s32 status = 0;
   u32 ctrl, ctrl_ext;
   u32 i;
   u32 autoc;
   u32 autoc2;
 
   /* Call adapter stop to disable tx/rx and clear interrupts */
   hw->mac.ops.stop_adapter(hw);
 
   /* PHY ops must be identified and initialized prior to reset */
 
   /* Init PHY and function pointers, perform SFP setup */
   status = hw->phy.ops.init(hw);
 
   if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
      goto reset_hw_out;
 
   /* Setup SFP module if there is one present. */
   if (hw->phy.sfp_setup_needed) {
      status = hw->mac.ops.setup_sfp(hw);
      hw->phy.sfp_setup_needed = false;
   }
 
   /* Reset PHY */
   if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL)
      hw->phy.ops.reset(hw);
 
   /*
    * Prevent the PCI-E bus from hanging by disabling PCI-E master
    * access and verify no pending requests before reset
    */
   status = ixgbe_disable_pcie_master(hw);
   if (status != 0) {
      status = IXGBE_ERR_MASTER_REQUESTS_PENDING;
      hw_dbg(hw, "PCI-E Master disable polling has failed.\n");
   }
 
   /*
    * Issue global reset to the MAC.  This needs to be a SW reset.
    * If link reset is used, it might reset the MAC when mng is using it
    */
   ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
   IXGBE_WRITE_REG(hw, IXGBE_CTRL, (ctrl | IXGBE_CTRL_RST));
   IXGBE_WRITE_FLUSH(hw);
 
   /* Poll for reset bit to self-clear indicating reset is complete */
   for (i = 0; i < 10; i++) {
      udelay(1);
      ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
      if (!(ctrl & IXGBE_CTRL_RST))
         break;
   }
   if (ctrl & IXGBE_CTRL_RST) {
      status = IXGBE_ERR_RESET_FAILED;
      hw_dbg(hw, "Reset polling failed to complete.\n");
   }
   /* Clear PF Reset Done bit so PF/VF Mail Ops can work */
   /* NOTE(review): the code below sets IXGBE_CTRL_EXT_PFRSTD (|=) although
      the comment above says "clear" - confirm against the datasheet */
   ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
   ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD;
   IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
 
   msleep(50);
 
 
 
   /*
    * Store the original AUTOC/AUTOC2 values if they have not been
    * stored off yet.  Otherwise restore the stored original
    * values since the reset operation sets back to defaults.
    */
   autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
   autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
   if (hw->mac.orig_link_settings_stored == false) {
      hw->mac.orig_autoc = autoc;
      hw->mac.orig_autoc2 = autoc2;
      hw->mac.orig_link_settings_stored = true;
   } else {
      if (autoc != hw->mac.orig_autoc)
         IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc |
                         IXGBE_AUTOC_AN_RESTART));
 
      if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) !=
          (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) {
         autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK;
         autoc2 |= (hw->mac.orig_autoc2 &
                    IXGBE_AUTOC2_UPPER_MASK);
         IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
      }
   }
 
   /*
    * Store MAC address from RAR0, clear receive address registers, and
    * clear the multicast table.  Also reset num_rar_entries to 128,
    * since we modify this value when programming the SAN MAC address.
    */
   hw->mac.num_rar_entries = 128;
   hw->mac.ops.init_rx_addrs(hw);
 
   /* Store the permanent mac address */
   hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); // ixgbe_get_mac_addr_generic(): read the MAC address from RAL(0)/RAH(0) into hw->mac.perm_addr
 
   /* Store the permanent SAN mac address */
   hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
 
   /* Add the SAN MAC address to the RAR only if it's a valid address */
   if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
      hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
                          hw->mac.san_addr, 0, IXGBE_RAH_AV);
 
      /* Reserve the last RAR for the SAN MAC address */
      hw->mac.num_rar_entries--;
   }
 
reset_hw_out:
   return status;
}
 
/*
 * ixgbe_get_mac_addr_generic - read the MAC address held in receive-address
 * register 0.
 * @hw:       hardware structure
 * @mac_addr: output buffer, at least 6 bytes
 *
 * Bytes 0..3 come from RAL(0) and bytes 4..5 from the low half of RAH(0),
 * least-significant byte first. Always returns 0.
 */
s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr)
{
   /* RAH is read before RAL, as in the reference implementation */
   u32 rah = IXGBE_READ_REG(hw, IXGBE_RAH(0));
   u32 ral = IXGBE_READ_REG(hw, IXGBE_RAL(0));

   mac_addr[0] = (u8)(ral >> 0);
   mac_addr[1] = (u8)(ral >> 8);
   mac_addr[2] = (u8)(ral >> 16);
   mac_addr[3] = (u8)(ral >> 24);

   mac_addr[4] = (u8)(rah >> 0);
   mac_addr[5] = (u8)(rah >> 8);

   return 0;
}
 
/*
 * Register offsets of the Receive Address Low/High pair for entry _i.
 * Entries 0..15 are based at 0x05400/0x05404, higher entries at
 * 0x0A200/0x0A204; in both ranges _i * 8 is added to the base.
 */
#define IXGBE_RAL(_i)   ((((_i) <= 15) ? 0x05400 : 0x0A200) + ((_i) * 8))
#define IXGBE_RAH(_i)   ((((_i) <= 15) ? 0x05404 : 0x0A204) + ((_i) * 8))

ixgbe_init_interrupt_scheme()

/*
 * ixgbe_init_interrupt_scheme - decide queue counts and set up interrupts.
 * @adapter: board private structure
 *
 * Runs four steps in order: choose the number of Rx/Tx queues, acquire
 * interrupt capability (MSI-X / MSI / legacy), allocate the q_vectors and
 * allocate the rings. On failure every step already completed is undone
 * and the error code of the failing step is returned; on success the
 * adapter is marked __IXGBE_DOWN and 0 is returned.
 */
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
{
   const char *mq_state;
   int rc;

   /* Number of supported queues (derived from FDIR/RSS settings) */
   ixgbe_set_num_queues(adapter);

   /* Ask the PCI subsystem for interrupts */
   rc = ixgbe_set_interrupt_capability(adapter);
   if (rc) {
      DPRINTK(PROBE, ERR, "Unable to setup interrupt capabilities\n");
      return rc;
   }

   /* Allocate the q_vectors (poll function, NAPI registration) */
   rc = ixgbe_alloc_q_vectors(adapter);
   if (rc) {
      DPRINTK(PROBE, ERR, "Unable to allocate memory for queue "
              "vectors\n");
      ixgbe_reset_interrupt_capability(adapter);
      return rc;
   }

   /* Allocate the Tx/Rx ring arrays */
   rc = ixgbe_alloc_queues(adapter);
   if (rc) {
      DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n");
      ixgbe_free_q_vectors(adapter);
      ixgbe_reset_interrupt_capability(adapter);
      return rc;
   }

   if (adapter->num_rx_queues > 1)
      mq_state = "Enabled";
   else
      mq_state = "Disabled";

   DPRINTK(DRV, INFO, "Multiqueue %s: Rx Queue count = %u, "
           "Tx Queue count = %u\n",
           mq_state, adapter->num_rx_queues, adapter->num_tx_queues);

   set_bit(__IXGBE_DOWN, &adapter->state);

   return 0;
}
ixgbe_set_num_queues()
static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
   if (ixgbe_set_fcoe_queues(adapter))
      goto done;
 
#endif /* IXGBE_FCOE */
#ifdef CONFIG_IXGBE_DCB
   if (ixgbe_set_dcb_queues(adapter))
      goto done;
 
#endif
   if (ixgbe_set_fdir_queues(adapter))
      goto done;
 
   if (ixgbe_set_rss_queues(adapter))
      goto done;
 
   /* fallback to base case */
   adapter->num_rx_queues = 1;
   adapter->num_tx_queues = 1;
 
done:
   /* Notify the stack of the (possibly) reduced Tx Queue count. */
   adapter->netdev->real_num_tx_queues = adapter->num_tx_queues; // 设置实际发送队列数
}
  
/*
 * ixgbe_set_fdir_queues - size the queues for Flow Director.
 * @adapter: board private structure
 *
 * Caps the Flow Director ring count at the number of online CPUs and
 * clears its mask. When RSS is enabled and the adapter is Flow Director
 * capable (hash or perfect filters), both the Rx and Tx queue counts are
 * set to that ring count and true is returned. Otherwise both Flow
 * Director capability flags are cleared and false is returned.
 *
 * Declared "static inline bool" (specifier before the type) to match
 * ixgbe_set_rss_queues() and avoid GCC's old-style-declaration warning.
 */
static inline bool ixgbe_set_fdir_queues(struct ixgbe_adapter *adapter)
{
   struct ixgbe_ring_feature *f_fdir = &adapter->ring_feature[RING_F_FDIR];
   bool ret = false;

   /* min(#online CPUs, previously configured FDIR ring count) */
   f_fdir->indices = min((int)num_online_cpus(), f_fdir->indices);
   f_fdir->mask = 0;

   /* Flow Director must have RSS enabled */
   if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) &&
       ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
        (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))) {
      adapter->num_tx_queues = f_fdir->indices;
      adapter->num_rx_queues = f_fdir->indices;
      ret = true;
   } else {
      /* without RSS (or capability) Flow Director cannot be used */
      adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
      adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
   }
   return ret;
}
  
/*
 * ixgbe_set_rss_queues - size the queues for RSS.
 * @adapter: board private structure
 *
 * When RSS is enabled, sets the RSS mask to 0xF and uses the configured
 * RSS ring count for both Rx and Tx queues, returning true. Returns false
 * without touching anything when RSS is disabled.
 */
static inline bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
{
   struct ixgbe_ring_feature *rss = &adapter->ring_feature[RING_F_RSS];

   if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
      return false;

   rss->mask = 0xF;
   /* presumably min(#CPUs, 16) as set up earlier by ixgbe_sw_init() - not shown here */
   adapter->num_rx_queues = rss->indices;
   adapter->num_tx_queues = rss->indices;

   return true;
}
ixgbe_set_interrupt_capability()
/*
 * ixgbe_set_interrupt_capability - choose MSI-X, MSI or legacy interrupts.
 * @adapter: board private structure
 *
 * Computes an MSI-X vector budget from the queue counts, the number of
 * online CPUs and the hardware limit, then tries to acquire that many
 * MSI-X vectors. If MSI-X cannot be used, the multi-queue features (DCB,
 * RSS, Flow Director) are switched off, the queue counts are recomputed
 * and a single MSI interrupt is requested; failing that the driver falls
 * back to legacy interrupts. As written, always returns 0.
 */
static int ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
{
   struct ixgbe_hw *hw = &adapter->hw;
   int err = 0;
   int vector, v_budget;
 
   /*
    * It's easy to be greedy for MSI-X vectors, but it really
    * doesn't do us much good if we have a lot more vectors
    * than CPU's.  So let's be conservative and only ask for
    * (roughly) twice the number of vectors as there are CPU's.
    */
   // compute the MSI-X vector budget; NON_Q_VECTORS accounts for the non-queue vectors (LSC etc.)
   // NOTE(review): the article adds that those vectors are bound to all CPUs of the NIC's NUMA node - not shown here
   v_budget = min(adapter->num_rx_queues + adapter->num_tx_queues,
                  (int)(num_online_cpus() * 2)) + NON_Q_VECTORS;
 
   /*
    * At the same time, hardware can only support a maximum of
    * hw.mac->max_msix_vectors vectors.  With features
    * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
    * descriptor queues supported by our device.  Thus, we cap it off in
    * those rare cases where the cpu count also exceeds our vector limit.
    */
   v_budget = min(v_budget, (int)hw->mac.max_msix_vectors);
 
   /* A failure in MSI-X entry allocation isn't fatal, but it does
    * mean we disable MSI-X capabilities of the adapter. */
   // allocate v_budget msix_entry structures and store the array in adapter->msix_entries
   adapter->msix_entries = kcalloc(v_budget,
                                   sizeof(struct msix_entry), GFP_KERNEL);
   if (adapter->msix_entries) {
      for (vector = 0; vector < v_budget; vector++)
         adapter->msix_entries[vector].entry = vector;
 
      ixgbe_acquire_msix_vectors(adapter, v_budget); // request v_budget MSI-X vectors from the PCI subsystem
 
      if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
         goto out;
   }
 
   /* MSI-X unavailable: drop the multi-queue features and recompute the queue counts */
   adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
   adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
   adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
   adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
   adapter->atr_sample_rate = 0;
   ixgbe_set_num_queues(adapter);
 
   err = pci_enable_msi(adapter->pdev); // request a single MSI interrupt from the PCI subsystem
   if (!err) {
      adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
   } else {
      DPRINTK(HW, DEBUG, "Unable to allocate MSI interrupt, "
              "falling back to legacy.  Error: %d\n", err);
      /* reset err */
      err = 0;
   }
 
out:
   return err;
}
 
/*
 * ixgbe_acquire_msix_vectors - try to enable MSI-X on the adapter
 * @adapter: adapter whose pdev and msix_entries are passed to pci_enable_msix()
 * @vectors: number of vectors requested on the first attempt
 *
 * pci_enable_msix() is retried for as long as the request is at least
 * MIN_MSIX_COUNT.  The minimum is meant to cover TxQ[0] cleanup, RxQ[0]
 * cleanup and the "other" causes (link status change, etc.), with a TCP
 * timer vector being optional.
 *
 * A return of 0 means the whole request was granted.  A positive return is
 * used as the size of the next request.  A negative return abandons MSI-X.
 *
 * On success IXGBE_FLAG_MSIX_ENABLED is set and num_msix_vectors is recorded.
 * Otherwise the flag is cleared and msix_entries is freed and set to NULL, so
 * the caller can fall back to another interrupt mode.
 */
static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
                                       int vectors)
{
   const int min_vectors = MIN_MSIX_COUNT;
   int rc;

   /*
    * Only the count matters at this point; the vectors are wired to the
    * individual Tx/Rx queues later, when the irqs are requested.
    */
   while (vectors >= min_vectors) {
      /* On success the irq numbers land in msix_entries[i].vector. */
      rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, vectors);
      if (rc == 0)
         break;

      /* rc < 0: hard failure, give up; rc > 0: retry with that many. */
      vectors = (rc < 0) ? 0 : rc;
   }

   if (vectors >= min_vectors) {
      adapter->flags |= IXGBE_FLAG_MSIX_ENABLED;
      /*
       * Use no more than max_msix_q_vectors + NON_Q_VECTORS, even if
       * more vectors were granted.
       */
      adapter->num_msix_vectors = min(vectors,
                         adapter->max_msix_q_vectors + NON_Q_VECTORS);
      return;
   }

   /* Not enough MSI-X vectors: leave MSI-X disabled and drop the table. */
   DPRINTK(HW, DEBUG, "Unable to allocate MSI-X interrupts\n");
   adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
   kfree(adapter->msix_entries);
   adapter->msix_entries = NULL;
}
ixgbe_alloc_q_vectors()和ixgbe_alloc_queues()
/*
 * ixgbe_alloc_q_vectors - allocate the adapter's queue interrupt vectors
 * @adapter: adapter that receives the vectors in adapter->q_vector[]
 *
 * With IXGBE_FLAG_MSIX_ENABLED set, num_msix_vectors - NON_Q_VECTORS vectors
 * are created and polled by ixgbe_clean_rxtx_many().  Otherwise a single
 * vector polled by ixgbe_poll() is created.  Each vector's NAPI context is
 * added to adapter->netdev with a weight of 64.
 *
 * Returns 0 on success.  If an allocation fails, every vector created so far
 * is removed from NAPI, freed and its slot cleared, and -ENOMEM is returned.
 */
static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
   int q_idx, num_q_vectors;
   struct ixgbe_q_vector *q_vector;
   int (*poll)(struct napi_struct *, int);

   if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
      /* The non-queue vectors (link status change, etc.) get no q_vector. */
      num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
      poll = &ixgbe_clean_rxtx_many;
   } else {
      num_q_vectors = 1;
      poll = &ixgbe_poll;
   }

   for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
      q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL);
      if (!q_vector)
         goto err_out;
      q_vector->adapter = adapter;
      /*
       * kzalloc() returns zeroed memory, so txr_count and rxr_count are
       * both 0 here.  The former "Tx-only vector" test could therefore
       * never select tx_eitr_param; the Rx parameter is what has always
       * been applied at allocation time.
       */
      q_vector->eitr = adapter->rx_eitr_param;
      q_vector->v_idx = q_idx;
      netif_napi_add(adapter->netdev, &q_vector->napi, (*poll), 64);
      adapter->q_vector[q_idx] = q_vector;
   }

   return 0;

err_out:
   /* Unwind the vectors that were fully set up before the failure. */
   while (q_idx) {
      q_idx--;
      q_vector = adapter->q_vector[q_idx];
      netif_napi_del(&q_vector->napi);
      kfree(q_vector);
      adapter->q_vector[q_idx] = NULL;
   }
   return -ENOMEM;
}
 
static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
{
   int i;
 
   // 分配num_tx_queues个ixgbe_ring(发送ring数组),地址赋给adapter->tx_ring
   adapter->tx_ring = kcalloc(adapter->num_tx_queues,
                              sizeof(struct ixgbe_ring), GFP_KERNEL);
   if (!adapter->tx_ring)
      goto err_tx_ring_allocation;
 
   // 分配num_rx_queues个ixgbe_ring(接收ring数组),地址赋给adapter->rx_ring
   adapter->rx_ring = kcalloc(adapter->num_rx_queues,
                              sizeof(struct ixgbe_ring), GFP_KERNEL);
   if (!adapter->rx_ring)
      goto err_rx_ring_allocation;
 
   for (i = 0; i < adapter->num_tx_queues; i++) {
      adapter->tx_ring[i].count = adapter->tx_ring_count; // 设置tx_ring[i].count
      adapter->tx_ring[i].queue_index = i; // 设置tx_ring[i].queue_index
   }
 
   for (i = 0; i < adapter->num_rx_queues; i++) {
      adapter->rx_ring[i].count = adapter->rx_ring_count; // 设置rx_ring[i].count
      adapter->rx_ring[i].queue_index = i; // 设置rx_ring[i].queue_index
   }
 
   ixgbe_cache_ring_register(adapter); // 设置tx/rx_ring[i].reg_idx
 
   return 0;
 
err_rx_ring_allocation:
   kfree(adapter->tx_ring);
err_tx_ring_allocation:
   return -ENOMEM;
}
  • 2
    点赞
  • 23
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
您好!要在CentOS 7上安装ixgbe网卡驱动,可以按照以下步骤进行操作:

1. 首先,确保您的CentOS 7系统已经连接到Internet(有线或无线网络均可)。
2. 打开终端窗口,并使用root权限登录或者使用sudo命令。
3. 安装依赖的工具和内核开发包:
   ```
   yum install -y gcc make kernel-devel
   ```
4. 下载ixgbe驱动程序。您可以通过访问Intel官方网站下载最新版本的ixgbe驱动程序,确保下载与您的网卡型号和Linux内核版本匹配的驱动程序。
5. 解压ixgbe驱动程序包(`ixgbe-x.x.x.tar.gz` 是您下载的驱动程序包的文件名):
   ```
   tar -xf ixgbe-x.x.x.tar.gz
   ```
6. 进入解压后目录中的 `src` 子目录(Makefile位于该目录下):
   ```
   cd ixgbe-x.x.x/src
   ```
7. 编译和安装驱动程序:
   ```
   make install
   ```
8. 重新加载网卡驱动程序(如果旧版本驱动已经加载,先执行 `rmmod ixgbe` 卸载):
   ```
   modprobe ixgbe
   ```
9. 确认驱动程序已成功加载(`ethX` 是您的ixgbe网卡设备名称,如eth0、eth1等),输出中会显示驱动程序的名称和版本信息:
   ```
   ethtool -i ethX
   ```
10. (可选)更新系统配置,使驱动在启动时自动加载。编辑 `/etc/modules-load.d/modules.conf` 文件,并添加以下行:
    ```
    ixgbe
    ```
11. 重新启动系统,以确保驱动在启动时自动加载。

这样,您的CentOS 7系统就安装好了ixgbe网卡驱动程序。请注意,以上步骤中的 `x.x.x` 应替换为您下载的驱动程序的版本号。希望对您有所帮助!如果您还有其他问题,请随时提问。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值