ixgbe NIC driver (Part 2)

ixgbe_netdev_ops

// ixgbe_netdev_ops is registered in ixgbe_probe()
netdev->netdev_ops = &ixgbe_netdev_ops;

static const struct net_device_ops ixgbe_netdev_ops = {
	.ndo_open		= ixgbe_open, // called when the ixgbe interface is brought up
	.ndo_stop		= ixgbe_close, // called when the ixgbe interface is shut down
	.ndo_start_xmit		= ixgbe_xmit_frame,
	.ndo_set_rx_mode	= ixgbe_set_rx_mode,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= ixgbe_set_mac,
	.ndo_change_mtu		= ixgbe_change_mtu,
	.ndo_tx_timeout		= ixgbe_tx_timeout,
	.ndo_set_tx_maxrate	= ixgbe_tx_maxrate,
	.ndo_vlan_rx_add_vid	= ixgbe_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= ixgbe_vlan_rx_kill_vid,
	.ndo_do_ioctl		= ixgbe_ioctl,
	.ndo_set_vf_mac		= ixgbe_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= ixgbe_ndo_set_vf_vlan,
	.ndo_set_vf_rate	= ixgbe_ndo_set_vf_bw,
	.ndo_set_vf_spoofchk	= ixgbe_ndo_set_vf_spoofchk,
	.ndo_set_vf_rss_query_en = ixgbe_ndo_set_vf_rss_query_en,
	.ndo_set_vf_trust	= ixgbe_ndo_set_vf_trust,
	.ndo_get_vf_config	= ixgbe_ndo_get_vf_config,
	.ndo_get_stats64	= ixgbe_get_stats64,
	.ndo_setup_tc		= __ixgbe_setup_tc,
#ifdef IXGBE_FCOE
	.ndo_select_queue	= ixgbe_select_queue,
	.ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get,
	.ndo_fcoe_ddp_target = ixgbe_fcoe_ddp_target,
	.ndo_fcoe_ddp_done = ixgbe_fcoe_ddp_put,
	.ndo_fcoe_enable = ixgbe_fcoe_enable,
	.ndo_fcoe_disable = ixgbe_fcoe_disable,
	.ndo_fcoe_get_wwn = ixgbe_fcoe_get_wwn,
	.ndo_fcoe_get_hbainfo = ixgbe_fcoe_get_hbainfo,
#endif /* IXGBE_FCOE */
	.ndo_set_features = ixgbe_set_features,
	.ndo_fix_features = ixgbe_fix_features,
	.ndo_fdb_add		= ixgbe_ndo_fdb_add,
	.ndo_bridge_setlink	= ixgbe_ndo_bridge_setlink,
	.ndo_bridge_getlink	= ixgbe_ndo_bridge_getlink,
	.ndo_dfwd_add_station	= ixgbe_fwd_add,
	.ndo_dfwd_del_station	= ixgbe_fwd_del,
	.ndo_udp_tunnel_add	= ixgbe_add_udp_tunnel_port,
	.ndo_udp_tunnel_del	= ixgbe_del_udp_tunnel_port,
	.ndo_features_check	= ixgbe_features_check,
	.ndo_bpf		= ixgbe_xdp,
	.ndo_xdp_xmit		= ixgbe_xdp_xmit,
	.ndo_xsk_async_xmit	= ixgbe_xsk_async_xmit,
};
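
How these callbacks get invoked: the networking core never calls driver functions directly; dev_open(), dev_close() and friends dispatch through this function-pointer table (dev_open() ends up calling netdev->netdev_ops->ndo_open()). A minimal user-space sketch of the same ops-table pattern (my_device, my_device_ops and the handlers are hypothetical stand-ins, not kernel types):

#include <stdio.h>

struct my_device;

struct my_device_ops {
	int (*ndo_open)(struct my_device *dev);
	int (*ndo_stop)(struct my_device *dev);
};

struct my_device {
	const char *name;
	const struct my_device_ops *ops;
};

static int my_open(struct my_device *dev)
{
	printf("%s: open\n", dev->name);
	return 0;
}

static int my_stop(struct my_device *dev)
{
	printf("%s: stop\n", dev->name);
	return 0;
}

static const struct my_device_ops my_ops = {
	.ndo_open = my_open,
	.ndo_stop = my_stop,
};

int main(void)
{
	struct my_device dev = { .name = "eth0", .ops = &my_ops };

	/* the core calls through the table, just as dev_open() calls
	 * netdev->netdev_ops->ndo_open(netdev) */
	dev.ops->ndo_open(&dev);
	dev.ops->ndo_stop(&dev);
	return 0;
}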

ixgbe_open()

static int ixgbe_open(struct net_device *netdev)
{
   struct ixgbe_adapter *adapter = netdev_priv(netdev);
   int err;
 
   /* disallow open during test */
   if (test_bit(__IXGBE_TESTING, &adapter->state))
      return -EBUSY;
 
   netif_carrier_off(netdev);
 
   /* allocate transmit descriptors */
   // allocate 1024 ixgbe_tx_buffer entries for each adapter->tx_ring[i]; the array address is stored in adapter->tx_ring[i].tx_buffer_info
   err = ixgbe_setup_all_tx_resources(adapter);
   if (err)
      goto err_setup_tx;
 
   /* allocate receive descriptors */
   // allocate 1024 ixgbe_rx_buffer entries for each adapter->rx_ring[i]; the array address is stored in adapter->rx_ring[i].rx_buffer_info
   err = ixgbe_setup_all_rx_resources(adapter);
   if (err)
      goto err_setup_rx;
 
   ixgbe_configure(adapter); // configure the NIC
 
   err = ixgbe_request_irq(adapter); // request the interrupt handler(s)
   if (err)
      goto err_req_irq;
 
   /* ixgbe_up_complete() -> ixgbe_configure_msix() -> ixgbe_set_ivar()
                             ixgbe_napi_enable_all() -> napi_enable()
                             ixgbe_irq_enable() -> ixgbe_irq_enable_queues()
                             netif_tx_start_all_queues() -> netif_tx_start_queue()
      programs the NIC's IVAR registers,
      clears the NAPI_STATE_SCHED flag in napi->state,
      enables interrupts,
      and clears the __QUEUE_STATE_XOFF flag on every TX queue */
   err = ixgbe_up_complete(adapter);
   if (err)
      goto err_up;
 
   // clear the __QUEUE_STATE_XOFF flag on all TX queues
   netif_tx_start_all_queues(netdev);
 
   return 0;
 
err_up:
   ixgbe_release_hw_control(adapter);
   ixgbe_free_irq(adapter);
err_req_irq:
err_setup_rx:
   ixgbe_free_all_rx_resources(adapter);
err_setup_tx:
   ixgbe_free_all_tx_resources(adapter);
   ixgbe_reset(adapter);
 
   return err;
}
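
Note the error paths: ixgbe_open() uses the kernel's goto-based unwinding, where later labels fall through into earlier ones so each failure releases exactly what was set up before it. A minimal user-space sketch of the idiom (open_device() and fake_request_irq() are hypothetical stand-ins):

#include <stdio.h>
#include <stdlib.h>

/* toy stand-in for ixgbe_request_irq(); always fails so the error
 * path below is exercised */
static int fake_request_irq(void)
{
	return -1;
}

static int open_device(void)
{
	void *tx = NULL, *rx = NULL;
	int err;

	tx = malloc(64); /* stands in for ixgbe_setup_all_tx_resources() */
	if (!tx) {
		err = -1;
		goto err_setup_tx;
	}

	rx = malloc(64); /* stands in for ixgbe_setup_all_rx_resources() */
	if (!rx) {
		err = -1;
		goto err_setup_rx;
	}

	err = fake_request_irq();
	if (err)
		goto err_req_irq;

	return 0;

err_req_irq:
err_setup_rx:
	free(rx); /* free(NULL) is a no-op, like vfree(NULL) */
err_setup_tx:
	free(tx);
	return err;
}

int main(void)
{
	printf("open_device() = %d\n", open_device());
	return 0;
}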

ixgbe_setup_all_tx_resources()

static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
{
   int i, err = 0;
 
   for (i = 0; i < adapter->num_tx_queues; i++) { // iterate over every tx_ring
      err = ixgbe_setup_tx_resources(adapter, &adapter->tx_ring[i]);
      if (!err)
         continue;
      DPRINTK(PROBE, ERR, "Allocation for Tx Queue %u failed\n", i);
      break;
   }
 
   return err;
}
 
int ixgbe_setup_tx_resources(struct ixgbe_adapter *adapter,
                             struct ixgbe_ring *tx_ring)
{
   struct pci_dev *pdev = adapter->pdev;
   int size;
 
   size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
   // allocate 1024 ixgbe_tx_buffer entries; the address goes into tx_ring->tx_buffer_info
   tx_ring->tx_buffer_info = vmalloc(size);
   if (!tx_ring->tx_buffer_info)
      goto err;
   memset(tx_ring->tx_buffer_info, 0, size); // zero the ixgbe_tx_buffer array
 
   /* round up to nearest 4K */
   tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
   tx_ring->size = ALIGN(tx_ring->size, 4096);
 
   // allocate 1024 ixgbe_adv_tx_desc entries as coherent DMA memory; the kernel virtual address goes into tx_ring->desc, the bus address into tx_ring->dma
   tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
                                        &tx_ring->dma);
   if (!tx_ring->desc)
      goto err;
 
   tx_ring->next_to_use = 0; // start with next_to_use at 0
   tx_ring->next_to_clean = 0; // start with next_to_clean at 0
   tx_ring->work_limit = tx_ring->count;
   return 0;
 
err:
   vfree(tx_ring->tx_buffer_info);
   tx_ring->tx_buffer_info = NULL;
   DPRINTK(PROBE, ERR, "Unable to allocate memory for the transmit "
                       "descriptor ring\n");
   return -ENOMEM;
}
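
The "round up to nearest 4K" step sizes the descriptor array in whole pages. The kernel's ALIGN() only works for power-of-two alignments; the arithmetic can be checked in user space (the sketch assumes count = 1024 and a 16-byte descriptor, which matches sizeof(union ixgbe_adv_tx_desc)):

#include <stdio.h>

/* same rounding as the kernel's ALIGN(x, a) for power-of-two a */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long count = 1024; /* tx_ring->count */
	unsigned long desc = 16;    /* sizeof(union ixgbe_adv_tx_desc) */
	unsigned long size = count * desc;

	printf("raw size = %lu\n", size);                          /* 16384 */
	printf("aligned  = %lu\n", ALIGN(size, 4096UL));           /* 16384, already a 4K multiple */
	printf("ALIGN(5000, 4096) = %lu\n", ALIGN(5000UL, 4096UL)); /* 8192 */
	return 0;
}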

ixgbe_setup_all_rx_resources()

static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
{
   int i, err = 0;
 
   for (i = 0; i < adapter->num_rx_queues; i++) { // iterate over every rx_ring
      err = ixgbe_setup_rx_resources(adapter, &adapter->rx_ring[i]);
      if (!err)
         continue;
      DPRINTK(PROBE, ERR, "Allocation for Rx Queue %u failed\n", i);
      break;
   }
 
   return err;
}
 
int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
                             struct ixgbe_ring *rx_ring)
{
   struct pci_dev *pdev = adapter->pdev;
   int size;
 
   size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
   // allocate 1024 ixgbe_rx_buffer entries; the address goes into rx_ring->rx_buffer_info
   rx_ring->rx_buffer_info = vmalloc(size);
   if (!rx_ring->rx_buffer_info) {
      DPRINTK(PROBE, ERR,
              "vmalloc allocation failed for the rx desc ring\n");
      goto alloc_failed;
   }
   memset(rx_ring->rx_buffer_info, 0, size); // zero the ixgbe_rx_buffer array
 
   /* Round up to nearest 4K */
   rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
   rx_ring->size = ALIGN(rx_ring->size, 4096);
 
   // allocate 1024 ixgbe_adv_rx_desc entries as coherent DMA memory; the kernel virtual address goes into rx_ring->desc, the bus address into rx_ring->dma
   rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size, &rx_ring->dma);
 
   if (!rx_ring->desc) {
      DPRINTK(PROBE, ERR,
              "Memory allocation failed for the rx desc ring\n");
      vfree(rx_ring->rx_buffer_info);
      goto alloc_failed;
   }
 
   rx_ring->next_to_clean = 0; // start with next_to_clean at 0
   rx_ring->next_to_use = 0; // start with next_to_use at 0
 
   return 0;
 
alloc_failed:
   return -ENOMEM;
}
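
pci_alloc_consistent() is the legacy PCI wrapper around the coherent DMA allocator; in current kernels the same allocation is expressed with dma_alloc_coherent(). A kernel-context sketch of the equivalent call (not standalone-runnable; GFP_KERNEL is an assumption, reasonable here because ndo_open runs in process context):

#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int setup_ring_desc(struct pci_dev *pdev, size_t size,
                           void **desc, dma_addr_t *dma)
{
	/* one coherent block holding the whole descriptor array:
	 * CPU pointer in *desc, bus address for the NIC in *dma */
	*desc = dma_alloc_coherent(&pdev->dev, size, dma, GFP_KERNEL);
	return *desc ? 0 : -ENOMEM;
}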

ixgbe_configure()

static void ixgbe_configure(struct ixgbe_adapter *adapter)
{
   struct net_device *netdev = adapter->netdev;
   struct ixgbe_hw *hw = &adapter->hw;
   int i;
 
   ixgbe_set_rx_mode(netdev);
 
   ixgbe_restore_vlan(adapter);
#ifdef CONFIG_IXGBE_DCB
   if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
      if (hw->mac.type == ixgbe_mac_82598EB)
         netif_set_gso_max_size(netdev, 32768);
      else
         netif_set_gso_max_size(netdev, 65536);
      ixgbe_configure_dcb(adapter);
   } else {
      netif_set_gso_max_size(netdev, 65536);
   }
#else
   netif_set_gso_max_size(netdev, 65536);
#endif
 
#ifdef IXGBE_FCOE
   if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
      ixgbe_configure_fcoe(adapter);
 
#endif /* IXGBE_FCOE */
   if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
      for (i = 0; i < adapter->num_tx_queues; i++)
         adapter->tx_ring[i].atr_sample_rate =
                                        adapter->atr_sample_rate;
      ixgbe_init_fdir_signature_82599(hw, adapter->fdir_pballoc);
   } else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
      ixgbe_init_fdir_perfect_82599(hw, adapter->fdir_pballoc);
   }
 
   ixgbe_configure_tx(adapter); // configure the TX queues
   ixgbe_configure_rx(adapter); // configure the RX queues
   for (i = 0; i < adapter->num_rx_queues; i++)
      // allocate skbs and data buffers for count - 1 descriptors of adapter->rx_ring[i] (see below)
      ixgbe_alloc_rx_buffers(adapter, &adapter->rx_ring[i],
                             (adapter->rx_ring[i].count - 1));
}
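
Why count - 1 rather than count: a plausible reading, consistent with the usual NIC ring convention, is that one descriptor is always left unfilled so the tail never wraps onto the head; otherwise a completely full ring and a completely empty one would both look like head == tail. A user-space sketch of that convention:

#include <stdio.h>

#define RING 8 /* toy ring size; the driver uses 1024 */

/* slots software may still fill, with one slot always kept unused */
static unsigned int ring_free(unsigned int head, unsigned int tail)
{
	return (head - tail - 1) & (RING - 1);
}

int main(void)
{
	printf("head=0 tail=0 -> %u free (empty ring)\n", ring_free(0, 0)); /* 7 */
	printf("head=0 tail=7 -> %u free (ring full)\n", ring_free(0, 7));  /* 0 */
	return 0;
}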

ixgbe_configure_tx()

static void ixgbe_configure_tx(struct ixgbe_adapter *adapter)
{
   u64 tdba;
   struct ixgbe_hw *hw = &adapter->hw;
   u32 i, j, tdlen, txctrl;
 
   /* Setup the HW Tx Head and Tail descriptor pointers */
   for (i = 0; i < adapter->num_tx_queues; i++) {
      struct ixgbe_ring *ring = &adapter->tx_ring[i];
      j = ring->reg_idx;
      tdba = ring->dma; // bus address of the ixgbe_adv_tx_desc array
      tdlen = ring->count * sizeof(union ixgbe_adv_tx_desc);
      /* write the bus address of the ixgbe_adv_tx_desc array into the
         NIC registers:
         TDBAL (TX Descriptor Base Address Low)
         TDBAH (TX Descriptor Base Address High)
         TDLEN (TX Descriptor Length)
         TDH   (TX Descriptor Head)
         TDT   (TX Descriptor Tail) */
      // #define IXGBE_TDBAL(_i) (0x06000 + ((_i) * 0x40)) /* 32 of these (0-31)*/
      // #define IXGBE_TDBAH(_i) (0x06004 + ((_i) * 0x40))
      // #define IXGBE_TDLEN(_i) (0x06008 + ((_i) * 0x40))
      // #define IXGBE_TDH(_i)   (0x06010 + ((_i) * 0x40))
      // #define IXGBE_TDT(_i)   (0x06018 + ((_i) * 0x40))
      IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j),
                      (tdba & DMA_BIT_MASK(32)));
      IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32));
      IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), tdlen);
      IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0);
      IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0);
      adapter->tx_ring[i].head = IXGBE_TDH(j);
      adapter->tx_ring[i].tail = IXGBE_TDT(j);
      /*
       * Disable Tx Head Writeback RO bit, since this hoses
       * bookkeeping if things aren't delivered in order.
       */
      switch (hw->mac.type) {
      case ixgbe_mac_82598EB:
         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j));
         break;
      case ixgbe_mac_82599EB:
      default:
         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j));
         break;
      }
      txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN;
      switch (hw->mac.type) {
      case ixgbe_mac_82598EB:
         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl);
         break;
      case ixgbe_mac_82599EB:
      default:
         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl);
         break;
      }
   }
 
   if (hw->mac.type == ixgbe_mac_82599EB) {
      u32 rttdcs;
 
      /* disable the arbiter while setting MTQC */
      rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
      rttdcs |= IXGBE_RTTDCS_ARBDIS;
      IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
 
      /* We enable 8 traffic classes, DCB only */
      if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
         IXGBE_WRITE_REG(hw, IXGBE_MTQC, (IXGBE_MTQC_RT_ENA |
                         IXGBE_MTQC_8TC_8TQ));
      else
         IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
 
      /* re-enable the arbiter */
      rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
      IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   }
}
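
The TDBAL/TDBAH pair holds one 64-bit bus address split across two 32-bit registers; DMA_BIT_MASK(32) keeps the low word and the shift keeps the high word. The split can be checked in user space (the address below is hypothetical):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t tdba = 0x000000012345a000ULL; /* hypothetical ring bus address */
	uint32_t lo = (uint32_t)(tdba & 0xffffffffULL); /* written to TDBAL */
	uint32_t hi = (uint32_t)(tdba >> 32);           /* written to TDBAH */

	printf("TDBAL = 0x%08x, TDBAH = 0x%08x\n", lo, hi);
	printf("recombined = 0x%016llx\n",
	       ((unsigned long long)hi << 32) | lo);
	return 0;
}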

ixgbe_configure_rx()

static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
{
   u64 rdba;
   struct ixgbe_hw *hw = &adapter->hw;
   struct ixgbe_ring *rx_ring;
   struct net_device *netdev = adapter->netdev;
   int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
   int i, j;
   u32 rdlen, rxctrl, rxcsum;
   static const u32 seed[10] = { 0xE291D73D, 0x1805EC6C, 0x2A94B30D,
                     0xA54F2BEC, 0xEA49AF7C, 0xE214AD3D, 0xB855AABE,
                     0x6A3E67EA, 0x14364D17, 0x3BED200D};
   u32 fctrl, hlreg0;
   u32 reta = 0, mrqc = 0;
   u32 rdrxctl;
   int rx_buf_len;
 
   /* Decide whether to use packet split mode or not */
   adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED;
 
   /* Set the RX buffer length according to the mode */
   if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) {
      rx_buf_len = IXGBE_RX_HDR_SIZE;
      if (hw->mac.type == ixgbe_mac_82599EB) {
         /* PSRTYPE must be initialized in 82599 */
         u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
                       IXGBE_PSRTYPE_UDPHDR |
                       IXGBE_PSRTYPE_IPV4HDR |
                       IXGBE_PSRTYPE_IPV6HDR |
                       IXGBE_PSRTYPE_L2HDR;
         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
      }
   } else {
      if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) &&
          (netdev->mtu <= ETH_DATA_LEN))
         rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE;
      else
         rx_buf_len = ALIGN(max_frame, 1024);
   }
 
   fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   fctrl |= IXGBE_FCTRL_BAM;
   fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */
   fctrl |= IXGBE_FCTRL_PMCF;
   IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
 
   hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   if (adapter->netdev->mtu <= ETH_DATA_LEN)
      hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
   else
      hlreg0 |= IXGBE_HLREG0_JUMBOEN;
#ifdef IXGBE_FCOE
   if (netdev->features & NETIF_F_FCOE_MTU)
      hlreg0 |= IXGBE_HLREG0_JUMBOEN;
#endif
   IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
 
   rdlen = adapter->rx_ring[0].count * sizeof(union ixgbe_adv_rx_desc);
   /* disable receives while setting up the descriptors */
   rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
 
   /*
    * Setup the HW Rx Head and Tail Descriptor Pointers and
    * the Base and Length of the Rx Descriptor Ring
    */
   for (i = 0; i < adapter->num_rx_queues; i++) {
      rx_ring = &adapter->rx_ring[i];
      rdba = rx_ring->dma; // bus address of the ixgbe_adv_rx_desc array
      j = rx_ring->reg_idx;
      /* write the bus address of the ixgbe_adv_rx_desc array into the
         NIC registers:
         RDBAL (RX Descriptor Base Address Low)
         RDBAH (RX Descriptor Base Address High)
         RDLEN (RX Descriptor Length)
         RDH   (RX Descriptor Head)
         RDT   (RX Descriptor Tail) */
      // #define IXGBE_RDBAL(_i) (((_i) < 64) ? (0x01000 + ((_i) * 0x40)) : \
      //                          (0x0D000 + ((_i - 64) * 0x40)))
      // #define IXGBE_RDBAH(_i) (((_i) < 64) ? (0x01004 + ((_i) * 0x40)) : \
      //                          (0x0D004 + ((_i - 64) * 0x40)))
      // #define IXGBE_RDLEN(_i) (((_i) < 64) ? (0x01008 + ((_i) * 0x40)) : \
      //                          (0x0D008 + ((_i - 64) * 0x40)))
      // #define IXGBE_RDH(_i)   (((_i) < 64) ? (0x01010 + ((_i) * 0x40)) : \
      //                          (0x0D010 + ((_i - 64) * 0x40)))
      // #define IXGBE_RDT(_i)   (((_i) < 64) ? (0x01018 + ((_i) * 0x40)) : \
      //                          (0x0D018 + ((_i - 64) * 0x40)))
      IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j), (rdba & DMA_BIT_MASK(32)));
      IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32));
      IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j), rdlen);
      IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0);
      IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0);
      rx_ring->head = IXGBE_RDH(j);
      rx_ring->tail = IXGBE_RDT(j);
      rx_ring->rx_buf_len = rx_buf_len;
 
      if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED)
         rx_ring->flags |= IXGBE_RING_RX_PS_ENABLED;
      else
         rx_ring->flags &= ~IXGBE_RING_RX_PS_ENABLED;
 
#ifdef IXGBE_FCOE
      if (netdev->features & NETIF_F_FCOE_MTU) {
         struct ixgbe_ring_feature *f;
         f = &adapter->ring_feature[RING_F_FCOE];
         if ((i >= f->mask) && (i < f->mask + f->indices)) {
            rx_ring->flags &= ~IXGBE_RING_RX_PS_ENABLED;
            if (rx_buf_len < IXGBE_FCOE_JUMBO_FRAME_SIZE)
               rx_ring->rx_buf_len =
                       IXGBE_FCOE_JUMBO_FRAME_SIZE;
         }
      }
 
#endif /* IXGBE_FCOE */
      ixgbe_configure_srrctl(adapter, rx_ring);
   }
 
   if (hw->mac.type == ixgbe_mac_82598EB) {
      /*
       * For VMDq support of different descriptor types or
       * buffer sizes through the use of multiple SRRCTL
       * registers, RDRXCTL.MVMEN must be set to 1
       *
       * also, the manual doesn't mention it clearly but DCA hints
       * will only use queue 0's tags unless this bit is set.  Side
       * effects of setting this bit are only that SRRCTL must be
       * fully programmed [0..15]
       */
      rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
      rdrxctl |= IXGBE_RDRXCTL_MVMEN;
      IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   }
 
   /* Program MRQC for the distribution of queues */
   mrqc = ixgbe_setup_mrqc(adapter);
 
   if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
      /* Fill out redirection table */
      for (i = 0, j = 0; i < 128; i++, j++) {
         if (j == adapter->ring_feature[RING_F_RSS].indices)
            j = 0;
         /* reta = 4-byte sliding window of
          * 0x00..(indices-1)(indices-1)00..etc. */
         reta = (reta << 8) | (j * 0x11);
         if ((i & 3) == 3)
            IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
      }
 
      /* Fill out hash function seeds */
      for (i = 0; i < 10; i++)
         IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), seed[i]);
 
      if (hw->mac.type == ixgbe_mac_82598EB)
         mrqc |= IXGBE_MRQC_RSSEN;
          /* Perform hash on these packet types */
      mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4
            | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
            | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
            | IXGBE_MRQC_RSS_FIELD_IPV6
            | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
            | IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
   }
   IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
 
   rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
 
   if (adapter->flags & IXGBE_FLAG_RSS_ENABLED ||
       adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED) {
      /* Disable indicating checksum in descriptor, enables
       * RSS hash */
      rxcsum |= IXGBE_RXCSUM_PCSD;
   }
   if (!(rxcsum & IXGBE_RXCSUM_PCSD)) {
      /* Enable IPv4 payload checksum for UDP fragments
       * if PCSD is not set */
      rxcsum |= IXGBE_RXCSUM_IPPCSE;
   }
 
   IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
 
   if (hw->mac.type == ixgbe_mac_82599EB) {
      rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
      rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
      rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
      IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   }
 
   if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
      /* Enable 82599 HW-RSC */
      for (i = 0; i < adapter->num_rx_queues; i++)
         ixgbe_configure_rscctl(adapter, i, rx_buf_len);
 
      /* Disable RSC for ACK packets */
      IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
         (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   }
}
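
The RETA fill above packs one redirection-table entry per byte: the queue index j cycles through 0..indices-1, j * 0x11 duplicates the index into both nibbles of the byte, and a 32-bit register is written every fourth entry. The loop can be replayed in user space (assuming 4 RSS queues; only the first four registers are printed):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int indices = 4; /* ring_feature[RING_F_RSS].indices */
	uint32_t reta = 0;
	unsigned int i, j;

	for (i = 0, j = 0; i < 128; i++, j++) {
		if (j == indices)
			j = 0;
		reta = (reta << 8) | (j * 0x11); /* slide one byte in */
		if ((i & 3) == 3 && i < 16)
			printf("RETA[%u] = 0x%08x\n", i >> 2, reta);
	}
	/* prints 0x00112233 for every register when indices == 4 */
	return 0;
}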

ixgbe_alloc_rx_buffers()

Allocates an skb and a data buffer for cleaned_count ixgbe_adv_rx_desc/ixgbe_rx_buffer pairs, starting at rx_ring->next_to_use.

static void ixgbe_alloc_rx_buffers(struct ixgbe_adapter *adapter,
                                   struct ixgbe_ring *rx_ring,
                                   int cleaned_count)
{
   struct pci_dev *pdev = adapter->pdev;
   union ixgbe_adv_rx_desc *rx_desc;
   struct ixgbe_rx_buffer *bi;
   unsigned int i;
 
   i = rx_ring->next_to_use;
   bi = &rx_ring->rx_buffer_info[i]; // the i-th ixgbe_rx_buffer
 
   while (cleaned_count--) {
      rx_desc = IXGBE_RX_DESC_ADV(*rx_ring, i); // the i-th ixgbe_adv_rx_desc
 
      if (!bi->page_dma &&
          (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED)) {
         if (!bi->page) {
            bi->page = alloc_page(GFP_ATOMIC);
            if (!bi->page) {
               adapter->alloc_rx_page_failed++;
               goto no_buffers;
            }
            bi->page_offset = 0;
         } else {
            /* use a half page if we're re-using */
            bi->page_offset ^= (PAGE_SIZE / 2);
         }
 
         bi->page_dma = pci_map_page(pdev, bi->page,
                                     bi->page_offset,
                                     (PAGE_SIZE / 2),
                                     PCI_DMA_FROMDEVICE);
      }
 
      if (!bi->skb) { // no skb attached yet
         struct sk_buff *skb;
         skb = netdev_alloc_skb(adapter->netdev,
                                (rx_ring->rx_buf_len +
                                 NET_IP_ALIGN)); // allocate a new skb
 
         if (!skb) {
            adapter->alloc_rx_buff_failed++;
            goto no_buffers;
         }
 
         /*
          * Make buffer alignment 2 beyond a 16 byte boundary
          * this will result in a 16 byte aligned IP header after
          * the 14 byte MAC header is removed
          */
         skb_reserve(skb, NET_IP_ALIGN); // advance skb->data and skb->tail by NET_IP_ALIGN
 
         bi->skb = skb; // attach the skb to bi
         // streaming-DMA map skb->data; the bus address goes into bi->dma
         bi->dma = pci_map_single(pdev, skb->data,
                                  rx_ring->rx_buf_len,
                                  PCI_DMA_FROMDEVICE);
      }
      /* Refresh the desc even if buffer_addrs didn't change because
       * each write-back erases this info. */
      if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) {
         rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
         rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
      } else {
         rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); // store bi->dma in rx_desc->read.pkt_addr
      }
 
      i++;
      if (i == rx_ring->count)
         i = 0; // wrap around: the descriptor array is a circular ring
      bi = &rx_ring->rx_buffer_info[i];
   }
 
no_buffers:
   if (rx_ring->next_to_use != i) { // if next_to_use already equals i, neither next_to_use nor the tail register needs updating
      rx_ring->next_to_use = i;
      if (i-- == 0)
         i = (rx_ring->count - 1);
 
      /* ixgbe_release_rx_desc() -> IXGBE_WRITE_REG()
         writes i into the NIC's RDT register */
      ixgbe_release_rx_desc(&adapter->hw, rx_ring, i);
   }
}
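
The tail update at the end deserves a second look: next_to_use is set to the first unfilled slot i, but the RDT register is written with the index of the last descriptor that actually got a buffer, i.e. i - 1 with wraparound. A user-space sketch of that arithmetic:

#include <stdio.h>

#define COUNT 1024 /* rx_ring->count */

int main(void)
{
	unsigned int next_to_use = 0; /* e.g. the fill loop wrapped all the way around */
	unsigned int i = next_to_use;

	if (i-- == 0)      /* test, post-decrement, then wrap if needed */
		i = COUNT - 1;

	printf("RDT <- %u\n", i); /* 1023: the last filled descriptor */
	return 0;
}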

ixgbe_request_irq()

The hardware interrupt handler is requested and installed in ixgbe_request_irq():

  • with MSI-X, the handler is ixgbe_msix_clean_many()
  • with MSI or legacy interrupts, the handler is ixgbe_intr()
static int ixgbe_request_irq(struct ixgbe_adapter *adapter)
{
   struct net_device *netdev = adapter->netdev;
   int err;
 
   if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { // MSI-X
      // the handler is ixgbe_msix_clean_many()
      err = ixgbe_request_msix_irqs(adapter);
   } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { // MSI
      err = request_irq(adapter->pdev->irq, &ixgbe_intr, 0,
                        netdev->name, netdev); // the handler is ixgbe_intr()
   } else { // legacy interrupts
      err = request_irq(adapter->pdev->irq, &ixgbe_intr, IRQF_SHARED,
                        netdev->name, netdev); // the handler is ixgbe_intr()
   }
 
   if (err)
      DPRINTK(PROBE, ERR, "request_irq failed, Error %d\n", err);
 
   return err;
}

ixgbe_request_msix_irqs()

/**
 * ixgbe_request_msix_irqs - Initialize MSI-X interrupts
 * @adapter: board private structure
 *
 * ixgbe_request_msix_irqs allocates MSI-X vectors and requests
 * interrupts from the kernel.
 **/
static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	irqreturn_t (*handler)(int, void *);
	int i, vector, q_vectors, err;
	int ri=0, ti=0;

	/* Decrement for Other and TCP Timer vectors */
	// excludes the non-queue MSI-X interrupts (LSC etc.), which are bound to all CPUs of the NUMA node hosting ixgbe0
	q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;

	/* Map the Tx/Rx rings to the vectors we were allotted. */
	err = ixgbe_map_rings_to_vectors(adapter, q_vectors); // map rings to vectors
	if (err)
		goto out;

#define SET_HANDLER(_v) ((!(_v)->rxr_count) ? &ixgbe_msix_clean_tx : \
                         (!(_v)->txr_count) ? &ixgbe_msix_clean_rx : \
                         &ixgbe_msix_clean_many) // mixed TX/RX vectors use ixgbe_msix_clean_many()
	for (vector = 0; vector < q_vectors; vector++) {
		handler = SET_HANDLER(adapter->q_vector[vector]); // pick the handler for this vector

		if(handler == &ixgbe_msix_clean_rx) {
			sprintf(adapter->name[vector], "%s-%s-%d",
				netdev->name, "rx", ri++);
		}
		else if(handler == &ixgbe_msix_clean_tx) {
			sprintf(adapter->name[vector], "%s-%s-%d",
				netdev->name, "tx", ti++);
		}
		else
		    // visible via cat /proc/interrupts, e.g. xgbe0-TxRx-0
			sprintf(adapter->name[vector], "%s-%s-%d",
				netdev->name, "TxRx", vector);

        // request an MSI-X interrupt for each vector
		err = request_irq(adapter->msix_entries[vector].vector,
		                  handler, 0, adapter->name[vector],
		                  adapter->q_vector[vector]);
		if (err) {
			DPRINTK(PROBE, ERR,
			        "request_irq failed for MSIX interrupt "
			        "Error: %d\n", err);
			goto free_queue_irqs;
		}
	}

	sprintf(adapter->name[vector], "%s:lsc", netdev->name);
	// request the MSI-X interrupt for link state changes (LSC) etc., bound to all CPUs of ixgbe0's NUMA node
	err = request_irq(adapter->msix_entries[vector].vector,
	                  &ixgbe_msix_lsc, 0, adapter->name[vector], netdev);
	if (err) {
		DPRINTK(PROBE, ERR,
			"request_irq for msix_lsc failed: %d\n", err);
		goto free_queue_irqs;
	}

	return 0;

free_queue_irqs:
	for (i = vector - 1; i >= 0; i--)
		free_irq(adapter->msix_entries[--vector].vector,
		         adapter->q_vector[i]);
	adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
	pci_disable_msix(adapter->pdev);
	kfree(adapter->msix_entries);
	adapter->msix_entries = NULL;
out:
	return err;
}
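
The free_queue_irqs unwind decrements i and vector in lockstep (--vector lands on the same value as i in each iteration), so every free_irq() call actually pairs msix_entries[i] with q_vector[i]. A clearer, equivalent formulation (kernel-context sketch, not standalone):

	/* release the queue IRQs registered so far, newest first */
	for (i = vector - 1; i >= 0; i--)
		free_irq(adapter->msix_entries[i].vector,
		         adapter->q_vector[i]);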

ixgbe_map_rings_to_vectors()

static int ixgbe_map_rings_to_vectors(struct ixgbe_adapter *adapter,
                                      int vectors)
{
	int v_start = 0;
	int rxr_idx = 0, txr_idx = 0;
	int rxr_remaining = adapter->num_rx_queues;
	int txr_remaining = adapter->num_tx_queues;
	int i, j;
	int rqpv, tqpv;
	int err = 0;

	/* No mapping required if MSI-X is disabled. */
	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
		goto out;

	/*
	 * The ideal configuration...
	 * We have enough vectors to map one per queue.
	 */
	if (vectors == adapter->num_rx_queues + adapter->num_tx_queues) {
		for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
			map_vector_to_rxq(adapter, v_start, rxr_idx); // map the vector to an RX queue

		for (; txr_idx < txr_remaining; v_start++, txr_idx++)
			map_vector_to_txq(adapter, v_start, txr_idx); // map the vector to a TX queue

		goto out;
	}

	/*
	 * If we don't have enough vectors for a 1-to-1
	 * mapping, we'll have to group them so there are
	 * multiple queues per vector.
	 */
	/* Re-adjusting *qpv takes care of the remainder. */
	for (i = v_start; i < vectors; i++) {
		rqpv = DIV_ROUND_UP(rxr_remaining, vectors - i);
		for (j = 0; j < rqpv; j++) {
			map_vector_to_rxq(adapter, i, rxr_idx);
			rxr_idx++;
			rxr_remaining--;
		}
	}
	for (i = v_start; i < vectors; i++) {
		tqpv = DIV_ROUND_UP(txr_remaining, vectors - i);
		for (j = 0; j < tqpv; j++) {
			map_vector_to_txq(adapter, i, txr_idx);
			txr_idx++;
			txr_remaining--;
		}
	}

out:
	return err;
}
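
When there are fewer vectors than queues, DIV_ROUND_UP(remaining, vectors_left) re-adjusts after every vector, which spreads the remainder as evenly as possible. The distribution can be replayed in user space (the 10-queue/4-vector figures are made up for illustration):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) /* same as the kernel macro */

int main(void)
{
	int vectors = 4, rxr_remaining = 10;
	int i;

	for (i = 0; i < vectors; i++) {
		int rqpv = DIV_ROUND_UP(rxr_remaining, vectors - i);
		printf("vector %d gets %d rx queues\n", i, rqpv);
		rxr_remaining -= rqpv;
	}
	/* prints 3, 3, 2, 2 */
	return 0;
}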

static inline void map_vector_to_rxq(struct ixgbe_adapter *a, int v_idx,
                                     int r_idx)
{
	struct ixgbe_q_vector *q_vector = a->q_vector[v_idx];

	set_bit(r_idx, q_vector->rxr_idx); // set bit r_idx in q_vector->rxr_idx
	q_vector->rxr_count++; // bump rxr_count
}

static inline void map_vector_to_txq(struct ixgbe_adapter *a, int v_idx,
                                     int t_idx)
{
	struct ixgbe_q_vector *q_vector = a->q_vector[v_idx];

	set_bit(t_idx, q_vector->txr_idx); // set bit t_idx in q_vector->txr_idx
	q_vector->txr_count++; // bump txr_count
}

netif_tx_start_all_queues()

static inline void netif_tx_start_all_queues(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		netif_tx_start_queue(txq);
	}
}

static inline
struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
					 unsigned int index)
{
	return &dev->_tx[index];
}

static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
{
	clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
}