注册网卡驱动
和大部分设备驱动一样,网卡驱动是作为一个module注册到kernel的
通过module_init() -> ixgbe_init_module() -> pci_register_driver()注册ixgbe_driver
通过module_exit() -> ixgbe_exit_module() -> pci_unregister_driver()注销ixgbe_driver
/* PCI driver descriptor for ixgbe. It is handed to pci_register_driver()
 * by ixgbe_init_module() and lists the callbacks for this driver. */
static struct pci_driver ixgbe_driver = {
.name = ixgbe_driver_name,
.id_table = ixgbe_pci_tbl,
.probe = ixgbe_probe, // ixgbe_probe() is called once the system has detected an ixgbe NIC
.remove = ixgbe_remove,
#ifdef CONFIG_PM
.suspend = ixgbe_suspend,
.resume = ixgbe_resume,
#endif
.shutdown = ixgbe_shutdown,
.sriov_configure = ixgbe_pci_sriov_configure,
.err_handler = &ixgbe_err_handler
};
/* Module entry point (installed through module_init()): registers
 * ixgbe_driver with the PCI core.
 * The "..." lines mark code that was elided from this excerpt. */
static int __init ixgbe_init_module(void)
{
...
ret = pci_register_driver(&ixgbe_driver); // register ixgbe_driver
...
}
module_init(ixgbe_init_module);
/* Module exit point (installed through module_exit()): unregisters
 * ixgbe_driver from the PCI core.
 * The "..." lines mark code that was elided from this excerpt. */
static void __exit ixgbe_exit_module(void)
{
...
pci_unregister_driver(&ixgbe_driver); // unregister ixgbe_driver
...
}
module_exit(ixgbe_exit_module);
pci_register_driver()
pci_register_driver() ->
__pci_register_driver() ->
driver_register() ->
bus_add_driver() ->
driver_attach() ->
bus_for_each_dev() ->
__driver_attach() ->
driver_probe_device() ->
really_probe() ->
pci_device_probe() ->
__pci_device_probe() ->
pci_call_probe() ->
local_pci_probe()
/* Final step of the pci_register_driver() call chain: invokes the PCI
 * driver's probe callback (ixgbe_probe() for ixgbe_driver).
 * The "..." lines mark code that was elided from this excerpt. */
static long local_pci_probe(void *_ddi)
{
...
rc = pci_drv->probe(pci_dev, ddi->id); // call the device driver's probe() once the device has been detected
...
}
ixgbe_probe()
/*
 * ixgbe_probe - PCI probe callback for one ixgbe adapter.
 * @pdev: PCI device being probed
 * @ent:  matching PCI id table entry; ent->driver_data indexes ixgbe_info_tbl
 *
 * Enables the device, configures the DMA masks, claims the memory BARs and
 * maps BAR0, allocates the net_device/ixgbe_adapter pair, resets the
 * hardware, sets up the interrupt scheme and registers the net_device.
 *
 * Returns 0 on success. On failure the error code is returned after the
 * already acquired resources have been released through the err_* labels
 * at the end of the function.
 */
static int __devinit ixgbe_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
struct net_device *netdev;
struct ixgbe_adapter *adapter = NULL;
struct ixgbe_hw *hw;
const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data]; // pick the ixgbe_info matching the NIC model (82598/82599)
static int cards_found;
int i, err, pci_using_dac;
#ifdef IXGBE_FCOE
u16 device_caps;
#endif
u32 part_num, eec;
/* pci_enable_device_mem() -> __pci_enable_device_flags() -> do_pci_enable_device()
-> pcibios_enable_device() -> pci_enable_resources() -> pci_write_config_word()
Writes PCI_COMMAND_MEMORY (0x2) into the Command configuration register (0x04),
which allows the driver to access the NIC's memory space */
err = pci_enable_device_mem(pdev);
if (err)
return err;
/* pci_set_dma_mask() -> dma_set_mask() -> dma_supported()
Check and set the DMA address width: try 64 bit first, fall back to 32 bit */
if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) &&
!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
pci_using_dac = 1;
} else {
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "No usable DMA "
"configuration, aborting\n");
goto err_dma;
}
}
pci_using_dac = 0;
}
/* pci_request_selected_regions() -> __pci_request_selected_regions() -> __pci_request_region()
-> request_region()/__request_mem_region() -> __request_region() -> __request_resource()
Claim the bus addresses described by the memory BARs (the resource is
inserted into the iomem_resource tree) */
err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
IORESOURCE_MEM), ixgbe_driver_name);
if (err) {
dev_err(&pdev->dev,
"pci_request_selected_regions failed 0x%x\n", err);
goto err_pci_reg;
}
pci_enable_pcie_error_reporting(pdev);
/* pci_set_master() -> __pci_set_master() -> pci_write_config_word()
Writes PCI_COMMAND_MASTER (0x4) into the Command configuration register (0x04),
which allows the NIC to act as PCI bus master */
pci_set_master(pdev);
/* pci_save_state() -> pci_read_config_dword()
Reads the configuration space and saves it in dev->saved_config_space */
pci_save_state(pdev);
// allocate the net_device together with the ixgbe_adapter private area; MAX_TX_QUEUES (128) tx queues
netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), MAX_TX_QUEUES);
if (!netdev) {
err = -ENOMEM;
goto err_alloc_etherdev;
}
SET_NETDEV_DEV(netdev, &pdev->dev);
pci_set_drvdata(pdev, netdev);
adapter = netdev_priv(netdev); // pointer to the ixgbe_adapter (net_device private data)
adapter->netdev = netdev;
adapter->pdev = pdev;
hw = &adapter->hw; // pointer to the ixgbe_hw embedded in the adapter
hw->back = adapter;
adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
// map the bus address range of BAR0 and store the mapping in hw->hw_addr;
// the driver accesses the NIC's BAR0 memory space through hw->hw_addr
hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
pci_resource_len(pdev, 0));
if (!hw->hw_addr) {
err = -EIO;
goto err_ioremap;
}
/* NOTE(review): as written this loop over BAR1..BAR5 has no effect */
for (i = 1; i <= 5; i++) {
if (pci_resource_len(pdev, i) == 0)
continue;
}
netdev->netdev_ops = &ixgbe_netdev_ops; // install ixgbe_netdev_ops
ixgbe_set_ethtool_ops(netdev);
netdev->watchdog_timeo = 5 * HZ;
strcpy(netdev->name, pci_name(pdev));
adapter->bd_number = cards_found; // index of this card (0 for the first adapter probed successfully)
/* Setup hw api */
memcpy(&hw->mac.ops, ii->mac_ops, sizeof(hw->mac.ops));
hw->mac.type = ii->mac;
/* EEPROM */
memcpy(&hw->eeprom.ops, ii->eeprom_ops, sizeof(hw->eeprom.ops));
eec = IXGBE_READ_REG(hw, IXGBE_EEC); // read the IXGBE_EEC register from the BAR0 memory space
/* If EEPROM is valid (bit 8 = 1), use default otherwise use bit bang */
if (!(eec & (1 << 8)))
hw->eeprom.ops.read = &ixgbe_read_eeprom_bit_bang_generic;
/* PHY */
memcpy(&hw->phy.ops, ii->phy_ops, sizeof(hw->phy.ops));
hw->phy.sfp_type = ixgbe_sfp_type_unknown;
/* ixgbe_identify_phy_generic will set prtad and mmds properly */
hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
hw->phy.mdio.mmds = 0;
hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
hw->phy.mdio.dev = netdev;
hw->phy.mdio.mdio_read = ixgbe_mdio_read;
hw->phy.mdio.mdio_write = ixgbe_mdio_write;
/* set up this timer and work struct before calling get_invariants
 * which might start the timer
 */
init_timer(&adapter->sfp_timer);
adapter->sfp_timer.function = &ixgbe_sfp_timer;
adapter->sfp_timer.data = (unsigned long) adapter;
INIT_WORK(&adapter->sfp_task, ixgbe_sfp_task);
/* multispeed fiber has its own tasklet, called from GPI SDP1 context */
INIT_WORK(&adapter->multispeed_fiber_task, ixgbe_multispeed_fiber_task);
/* a new SFP+ module arrival, called from GPI SDP2 context */
INIT_WORK(&adapter->sfp_config_module_task,
ixgbe_sfp_config_module_task);
/* Per the original notes, for the 82599 (callee bodies are not shown here):
ixgbe_get_invariants_82599() -> ixgbe_get_pcie_msix_count_82599()
sets hw->mac->max_tx/rx_queues to IXGBE_82599_MAX_TX/RX_QUEUES (128) and
stores the EEPROM value MSI_X_N (0x3F = 63) in hw->mac->max_msix_vectors */
ii->get_invariants(hw);
/* setup the private structure */
/* Per the original notes, ixgbe_sw_init() (body not shown here) initialises the ixgbe_adapter:
adapter->tx/rx_ring_count = 1024 (default 1024, minimum 64, maximum 4096)
adapter->ring_feature[RING_F_RSS].indices = min(number of CPUs, IXGBE_MAX_RSS_INDICES (16))
adapter->ring_feature[RING_F_FDIR].indices = IXGBE_MAX_FDIR_INDICES (64)
sets IXGBE_FLAG_RSS_ENABLED and IXGBE_FLAG_FDIR_HASH_CAPABLE in adapter->flags */
err = ixgbe_sw_init(adapter);
if (err)
goto err_sw_init;
/*
 * If there is a fan on this device and it has failed log the
 * failure.
 */
if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
if (esdp & IXGBE_ESDP_SDP1)
DPRINTK(PROBE, CRIT,
"Fan has stopped, replace the adapter\n");
}
/* reset_hw fills in the perm_addr as well */
/* ixgbe_reset_hw_82599() -> ixgbe_get_mac_addr_generic()
stores the permanent MAC address in hw->mac.perm_addr */
err = hw->mac.ops.reset_hw(hw);
if (err == IXGBE_ERR_SFP_NOT_PRESENT &&
hw->mac.type == ixgbe_mac_82598EB) {
/*
 * Start a kernel thread to watch for a module to arrive.
 * Only do this for 82598, since 82599 will generate
 * interrupts on module arrival.
 */
set_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state);
mod_timer(&adapter->sfp_timer,
round_jiffies(jiffies + (2 * HZ)));
err = 0;
} else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
dev_err(&adapter->pdev->dev, "failed to initialize because "
"an unsupported SFP+ module type was detected.\n"
"Reload the driver after installing a supported "
"module.\n");
goto err_sw_init;
} else if (err) {
dev_err(&adapter->pdev->dev, "HW Init failed: %d\n", err);
goto err_sw_init;
}
netdev->features = NETIF_F_SG |
NETIF_F_IP_CSUM |
NETIF_F_HW_VLAN_TX |
NETIF_F_HW_VLAN_RX |
NETIF_F_HW_VLAN_FILTER;
netdev->features |= NETIF_F_IPV6_CSUM;
netdev->features |= NETIF_F_TSO;
netdev->features |= NETIF_F_TSO6;
netdev->features |= NETIF_F_GRO;
if (adapter->hw.mac.type == ixgbe_mac_82599EB)
netdev->features |= NETIF_F_SCTP_CSUM;
netdev->vlan_features |= NETIF_F_TSO;
netdev->vlan_features |= NETIF_F_TSO6;
netdev->vlan_features |= NETIF_F_IP_CSUM;
netdev->vlan_features |= NETIF_F_IPV6_CSUM;
netdev->vlan_features |= NETIF_F_SG;
if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
#ifdef CONFIG_IXGBE_DCB
netdev->dcbnl_ops = &dcbnl_ops;
#endif
#ifdef IXGBE_FCOE
if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
if (hw->mac.ops.get_device_caps) {
hw->mac.ops.get_device_caps(hw, &device_caps);
if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS)
adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
}
}
#endif /* IXGBE_FCOE */
if (pci_using_dac)
netdev->features |= NETIF_F_HIGHDMA;
if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
netdev->features |= NETIF_F_LRO;
/* make sure the EEPROM is good */
if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
dev_err(&pdev->dev, "The EEPROM Checksum Is Not Valid\n");
err = -EIO;
goto err_eeprom;
}
memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len); // copy the MAC address into netdev->dev_addr
memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len);
if (ixgbe_validate_mac_addr(netdev->perm_addr)) {
dev_err(&pdev->dev, "invalid MAC address\n");
err = -EIO;
goto err_eeprom;
}
init_timer(&adapter->watchdog_timer);
adapter->watchdog_timer.function = &ixgbe_watchdog;
adapter->watchdog_timer.data = (unsigned long)adapter;
INIT_WORK(&adapter->reset_task, ixgbe_reset_task);
INIT_WORK(&adapter->watchdog_task, ixgbe_watchdog_task);
/* ixgbe_init_interrupt_scheme() -> ixgbe_set_num_queues() -> ixgbe_set_fdir_queues()/ixgbe_set_rss_queues()
ixgbe_set_interrupt_capability() -> ixgbe_acquire_msix_vectors() -> pci_enable_msix()
ixgbe_alloc_q_vectors()
ixgbe_alloc_queues()
Sets adapter->num_tx/rx_queues according to FDIR/RSS,
requests interrupts from the PCI subsystem,
chooses the poll function, allocates the ixgbe_q_vector structs and adds their napi contexts to napi_list,
allocates the tx/rx ring arrays */
err = ixgbe_init_interrupt_scheme(adapter);
if (err)
goto err_sw_init;
switch (pdev->device) {
case IXGBE_DEV_ID_82599_KX4:
adapter->wol = (IXGBE_WUFC_MAG | IXGBE_WUFC_EX |
IXGBE_WUFC_MC | IXGBE_WUFC_BC);
/* Enable ACPI wakeup in GRC */
IXGBE_WRITE_REG(hw, IXGBE_GRC,
(IXGBE_READ_REG(hw, IXGBE_GRC) & ~IXGBE_GRC_APME));
break;
default:
adapter->wol = 0;
break;
}
device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
/* pick up the PCI bus settings for reporting later */
hw->mac.ops.get_bus_info(hw);
/* print bus type/speed/width info */
dev_info(&pdev->dev, "(PCI Express:%s:%s) %pM\n",
((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
(hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
((hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
(hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
(hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
"Unknown"),
netdev->dev_addr);
ixgbe_read_pba_num_generic(hw, &part_num);
if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
dev_info(&pdev->dev, "MAC: %d, PHY: %d, SFP+: %d, PBA No: %06x-%03x\n",
hw->mac.type, hw->phy.type, hw->phy.sfp_type,
(part_num >> 8), (part_num & 0xff));
else
dev_info(&pdev->dev, "MAC: %d, PHY: %d, PBA No: %06x-%03x\n",
hw->mac.type, hw->phy.type,
(part_num >> 8), (part_num & 0xff));
if (hw->bus.width <= ixgbe_bus_width_pcie_x4) {
dev_warn(&pdev->dev, "PCI-Express bandwidth available for "
"this card is not sufficient for optimal "
"performance.\n");
dev_warn(&pdev->dev, "For optimal performance a x8 "
"PCI-Express slot is required.\n");
}
/* save off EEPROM version number */
hw->eeprom.ops.read(hw, 0x29, &adapter->eeprom_version);
/* reset the hardware with the new settings */
err = hw->mac.ops.start_hw(hw);
if (err == IXGBE_ERR_EEPROM_VERSION) {
/* We are running on a pre-production device, log a warning */
dev_warn(&pdev->dev, "This device is a pre-production "
"adapter/LOM. Please be aware there may be issues "
"associated with your hardware. If you are "
"experiencing problems please contact your Intel or "
"hardware representative who provided you with this "
"hardware.\n");
}
strcpy(netdev->name, "eth%d");
err = register_netdev(netdev); // register the net_device
if (err)
goto err_register;
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE ||
adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
INIT_WORK(&adapter->fdir_reinit_task, ixgbe_fdir_reinit_task);
#ifdef CONFIG_IXGBE_DCA
if (dca_add_requester(&pdev->dev) == 0) {
adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
ixgbe_setup_dca(adapter);
}
#endif
/* add san mac addr to netdev */
ixgbe_add_sanmac_netdev(netdev);
dev_info(&pdev->dev, "Intel(R) 10 Gigabit Network Connection\n");
cards_found++;
return 0;
/* Error unwinding: each label releases what was acquired before the
 * corresponding failure point, then falls through to the labels below it. */
err_register:
ixgbe_release_hw_control(adapter);
ixgbe_clear_interrupt_scheme(adapter);
err_sw_init:
err_eeprom:
clear_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state);
del_timer_sync(&adapter->sfp_timer);
cancel_work_sync(&adapter->sfp_task);
cancel_work_sync(&adapter->multispeed_fiber_task);
cancel_work_sync(&adapter->sfp_config_module_task);
iounmap(hw->hw_addr);
err_ioremap:
free_netdev(netdev);
err_alloc_etherdev:
pci_release_selected_regions(pdev, pci_select_bars(pdev,
IORESOURCE_MEM));
err_pci_reg:
err_dma:
pci_disable_device(pdev);
return err;
}
主要步骤
1、根据网卡型号(82598/82599/540/550)选择ixgbe_info
const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];
/* Maps a board id (enum ixgbe_boards) to its ixgbe_info descriptor.
 * ixgbe_probe() indexes this table with ent->driver_data. */
static const struct ixgbe_info *ixgbe_info_tbl[] = {
[board_82598] = &ixgbe_82598_info,
[board_82599] = &ixgbe_82599_info,
[board_X540] = &ixgbe_X540_info,
[board_X550] = &ixgbe_X550_info,
[board_X550EM_x] = &ixgbe_X550EM_x_info,
[board_x550em_x_fw] = &ixgbe_x550em_x_fw_info,
[board_x550em_a] = &ixgbe_x550em_a_info,
[board_x550em_a_fw] = &ixgbe_x550em_a_fw_info,
};
/* Board ids; the enumerators are used as designated indices of ixgbe_info_tbl. */
enum ixgbe_boards {
board_82598,
board_82599,
board_X540,
board_X550,
board_X550EM_x,
board_x550em_x_fw,
board_x550em_a,
board_x550em_a_fw,
};
/* Descriptor for the 82599 (ixgbe_info_tbl[board_82599]): MAC type, the
 * get_invariants hook and the operation tables that ixgbe_probe() copies
 * into hw->mac.ops, hw->eeprom.ops and hw->phy.ops. */
const struct ixgbe_info ixgbe_82599_info = {
.mac = ixgbe_mac_82599EB,
.get_invariants = &ixgbe_get_invariants_82599,
.mac_ops = &mac_ops_82599,
.eeprom_ops = &eeprom_ops_82599,
.phy_ops = &phy_ops_82599,
.mbx_ops = &mbx_ops_generic,
.mvals = ixgbe_mvals_8259X,
};
分配net_device和ixgbe_adapter
netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), MAX_TX_QUEUES);
/*
 * alloc_etherdev_mq - allocate a multiqueue Ethernet net_device.
 * @sizeof_priv: size of the driver private area placed behind the net_device
 * @queue_count: number of tx queues to allocate
 *
 * Thin wrapper around alloc_netdev_mq() using the name template "eth%d"
 * and ether_setup() as the setup callback.
 *
 * Returns whatever alloc_netdev_mq() returns (NULL on failure).
 */
struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
{
	struct net_device *ndev;

	ndev = alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count);

	return ndev;
}
/*
 * alloc_netdev_mq - allocate a net_device with private data and tx queues.
 * @sizeof_priv: size of the private area to append to the net_device (0 for none)
 * @name:        device name (format string); must fit into dev->name
 * @setup:       callback that initialises the device (ether_setup for Ethernet)
 * @queue_count: number of netdev_queue tx queues to allocate
 *
 * Returns the NETDEV_ALIGN-aligned net_device, or NULL if an allocation or
 * dev_addr_init() fails (everything allocated so far is freed).
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
struct netdev_queue *tx;
struct net_device *dev;
size_t alloc_size;
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
alloc_size = sizeof(struct net_device); // size of the net_device itself
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
alloc_size += sizeof_priv; // plus the size of the private data
}
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
p = kzalloc(alloc_size, GFP_KERNEL); // one zeroed allocation for net_device + private data
if (!p) {
printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
return NULL;
}
// allocate queue_count netdev_queue structs (the tx queue array); one netdev_queue per tx queue
tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
if (!tx) {
printk(KERN_ERR "alloc_netdev: Unable to allocate "
"tx qdiscs.\n");
goto free_p;
}
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p; // remember the alignment padding in front of dev
if (dev_addr_init(dev))
goto free_tx;
dev_unicast_init(dev);
dev_net_set(dev, &init_net);
dev->_tx = tx; // store the tx queue array
dev->num_tx_queues = queue_count; // number of allocated tx queues
dev->real_num_tx_queues = queue_count; // number of tx queues actually in use
dev->gso_max_size = GSO_MAX_SIZE;
netdev_init_queues(dev); // point dev->_tx[i].dev and dev->rx_queue.dev back at dev
INIT_LIST_HEAD(&dev->napi_list);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev); // ether_setup() for Ethernet devices
strcpy(dev->name, name);
return dev;
free_tx:
kfree(tx);
free_p:
kfree(p);
return NULL;
}
/*
 * netdev_init_queues - attach the rx queue and every tx queue to @dev.
 *
 * Sets the back pointer of dev->rx_queue and of each of the
 * dev->num_tx_queues entries of dev->_tx, then initialises
 * dev->tx_global_lock.
 */
static void netdev_init_queues(struct net_device *dev)
{
	unsigned int q;

	netdev_init_one_queue(dev, &dev->rx_queue, NULL);

	for (q = 0; q < dev->num_tx_queues; q++)
		netdev_init_one_queue(dev, &dev->_tx[q], NULL);

	spin_lock_init(&dev->tx_global_lock);
}
/* Per-queue callback used by netdev_init_queues(): records @dev as the
 * owner of @queue. @_unused is ignored. */
static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue,
void *_unused)
{
queue->dev = dev;
}
/*
 * netdev_for_each_tx_queue - invoke a callback on every tx queue of @dev.
 * @dev: device whose dev->_tx array is walked
 * @f:   callback, called as f(dev, &dev->_tx[i], arg)
 * @arg: opaque argument passed through to @f
 *
 * Queues are visited in index order, 0 .. dev->num_tx_queues - 1.
 */
static inline void netdev_for_each_tx_queue(struct net_device *dev,
					    void (*f)(struct net_device *,
						      struct netdev_queue *,
						      void *),
					    void *arg)
{
	unsigned int idx = 0;

	while (idx < dev->num_tx_queues) {
		struct netdev_queue *txq = &dev->_tx[idx];

		f(dev, txq, arg);
		idx++;
	}
}
void ether_setup(struct net_device *dev)
{
dev->header_ops = ð_header_ops;
dev->type = ARPHRD_ETHER; // 以太网格式
dev->hard_header_len = ETH_HLEN; // 14
dev->mtu = ETH_DATA_LEN; // 1500
dev->addr_len = ETH_ALEN; // 6
dev->tx_queue_len = 1000; /* Ethernet wants good queues */
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
memset(dev->broadcast, 0xFF, ETH_ALEN);
}
读取mac地址并写入hw->mac.perm_addr(ixgbe_get_mac_addr_generic()实际是从RAR0寄存器RAL(0)/RAH(0)中读取,而不是直接读eeprom)
err = hw->mac.ops.reset_hw(hw);
/* 82599 descriptor: .mac_ops points at mac_ops_82599, whose .reset_hw is
 * ixgbe_reset_hw_82599().
 * NOTE(review): this excerpt lacks the const qualifier and the
 * mbx_ops/mvals fields of the definition quoted earlier in this file --
 * presumably it was taken from a different driver version; confirm. */
struct ixgbe_info ixgbe_82599_info = {
.mac = ixgbe_mac_82599EB,
.get_invariants = &ixgbe_get_invariants_82599,
.mac_ops = &mac_ops_82599,
.eeprom_ops = &eeprom_ops_82599,
.phy_ops = &phy_ops_82599,
};
/* MAC operation table for the 82599. ixgbe_probe() copies it into
 * hw->mac.ops, so hw->mac.ops.reset_hw() resolves to ixgbe_reset_hw_82599()
 * and hw->mac.ops.get_mac_addr() to ixgbe_get_mac_addr_generic(). */
static struct ixgbe_mac_operations mac_ops_82599 = {
.init_hw = &ixgbe_init_hw_generic,
.reset_hw = &ixgbe_reset_hw_82599,
.start_hw = &ixgbe_start_hw_82599,
.clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic,
.get_media_type = &ixgbe_get_media_type_82599,
.get_supported_physical_layer = &ixgbe_get_supported_physical_layer_82599,
.enable_rx_dma = &ixgbe_enable_rx_dma_82599,
.get_mac_addr = &ixgbe_get_mac_addr_generic,
.get_san_mac_addr = &ixgbe_get_san_mac_addr_82599,
.get_device_caps = &ixgbe_get_device_caps_82599,
.stop_adapter = &ixgbe_stop_adapter_generic,
.get_bus_info = &ixgbe_get_bus_info_generic,
.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie,
.read_analog_reg8 = &ixgbe_read_analog_reg8_82599,
.write_analog_reg8 = &ixgbe_write_analog_reg8_82599,
.setup_link = &ixgbe_setup_mac_link_82599,
.check_link = &ixgbe_check_mac_link_82599,
.get_link_capabilities = &ixgbe_get_link_capabilities_82599,
.led_on = &ixgbe_led_on_generic,
.led_off = &ixgbe_led_off_generic,
.blink_led_start = &ixgbe_blink_led_start_generic,
.blink_led_stop = &ixgbe_blink_led_stop_generic,
.set_rar = &ixgbe_set_rar_generic,
.clear_rar = &ixgbe_clear_rar_generic,
.set_vmdq = &ixgbe_set_vmdq_82599,
.clear_vmdq = &ixgbe_clear_vmdq_82599,
.init_rx_addrs = &ixgbe_init_rx_addrs_generic,
.update_uc_addr_list = &ixgbe_update_uc_addr_list_generic,
.update_mc_addr_list = &ixgbe_update_mc_addr_list_generic,
.enable_mc = &ixgbe_enable_mc_generic,
.disable_mc = &ixgbe_disable_mc_generic,
.clear_vfta = &ixgbe_clear_vfta_82599,
.set_vfta = &ixgbe_set_vfta_82599,
.fc_enable = &ixgbe_fc_enable_generic,
.init_uta_tables = &ixgbe_init_uta_tables_82599,
.setup_sfp = &ixgbe_setup_sfp_modules_82599,
};
/*
 * ixgbe_reset_hw_82599 - reset the 82599 MAC and reload address state.
 * @hw: hardware structure
 *
 * Stops the adapter, initialises/resets the PHY, disables PCIe master
 * access, issues a software reset of the MAC, restores the saved
 * AUTOC/AUTOC2 link settings, re-initialises the receive address registers
 * and stores the permanent MAC and SAN MAC addresses in @hw.
 *
 * Returns 0 on success or an IXGBE_ERR_* code. Only
 * IXGBE_ERR_SFP_NOT_SUPPORTED aborts early; the other error statuses are
 * recorded and the sequence continues.
 */
static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
{
s32 status = 0;
u32 ctrl, ctrl_ext;
u32 i;
u32 autoc;
u32 autoc2;
/* Call adapter stop to disable tx/rx and clear interrupts */
hw->mac.ops.stop_adapter(hw);
/* PHY ops must be identified and initialized prior to reset */
/* Init PHY and function pointers, perform SFP setup */
status = hw->phy.ops.init(hw);
if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
goto reset_hw_out;
/* Setup SFP module if there is one present. */
if (hw->phy.sfp_setup_needed) {
status = hw->mac.ops.setup_sfp(hw);
hw->phy.sfp_setup_needed = false;
}
/* Reset PHY */
if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL)
hw->phy.ops.reset(hw);
/*
 * Prevent the PCI-E bus from hanging by disabling PCI-E master
 * access and verify no pending requests before reset
 */
status = ixgbe_disable_pcie_master(hw);
if (status != 0) {
status = IXGBE_ERR_MASTER_REQUESTS_PENDING;
hw_dbg(hw, "PCI-E Master disable polling has failed.\n");
}
/*
 * Issue global reset to the MAC. This needs to be a SW reset.
 * If link reset is used, it might reset the MAC when mng is using it
 */
ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
IXGBE_WRITE_REG(hw, IXGBE_CTRL, (ctrl | IXGBE_CTRL_RST));
IXGBE_WRITE_FLUSH(hw);
/* Poll for reset bit to self-clear indicating reset is complete */
for (i = 0; i < 10; i++) {
udelay(1);
ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
if (!(ctrl & IXGBE_CTRL_RST))
break;
}
if (ctrl & IXGBE_CTRL_RST) {
status = IXGBE_ERR_RESET_FAILED;
hw_dbg(hw, "Reset polling failed to complete.\n");
}
/* Clear PF Reset Done bit so PF/VF Mail Ops can work */
ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD;
IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
msleep(50);
/*
 * Store the original AUTOC/AUTOC2 values if they have not been
 * stored off yet. Otherwise restore the stored original
 * values since the reset operation sets back to defaults.
 */
autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
if (hw->mac.orig_link_settings_stored == false) {
hw->mac.orig_autoc = autoc;
hw->mac.orig_autoc2 = autoc2;
hw->mac.orig_link_settings_stored = true;
} else {
if (autoc != hw->mac.orig_autoc)
IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc |
IXGBE_AUTOC_AN_RESTART));
if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) !=
(hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) {
autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK;
autoc2 |= (hw->mac.orig_autoc2 &
IXGBE_AUTOC2_UPPER_MASK);
IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
}
}
/*
 * Store MAC address from RAR0, clear receive address registers, and
 * clear the multicast table. Also reset num_rar_entries to 128,
 * since we modify this value when programming the SAN MAC address.
 */
hw->mac.num_rar_entries = 128;
hw->mac.ops.init_rx_addrs(hw);
/* Store the permanent mac address */
hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); // ixgbe_get_mac_addr_generic(): reads the MAC address from RAL(0)/RAH(0) into hw->mac.perm_addr
/* Store the permanent SAN mac address */
hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
/* Add the SAN MAC address to the RAR only if it's a valid address */
if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
hw->mac.san_addr, 0, IXGBE_RAH_AV);
/* Reserve the last RAR for the SAN MAC address */
hw->mac.num_rar_entries--;
}
reset_hw_out:
return status;
}
/*
 * ixgbe_get_mac_addr_generic - read the MAC address held in receive
 * address register 0.
 * @hw:       hardware structure used for register access
 * @mac_addr: output buffer, at least 6 bytes
 *
 * Bytes 0-3 are taken from RAL(0) and bytes 4-5 from the low 16 bits of
 * RAH(0), least significant byte first.
 *
 * Always returns 0.
 */
s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr)
{
	u32 rah0 = IXGBE_READ_REG(hw, IXGBE_RAH(0));
	u32 ral0 = IXGBE_READ_REG(hw, IXGBE_RAL(0));

	/* low register: address bytes 0..3 */
	mac_addr[0] = (u8)(ral0 >> 0);
	mac_addr[1] = (u8)(ral0 >> 8);
	mac_addr[2] = (u8)(ral0 >> 16);
	mac_addr[3] = (u8)(ral0 >> 24);

	/* high register: address bytes 4..5 */
	mac_addr[4] = (u8)(rah0 >> 0);
	mac_addr[5] = (u8)(rah0 >> 8);

	return 0;
}
/*
 * Register offsets of the Receive Address Low/High pair for entry _i.
 * Entries 0..15 use the 0x05400/0x05404 base, higher entries the
 * 0x0A200/0x0A204 base; in both cases the full index is scaled by 8.
 * Note: _i is evaluated twice.
 */
#define IXGBE_RAL(_i) ((((_i) <= 15) ? 0x05400 : 0x0A200) + ((_i) * 8))
#define IXGBE_RAH(_i) ((((_i) <= 15) ? 0x05404 : 0x0A204) + ((_i) * 8))
ixgbe_init_interrupt_scheme()
/*
 * ixgbe_init_interrupt_scheme - choose queue counts and set up interrupts.
 * @adapter: board private structure
 *
 * Determines the number of queues, acquires interrupt resources, allocates
 * the q_vectors and the tx/rx ring arrays, then marks the adapter
 * __IXGBE_DOWN.
 *
 * Returns 0 on success; on failure undoes the completed steps and returns
 * the error of the failing step.
 */
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
{
int err;
/* Number of supported queues */
ixgbe_set_num_queues(adapter); // set adapter->num_tx/rx_queues according to FDIR/RSS
err = ixgbe_set_interrupt_capability(adapter); // request interrupts from the PCI subsystem
if (err) {
DPRINTK(PROBE, ERR, "Unable to setup interrupt capabilities\n");
goto err_set_interrupt;
}
err = ixgbe_alloc_q_vectors(adapter); // choose the poll function, allocate the ixgbe_q_vector structs and register their napi contexts
if (err) {
DPRINTK(PROBE, ERR, "Unable to allocate memory for queue "
"vectors\n");
goto err_alloc_q_vectors;
}
err = ixgbe_alloc_queues(adapter); // allocate the tx/rx ring arrays
if (err) {
DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n");
goto err_alloc_queues;
}
DPRINTK(DRV, INFO, "Multiqueue %s: Rx Queue count = %u, "
"Tx Queue count = %u\n",
(adapter->num_rx_queues > 1) ? "Enabled" :
"Disabled", adapter->num_rx_queues, adapter->num_tx_queues);
set_bit(__IXGBE_DOWN, &adapter->state);
return 0;
err_alloc_queues:
ixgbe_free_q_vectors(adapter);
err_alloc_q_vectors:
ixgbe_reset_interrupt_capability(adapter);
err_set_interrupt:
return err;
}
ixgbe_set_num_queues()
/*
 * ixgbe_set_num_queues - decide how many rx/tx queues the adapter uses.
 * @adapter: board private structure
 *
 * Tries the feature-specific helpers in priority order (FCoE and DCB when
 * compiled in, then Flow Director, then RSS); the first one returning true
 * wins. If none applies, one rx and one tx queue are used. The chosen tx
 * count is then published in netdev->real_num_tx_queues.
 */
static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
if (ixgbe_set_fcoe_queues(adapter))
goto done;
#endif /* IXGBE_FCOE */
#ifdef CONFIG_IXGBE_DCB
if (ixgbe_set_dcb_queues(adapter))
goto done;
#endif
if (ixgbe_set_fdir_queues(adapter))
goto done;
if (ixgbe_set_rss_queues(adapter))
goto done;
/* fallback to base case */
adapter->num_rx_queues = 1;
adapter->num_tx_queues = 1;
done:
/* Notify the stack of the (possibly) reduced Tx Queue count. */
adapter->netdev->real_num_tx_queues = adapter->num_tx_queues; // number of tx queues actually in use
}
/*
 * ixgbe_set_fdir_queues - size the queues for Flow Director.
 * @adapter: board private structure
 *
 * Caps the FDIR ring-feature index count at the number of online CPUs and
 * clears its mask. If RSS is enabled and the adapter is FDIR hash or
 * perfect-filter capable, both queue counts are set to that index count
 * and true is returned. Otherwise both FDIR capability flags are cleared
 * and false is returned.
 */
static bool inline ixgbe_set_fdir_queues(struct ixgbe_adapter *adapter)
{
	struct ixgbe_ring_feature *fdir = &adapter->ring_feature[RING_F_FDIR];
	bool rss_on;
	bool fdir_capable;

	/* never more indices than online CPUs */
	fdir->indices = min((int)num_online_cpus(), fdir->indices);
	fdir->mask = 0;

	rss_on = (adapter->flags & IXGBE_FLAG_RSS_ENABLED) != 0;
	fdir_capable = (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) != 0 ||
		       (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) != 0;

	/* Flow Director must have RSS enabled */
	if (!rss_on || !fdir_capable) {
		adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
		adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
		return false;
	}

	adapter->num_tx_queues = fdir->indices;
	adapter->num_rx_queues = fdir->indices;
	return true;
}
/*
 * ixgbe_set_rss_queues - size the queues for RSS.
 * @adapter: board private structure
 *
 * When IXGBE_FLAG_RSS_ENABLED is set, sets the RSS ring-feature mask to
 * 0xF, uses its index count for both the rx and tx queue counts and
 * returns true. Returns false, changing nothing, when RSS is disabled.
 */
static inline bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
{
	struct ixgbe_ring_feature *rss = &adapter->ring_feature[RING_F_RSS];

	if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
		return false;

	rss->mask = 0xF;
	adapter->num_rx_queues = rss->indices;
	adapter->num_tx_queues = rss->indices;

	return true;
}
ixgbe_set_interrupt_capability()
/*
 * ixgbe_set_interrupt_capability - pick MSI-X, MSI or legacy interrupts.
 * @adapter: board private structure
 *
 * Tries MSI-X first. If the entry table cannot be allocated or MSI-X is
 * not enabled afterwards, the DCB/RSS/FDIR features are switched off, the
 * queue counts are recomputed and a single MSI interrupt is requested; if
 * that fails as well the adapter falls back to legacy interrupts.
 *
 * Returns 0 on every path visible here (an MSI failure resets err to 0).
 */
static int ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
int err = 0;
int vector, v_budget;
/*
 * It's easy to be greedy for MSI-X vectors, but it really
 * doesn't do us much good if we have a lot more vectors
 * than CPU's. So let's be conservative and only ask for
 * (roughly) twice the number of vectors as there are CPU's.
 */
// number of MSI-X vectors to ask for: queue vectors plus NON_Q_VECTORS
// for the non-queue causes (link status change etc.)
v_budget = min(adapter->num_rx_queues + adapter->num_tx_queues,
(int)(num_online_cpus() * 2)) + NON_Q_VECTORS;
/*
 * At the same time, hardware can only support a maximum of
 * hw.mac->max_msix_vectors vectors. With features
 * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
 * descriptor queues supported by our device. Thus, we cap it off in
 * those rare cases where the cpu count also exceeds our vector limit.
 */
v_budget = min(v_budget, (int)hw->mac.max_msix_vectors);
/* A failure in MSI-X entry allocation isn't fatal, but it does
 * mean we disable MSI-X capabilities of the adapter. */
// allocate v_budget msix_entry structs; the array is stored in adapter->msix_entries
adapter->msix_entries = kcalloc(v_budget,
sizeof(struct msix_entry), GFP_KERNEL);
if (adapter->msix_entries) {
for (vector = 0; vector < v_budget; vector++)
adapter->msix_entries[vector].entry = vector;
ixgbe_acquire_msix_vectors(adapter, v_budget); // ask the PCI subsystem for v_budget MSI-X vectors
if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
goto out;
}
adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
adapter->atr_sample_rate = 0;
ixgbe_set_num_queues(adapter);
err = pci_enable_msi(adapter->pdev); // ask the PCI subsystem for a single MSI interrupt
if (!err) {
adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
} else {
DPRINTK(HW, DEBUG, "Unable to allocate MSI interrupt, "
"falling back to legacy. Error: %d\n", err);
/* reset err */
err = 0;
}
out:
return err;
}
/*
 * ixgbe_acquire_msix_vectors - negotiate MSI-X vectors with the PCI core.
 * @adapter: board private structure; adapter->msix_entries must be set up
 * @vectors: number of vectors to request initially
 *
 * Retries pci_enable_msix() with the count it suggests until it succeeds
 * or the count drops below MIN_MSIX_COUNT. On success sets
 * IXGBE_FLAG_MSIX_ENABLED and adapter->num_msix_vectors; otherwise clears
 * the flag and frees adapter->msix_entries.
 */
static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
int vectors)
{
int err, vector_threshold;
/* We'll want at least 3 (vector_threshold):
 * 1) TxQ[0] Cleanup
 * 2) RxQ[0] Cleanup
 * 3) Other (Link Status Change, etc.)
 * 4) TCP Timer (optional)
 */
vector_threshold = MIN_MSIX_COUNT;
/* The more we get, the more we will assign to Tx/Rx Cleanup
 * for the separate queues...where Rx Cleanup >= Tx Cleanup.
 * Right now, we simply care about how many we'll get; we'll
 * set them up later while requesting irq's.
 */
while (vectors >= vector_threshold) {
// request `vectors` MSI-X interrupts from the PCI subsystem; the
// interrupt numbers are written to adapter->msix_entries[i].vector
err = pci_enable_msix(adapter->pdev, adapter->msix_entries,
vectors);
if (!err) /* Success in acquiring all requested vectors. */
break;
else if (err < 0)
vectors = 0; /* Nasty failure, quit now */
else /* err == number of vectors we should try again with */
vectors = err;
}
if (vectors < vector_threshold) {
/* Can't allocate enough MSI-X interrupts? Oh well.
 * This just means we'll go with either a single MSI
 * vector or fall back to legacy interrupts.
 */
DPRINTK(HW, DEBUG, "Unable to allocate MSI-X interrupts\n");
adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
kfree(adapter->msix_entries);
adapter->msix_entries = NULL;
} else {
adapter->flags |= IXGBE_FLAG_MSIX_ENABLED; /* Woot! */
/*
 * Adjust for only the vectors we'll use, which is minimum
 * of max_msix_q_vectors + NON_Q_VECTORS, or the number of
 * vectors we were allocated.
 */
adapter->num_msix_vectors = min(vectors,
adapter->max_msix_q_vectors + NON_Q_VECTORS);
}
}
ixgbe_alloc_q_vectors()和ixgbe_alloc_queues()
/*
 * ixgbe_alloc_q_vectors - allocate the per-interrupt q_vector structures.
 * @adapter: board private structure
 *
 * With MSI-X enabled, one q_vector is allocated per queue vector
 * (num_msix_vectors - NON_Q_VECTORS) and polled by
 * ixgbe_clean_rxtx_many(); otherwise a single q_vector polled by
 * ixgbe_poll() is used. Each q_vector's napi context is added to the
 * net_device with a weight of 64.
 *
 * Returns 0 on success, or -ENOMEM after releasing the q_vectors that
 * were already set up.
 */
static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
	int q_idx, num_q_vectors;
	struct ixgbe_q_vector *q_vector;
	int (*poll)(struct napi_struct *, int);

	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
		/* leave out the vectors reserved for non-queue causes */
		num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
		poll = &ixgbe_clean_rxtx_many;
	} else {
		/* MSI or legacy interrupt: a single vector */
		num_q_vectors = 1;
		poll = &ixgbe_poll;
	}

	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
		q_vector = kzalloc(sizeof(struct ixgbe_q_vector), GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		/*
		 * NOTE(review): q_vector was zeroed by kzalloc() just above,
		 * so txr_count is 0 here and the rx parameter is selected.
		 */
		if (q_vector->txr_count && !q_vector->rxr_count)
			q_vector->eitr = adapter->tx_eitr_param;
		else
			q_vector->eitr = adapter->rx_eitr_param;
		q_vector->v_idx = q_idx;
		/* register the napi context; 64 is the per-poll weight */
		netif_napi_add(adapter->netdev, &q_vector->napi, (*poll), 64);
		adapter->q_vector[q_idx] = q_vector;
	}

	return 0;

err_out:
	/* tear down the q_vectors set up before the failing allocation */
	while (q_idx) {
		q_idx--;
		q_vector = adapter->q_vector[q_idx];
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
		adapter->q_vector[q_idx] = NULL;
	}
	return -ENOMEM;
}
static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
{
int i;
// 分配num_tx_queues个ixgbe_ring(发送ring数组),地址赋给adapter->tx_ring
adapter->tx_ring = kcalloc(adapter->num_tx_queues,
sizeof(struct ixgbe_ring), GFP_KERNEL);
if (!adapter->tx_ring)
goto err_tx_ring_allocation;
// 分配num_rx_queues个ixgbe_ring(接收ring数组),地址赋给adapter->rx_ring
adapter->rx_ring = kcalloc(adapter->num_rx_queues,
sizeof(struct ixgbe_ring), GFP_KERNEL);
if (!adapter->rx_ring)
goto err_rx_ring_allocation;
for (i = 0; i < adapter->num_tx_queues; i++) {
adapter->tx_ring[i].count = adapter->tx_ring_count; // 设置tx_ring[i].count
adapter->tx_ring[i].queue_index = i; // 设置tx_ring[i].queue_index
}
for (i = 0; i < adapter->num_rx_queues; i++) {
adapter->rx_ring[i].count = adapter->rx_ring_count; // 设置rx_ring[i].count
adapter->rx_ring[i].queue_index = i; // 设置rx_ring[i].queue_index
}
ixgbe_cache_ring_register(adapter); // 设置tx/rx_ring[i].reg_idx
return 0;
err_rx_ring_allocation:
kfree(adapter->tx_ring);
err_tx_ring_allocation:
return -ENOMEM;
}