使用的处理器为Intel Xeon Gold 5122,4个核心,每个核心2个超线程(共8个逻辑处理器)。/proc/cpuinfo中的信息如下:
/ # cat /proc/cpuinfo
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 85
model name : Intel(R) Xeon(R) Gold 5122 CPU @ 3.60GHz
stepping : 4
microcode : 0x2000043
cpu MHz : 3600.000
cache size : 16896 KB
physical id : 0
siblings : 8
core id : 1
cpu cores : 4
在启动过程中,网卡驱动i40e打印了如下的警告信息,提示PCI-E可用的带宽有可能不能够达到最优的性能。当前的PCI-E的速率为5.0GT/s,宽度为x4。此网卡具有4个10G端口。
[ 24.878466] i40e 0000:b6:00.3: fw 4.40.35115 api 1.4 nvm 4.53 0x80001cdf 0.0.0
[ 25.202189] i40e 0000:b6:00.3: MAC address: 00:60:e0:6d:94:01
[ 25.210023] i40e 0000:b6:00.3: irq 411 for MSI/MSI-X
[ 25.215000] i40e 0000:b6:00.3: irq 412 for MSI/MSI-X
...
[ 25.294766] i40e 0000:b6:00.3: irq 428 for MSI/MSI-X
[ 25.398203] i40e 0000:b6:00.3: Added LAN device PF3 bus=0x00 func=0x03
[ 25.404713] i40e 0000:b6:00.3: PCI-Express: Speed 5.0GT/s Width x4
[ 25.410876] i40e 0000:b6:00.3: PCI-Express bandwidth available for this device may be insufficient for optimal performance.
[ 25.421968] i40e 0000:b6:00.3: Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.
[ 25.439172] i40e 0000:b6:00.3: Features: PF-id[3] VFs: 32 VSIs: 34 QP: 8 RX: 1BUF RSS FD_ATR FD_SB NTUPLE PTP VEPA
内核中的i40e_probe函数如下(节选),以上的打印信息位于此函数的最后部分。该函数首先从PCI配置空间的PCI_EXP_LNKSTA寄存器中读取协商后的链路状态信息,再由函数i40e_set_pci_config_data负责解析,并将结果保存到hw->bus结构的成员width和speed中。
static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
/* Devices on the IOSF bus do not have this information and will report PCI Gen 1 x 1 by default so don't bother checking them. */
if (!(pf->hw_features & I40E_HW_NO_PCI_LINK_CHECK)) {
char speed[PCI_SPEED_SIZE] = "Unknown";
char width[PCI_WIDTH_SIZE] = "Unknown";
/* Get the negotiated link width and speed from PCI config space */
pcie_capability_read_word(pf->pdev, PCI_EXP_LNKSTA, &link_status);
i40e_set_pci_config_data(hw, link_status);
switch (hw->bus.speed) {
case i40e_bus_speed_8000:
strncpy(speed, "8.0", PCI_SPEED_SIZE); break;
case i40e_bus_speed_5000:
strncpy(speed, "5.0", PCI_SPEED_SIZE); break;
case i40e_bus_speed_2500:
strncpy(speed, "2.5", PCI_SPEED_SIZE); break;
default:
break;
}
switch (hw->bus.width) {
case i40e_bus_width_pcie_x8:
strncpy(width, "8", PCI_WIDTH_SIZE); break;
case i40e_bus_width_pcie_x4:
strncpy(width, "4", PCI_WIDTH_SIZE); break;
case i40e_bus_width_pcie_x2:
strncpy(width, "2", PCI_WIDTH_SIZE); break;
case i40e_bus_width_pcie_x1:
strncpy(width, "1", PCI_WIDTH_SIZE); break;
default:
break;
}
dev_info(&pdev->dev, "PCI-Express: Speed %sGT/s Width x%s\n", speed, width);
if (hw->bus.width < i40e_bus_width_pcie_x8 || hw->bus.speed < i40e_bus_speed_8000) {
dev_warn(&pdev->dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
dev_warn(&pdev->dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
}
}
i40e驱动在总线的位宽width小于i40e_bus_width_pcie_x8(值为8),或者总线的速率小于i40e_bus_speed_8000(值为8000)时,都会打印本文开头的警告信息。
/*
 * Bus speed identifiers stored in hw->bus.speed.
 *
 * The PCIe entries carry the transfer rate in MT/s (2500 = 2.5GT/s,
 * 5000 = 5.0GT/s, 8000 = 8.0GT/s), so they can be compared numerically.
 * NOTE(review): the 33..133 entries are presumably legacy PCI/PCI-X clock
 * rates in MHz -- not confirmed by anything visible here.
 */
enum i40e_bus_speed {
	i40e_bus_speed_unknown	= 0,
	i40e_bus_speed_33	= 33,
	i40e_bus_speed_66	= 66,
	i40e_bus_speed_100	= 100,
	i40e_bus_speed_120	= 120,
	i40e_bus_speed_133	= 133,
	i40e_bus_speed_2500	= 2500,
	i40e_bus_speed_5000	= 5000,
	i40e_bus_speed_8000	= 8000,
	/* One past the fastest known speed (same value the implicit numbering gives). */
	i40e_bus_speed_reserved	= 8001
};
/* PCI bus widths */
enum i40e_bus_width {
i40e_bus_width_unknown = 0,
i40e_bus_width_pcie_x1 = 1,
i40e_bus_width_pcie_x2 = 2,
i40e_bus_width_pcie_x4 = 4,
i40e_bus_width_pcie_x8 = 8,
Wikipedia网站上对PCI Express各版本带宽的介绍如下图。
如果速率为5.0GT/s,位宽为x4,得到的吞吐量为2.0 GB/s(单位是字节Byte),转换为网络吞吐为16Gbit/s(2.0 GB/s × 8;5.0GT/s × 4 = 20GT/s是原始速率,还需扣除8b/10b编码的开销)。这连2个10G网口的最大吞吐都无法完全满足,更满足不了全部4个10G网口(共40Gbit/s)的需求。