ethtool 是网卡调试和问题排查的常用工具,其整体框架如下图所示:
用户空间的 ethtool 工具可以在系统上直接安装,主要负责接收用户输入的 ethtool 命令,并把输入内容转换成内核 ioctl() 接口能够识别的格式后发送给内核。
一、ethtool 用户源码分析
1、ethtool 子命令数据管理方法
/*
 * Context for sub-commands.
 *
 * One instance is filled in by main() and passed by pointer to the handler
 * selected for the command line.
 */
struct cmd_context {
const char *devname; /* net device name */
int fd; /* socket suitable for ethtool ioctl */
struct ifreq ifr; /* ifreq suitable for ethtool ioctl */
int argc; /* number of arguments to the sub-command */
char **argp; /* arguments to the sub-command */
};
2、 ethtool 命令可选项参数args[]
ethtool 所有命令集合如下。
/*
 * Table of every ethtool sub-command.
 *
 * Each entry maps the accepted option spellings to the handler that
 * implements it, together with the text printed by the usage screen.
 * The table is terminated by an all-zero entry (opts == NULL).
 */
static const struct option {
	const char *opts;		/* '|'-separated spellings of the option */
	int want_device;		/* non-zero: a DEVNAME argument is expected */
	int (*func)(struct cmd_context *); /* handler for the sub-command */
	const char *help;		/* one-line summary for the usage text */
	const char *opthelp;		/* optional argument synopsis, may be NULL */
} args[] = {
	{ "-s|--change", 1, do_sset, "Change generic options",
	  " [ speed %d ]\n"
	  " [ duplex half|full ]\n"
	  " [ port tp|aui|bnc|mii|fibre ]\n"
	  " [ mdix auto|on|off ]\n"
	  " [ autoneg on|off ]\n"
	  " [ advertise %x ]\n"
	  " [ phyad %d ]\n"
	  " [ xcvr internal|external ]\n"
	  " [ wol p|u|m|b|a|g|s|f|d... ]\n"
	  " [ sopass %x:%x:%x:%x:%x:%x ]\n"
	  " [ msglvl %d | msglvl type on|off ... ]\n" },
	{ "-a|--show-pause", 1, do_gpause, "Show pause options" },
	{ "-A|--pause", 1, do_spause, "Set pause options",
	  " [ autoneg on|off ]\n"
	  " [ rx on|off ]\n"
	  " [ tx on|off ]\n" },
	{ "-c|--show-coalesce", 1, do_gcoalesce, "Show coalesce options" },
	{ "-C|--coalesce", 1, do_scoalesce, "Set coalesce options",
	  " [adaptive-rx on|off]\n"
	  " [adaptive-tx on|off]\n"
	  " [rx-usecs N]\n"
	  " [rx-frames N]\n"
	  " [rx-usecs-irq N]\n"
	  " [rx-frames-irq N]\n"
	  " [tx-usecs N]\n"
	  " [tx-frames N]\n"
	  " [tx-usecs-irq N]\n"
	  " [tx-frames-irq N]\n"
	  " [stats-block-usecs N]\n"
	  " [pkt-rate-low N]\n"
	  " [rx-usecs-low N]\n"
	  " [rx-frames-low N]\n"
	  " [tx-usecs-low N]\n"
	  " [tx-frames-low N]\n"
	  " [pkt-rate-high N]\n"
	  " [rx-usecs-high N]\n"
	  " [rx-frames-high N]\n"
	  " [tx-usecs-high N]\n"
	  " [tx-frames-high N]\n"
	  " [sample-interval N]\n" },
	{ "-g|--show-ring", 1, do_gring, "Query RX/TX ring parameters" },
	{ "-G|--set-ring", 1, do_sring, "Set RX/TX ring parameters",
	  " [ rx N ]\n"
	  " [ rx-mini N ]\n"
	  " [ rx-jumbo N ]\n"
	  " [ tx N ]\n" },
	{ "-k|--show-features|--show-offload", 1, do_gfeatures,
	  "Get state of protocol offload and other features" },
	{ "-K|--features|--offload", 1, do_sfeatures,
	  "Set protocol offload and other features",
	  " FEATURE on|off ...\n" },
	{ "-i|--driver", 1, do_gdrv, "Show driver information" },
	{ "-d|--register-dump", 1, do_gregs, "Do a register dump",
	  " [ raw on|off ]\n"
	  " [ file FILENAME ]\n" },
	{ "-e|--eeprom-dump", 1, do_geeprom, "Do a EEPROM dump",
	  " [ raw on|off ]\n"
	  " [ offset N ]\n"
	  " [ length N ]\n" },
	{ "-E|--change-eeprom", 1, do_seeprom,
	  "Change bytes in device EEPROM",
	  " [ magic N ]\n"
	  " [ offset N ]\n"
	  " [ length N ]\n"
	  " [ value N ]\n" },
	{ "-r|--negotiate", 1, do_nway_rst, "Restart N-WAY negotiation" },
	{ "-p|--identify", 1, do_phys_id,
	  "Show visible port identification (e.g. blinking)",
	  " [ TIME-IN-SECONDS ]\n" },
	{ "-t|--test", 1, do_test, "Execute adapter self test",
	  " [ online | offline | external_lb ]\n" },
	{ "-S|--statistics", 1, do_gnicstats, "Show adapter statistics" },
	{ "--phy-statistics", 1, do_gphystats,
	  "Show phy statistics" },
	{ "-n|-u|--show-nfc|--show-ntuple", 1, do_grxclass,
	  "Show Rx network flow classification options or rules",
	  " [ rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|"
	  "tcp6|udp6|ah6|esp6|sctp6 [context %d] |\n"
	  " rule %d ]\n" },
	{ "-N|-U|--config-nfc|--config-ntuple", 1, do_srxclass,
	  "Configure Rx network flow classification options or rules",
	  " rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|"
	  "tcp6|udp6|ah6|esp6|sctp6 m|v|t|s|d|f|n|r... [context %d] |\n"
	  " flow-type ether|ip4|tcp4|udp4|sctp4|ah4|esp4|"
	  "ip6|tcp6|udp6|ah6|esp6|sctp6\n"
	  " [ src %x:%x:%x:%x:%x:%x [m %x:%x:%x:%x:%x:%x] ]\n"
	  " [ dst %x:%x:%x:%x:%x:%x [m %x:%x:%x:%x:%x:%x] ]\n"
	  " [ proto %d [m %x] ]\n"
	  " [ src-ip IP-ADDRESS [m IP-ADDRESS] ]\n"
	  " [ dst-ip IP-ADDRESS [m IP-ADDRESS] ]\n"
	  " [ tos %d [m %x] ]\n"
	  " [ tclass %d [m %x] ]\n"
	  " [ l4proto %d [m %x] ]\n"
	  " [ src-port %d [m %x] ]\n"
	  " [ dst-port %d [m %x] ]\n"
	  " [ spi %d [m %x] ]\n"
	  " [ vlan-etype %x [m %x] ]\n"
	  " [ vlan %x [m %x] ]\n"
	  " [ user-def %x [m %x] ]\n"
	  " [ dst-mac %x:%x:%x:%x:%x:%x [m %x:%x:%x:%x:%x:%x] ]\n"
	  " [ action %d ] | [ vf %d queue %d ]\n"
	  " [ context %d ]\n"
	  " [ loc %d]] |\n"
	  " delete %d\n" },
	{ "-T|--show-time-stamping", 1, do_tsinfo,
	  "Show time stamping capabilities" },
	{ "-x|--show-rxfh-indir|--show-rxfh", 1, do_grxfh,
	  "Show Rx flow hash indirection table and/or RSS hash key",
	  " [ context %d ]\n" },
	{ "-X|--set-rxfh-indir|--rxfh", 1, do_srxfh,
	  "Set Rx flow hash indirection table and/or RSS hash key",
	  " [ context %d|new ]\n"
	  " [ equal N | weight W0 W1 ... | default ]\n"
	  " [ hkey %x:%x:%x:%x:%x:.... ]\n"
	  " [ hfunc FUNC ]\n"
	  " [ delete ]\n" },
	{ "-f|--flash", 1, do_flash,
	  "Flash firmware image from the specified file to a region on the device",
	  " FILENAME [ REGION-NUMBER-TO-FLASH ]\n" },
	{ "-P|--show-permaddr", 1, do_permaddr,
	  "Show permanent hardware address" },
	{ "-w|--get-dump", 1, do_getfwdump,
	  "Get dump flag, data",
	  " [ data FILENAME ]\n" },
	{ "-W|--set-dump", 1, do_setfwdump,
	  "Set dump flag of the device",
	  " N\n"},
	{ "-l|--show-channels", 1, do_gchannels, "Query Channels" },
	{ "-L|--set-channels", 1, do_schannels, "Set Channels",
	  " [ rx N ]\n"
	  " [ tx N ]\n"
	  " [ other N ]\n"
	  " [ combined N ]\n" },
	{ "--show-priv-flags", 1, do_gprivflags, "Query private flags" },
	{ "--set-priv-flags", 1, do_sprivflags, "Set private flags",
	  " FLAG on|off ...\n" },
	{ "-m|--dump-module-eeprom|--module-info", 1, do_getmodule,
	  "Query/Decode Module EEPROM information and optical diagnostics if available",
	  " [ raw on|off ]\n"
	  " [ hex on|off ]\n"
	  " [ offset N ]\n"
	  " [ length N ]\n" },
	{ "--show-eee", 1, do_geee, "Show EEE settings"},
	{ "--set-eee", 1, do_seee, "Set EEE settings",
	  " [ eee on|off ]\n"
	  " [ advertise %x ]\n"
	  " [ tx-lpi on|off ]\n"
	  " [ tx-timer %d ]\n"},
	{ "--set-phy-tunable", 1, do_set_phy_tunable, "Set PHY tunable",
	  " [ downshift on|off [count N] ]\n"
	  " [ fast-link-down on|off [msecs N] ]\n"},
	{ "--get-phy-tunable", 1, do_get_phy_tunable, "Get PHY tunable",
	  " [ downshift ]\n"
	  " [ fast-link-down ]\n"},
	{ "--reset", 1, do_reset, "Reset components",
	  " [ flags %x ]\n"
	  " [ mgmt ]\n"
	  " [ mgmt-shared ]\n"
	  " [ irq ]\n"
	  " [ irq-shared ]\n"
	  " [ dma ]\n"
	  " [ dma-shared ]\n"
	  " [ filter ]\n"
	  " [ filter-shared ]\n"
	  " [ offload ]\n"
	  " [ offload-shared ]\n"
	  " [ mac ]\n"
	  " [ mac-shared ]\n"
	  " [ phy ]\n"
	  " [ phy-shared ]\n"
	  " [ ram ]\n"
	  " [ ram-shared ]\n"
	  " [ ap ]\n"
	  " [ ap-shared ]\n"
	  " [ dedicated ]\n"
	  " [ all ]\n"},
	{ "--show-fec", 1, do_gfec, "Show FEC settings"},
	{ "--set-fec", 1, do_sfec, "Set FEC settings",
	  " [ encoding auto|off|rs|baser [...]]\n"},
	/*
	 * The two literals below are concatenated into the single `help`
	 * string, so the first one must end with a space to keep the two
	 * sentences apart in the usage output.
	 */
	{ "-Q|--per-queue", 1, do_perqueue, "Apply per-queue command. "
	  "The supported sub commands include --show-coalesce, --coalesce",
	  " [queue_mask %x] SUB_COMMAND\n"},
	{ "-h|--help", 0, show_usage, "Show this help" },
	{ "--version", 0, do_version, "Show version number" },
	{}	/* terminator: opts == NULL ends table walks */
};
3、ethtool命令入口函数
/*
 * Entry point of the ethtool command.
 *
 * Picks the sub-command handler from args[] (or falls back to do_gset when
 * the first argument is a plain device name), prepares the command context
 * and hands control to the handler.
 *
 * Returns the handler's result, or 70 when no control socket can be opened.
 */
int main(int argc, char **argp)
{
	struct cmd_context ctx;
	int (*handler)(struct cmd_context *);
	int needs_dev;
	int idx;

	init_global_link_mode_masks();

	/* Step over the program name */
	argp++;
	argc--;

	/*
	 * The first argument is either a known option or the name of the
	 * device whose settings should be shown (not expected to start
	 * with '-').
	 */
	if (argc == 0)
		exit_bad_args();

	/* 1. Look the sub-command up in the args[] option table */
	idx = find_option(argc, argp);
	if (idx >= 0) {
		argp++;
		argc--;
		handler = args[idx].func;
		needs_dev = args[idx].want_device;
	} else {
		if ((*argp)[0] == '-')
			exit_bad_args();
		handler = do_gset;
		needs_dev = 1;
	}

	if (!needs_dev) {
		ctx.fd = -1;
	} else {
		ctx.devname = *argp++;
		argc--;

		if (ctx.devname == NULL)
			exit_bad_args();
		if (strlen(ctx.devname) >= IFNAMSIZ)
			exit_bad_args();

		/* Prepare the ifreq that carries the device name */
		memset(&ctx.ifr, 0, sizeof(ctx.ifr));
		strcpy(ctx.ifr.ifr_name, ctx.devname);

		/*
		 * 2. Open the control socket; try a netlink socket when the
		 * AF_INET datagram socket cannot be created.
		 */
		ctx.fd = socket(AF_INET, SOCK_DGRAM, 0);
		if (ctx.fd < 0)
			ctx.fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
		if (ctx.fd < 0) {
			perror("Cannot get control socket");
			return 70;
		}
	}

	ctx.argc = argc;
	ctx.argp = argp;

	/* 3. Run the handler chosen for the option, e.g. do_gring */
	return handler(&ctx);
}
源码中有三处带编号的注释(1~3),请结合源码及前文的数据管理方法进行分析。
4、实例解析:ethtool -g ens18
命令执行结果如下:
$ ethtool -g ens18
Ring parameters for ens18:
Pre-set maximums:
RX: 256
RX Mini: 0
RX Jumbo: 0
TX: 256
Current hardware settings:
RX: 256
RX Mini: 0
RX Jumbo: 0
TX: 256
ethtool -g 对应的源码
/*
 * Handler for "ethtool -g DEVNAME": query and print the ring parameters.
 *
 * Takes no extra arguments. Returns 0 on success, 76 when the ioctl fails,
 * or whatever non-zero value dump_ring() reports.
 */
static int do_gring(struct cmd_context *ctx)
{
	struct ethtool_ringparam ering;

	if (ctx->argc != 0)
		exit_bad_args();

	fprintf(stdout, "Ring parameters for %s:\n", ctx->devname);

	/* 1. -g maps to the ETHTOOL_GRINGPARAM command */
	ering.cmd = ETHTOOL_GRINGPARAM;

	/* 2. Send the request to the driver through ioctl() */
	if (send_ioctl(ctx, &ering) != 0) {
		perror("Cannot get device ring settings");
		return 76;
	}

	return dump_ring(&ering);
}
// send_ioctl 函数
#ifndef TEST_ETHTOOL
int send_ioctl(struct cmd_context *ctx, void *cmd)
{
ctx->ifr.ifr_data = cmd; /* 3. ioctl系统调用 SIOCETHTOOL 关键键,驱动程序可识别 */
return ioctl(ctx->fd, SIOCETHTOOL, &ctx->ifr);
}
#endif
5、实例解析:ethtool -h
/*
 * Handler for "ethtool -h": print the version banner followed by one usage
 * entry per row of the global args[] table.
 *
 * ctx is not used. Always returns 0.
 */
static int show_usage(struct cmd_context *ctx)
{
	int idx = 0;

	/* ethtool -h */
	fprintf(stdout, PACKAGE " version " VERSION "\n");
	fputs("Usage:\n"
	      " ethtool DEVNAME\t"
	      "Display standard information about device\n", stdout);

	/* Walk args[] until the terminating entry whose opts is NULL */
	while (args[idx].opts) {
		const char *dev_col = args[idx].want_device ? "DEVNAME" : "\t";

		fputs(" ethtool ", stdout);
		fprintf(stdout, "%s %s\t%s\n",
			args[idx].opts, dev_col, args[idx].help);
		if (args[idx].opthelp)
			fputs(args[idx].opthelp, stdout);
		idx++;
	}

	return 0;
}
二、内核空间
参考内核 e1000 网卡驱动分析。
1、驱动程序入口
/*
 * PCI driver descriptor for e1000: names the driver, lists the PCI IDs it
 * matches and wires up the probe/remove/shutdown callbacks. The
 * suspend/resume hooks are only present when CONFIG_PM is defined.
 */
static struct pci_driver e1000_driver = {
.name = e1000_driver_name, // "e1000"
.id_table = e1000_pci_tbl, // PCI VENDOR ID OF INTEL
.probe = e1000_probe,
.remove = e1000_remove,
#ifdef CONFIG_PM
/* Power Management Hooks */
.suspend = e1000_suspend,
.resume = e1000_resume,
#endif
.shutdown = e1000_shutdown,
.err_handler = &e1000_err_handler
};
/* Module metadata: author, description, licence and version */
MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
2、e1000 网卡驱动 probe
/*
 * e1000_probe - set up one matching PCI device (abridged walkthrough)
 * @pdev: PCI device being probed
 * @ent: matching entry of the driver's PCI ID table (unused in this excerpt)
 *
 * Enables the PCI device, allocates the net_device, installs the netdev and
 * ethtool operation tables and registers the interface with the network
 * stack.
 *
 * NOTE(review): this is a condensed excerpt. The values stored in err are
 * not checked and no unwind path is shown; presumably the full driver
 * handles both - confirm against the kernel source.
 *
 * Returns 0.
 */
static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct e1000_adapter *adapter = NULL;
	struct e1000_hw *hw;
	/* The next four locals are presumably used by code omitted here */
	static int cards_found;
	static int global_quad_port_a; /* global ksp3 port a indication */
	int i, err, pci_using_dac;
	int bars;

	/* PCI side: select the memory BARs, enable the device, claim regions */
	bars = pci_select_bars(pdev, IORESOURCE_MEM);
	err = pci_enable_device_mem(pdev);
	err = pci_request_selected_regions(pdev, bars, e1000_driver_name);
	pci_set_master(pdev);
	err = pci_save_state(pdev);

	/* Allocate the net_device with room for the adapter private data */
	netdev = alloc_etherdev(sizeof(struct e1000_adapter));
	SET_NETDEV_DEV(netdev, &pdev->dev);
	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);

	/* hw must point at the adapter's hardware state before it is used */
	hw = &adapter->hw;

	err = e1000_init_hw_struct(adapter, hw);	/* hardware/PHY function set */
	netdev->netdev_ops = &e1000_netdev_ops;		/* net_device operations */
	e1000_set_ethtool_ops(netdev);			/* ethtool operations */
	netdev->watchdog_timeo = 5 * HZ;
	netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);

	err = register_netdev(netdev);	/* register the interface with the kernel */
	e1000_vlan_filter_on_off(adapter, false);

	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);
	e_info(probe, "Intel(R) PRO/1000 Network Connection\n");

	return 0;
}
3、把 e1000_ethtool_ops 赋值给网卡设备的 ethtool_ops
/*
 * e1000_set_ethtool_ops - attach the e1000 ethtool operation table
 * @netdev: network device whose ethtool_ops pointer is set
 */
void e1000_set_ethtool_ops(struct net_device *netdev)
{
	const struct ethtool_ops *ops = &e1000_ethtool_ops;

	netdev->ethtool_ops = ops;
}
把e1000 网卡所支持的 ethtool 函数关联到网卡驱动上。
4、e1000 网卡驱动支持的 ethtool 功能列表
/*
 * ethtool operations implemented by the e1000 driver. Each member points at
 * the driver function that serves the corresponding ethtool request; the
 * table is attached to the net_device by e1000_set_ethtool_ops().
 */
static const struct ethtool_ops e1000_ethtool_ops = {
.get_drvinfo = e1000_get_drvinfo,
.get_regs_len = e1000_get_regs_len,
.get_regs = e1000_get_regs,
.get_wol = e1000_get_wol,
.set_wol = e1000_set_wol,
.get_msglevel = e1000_get_msglevel,
.set_msglevel = e1000_set_msglevel,
.nway_reset = e1000_nway_reset,
.get_link = e1000_get_link,
.get_eeprom_len = e1000_get_eeprom_len,
.get_eeprom = e1000_get_eeprom,
.set_eeprom = e1000_set_eeprom,
/* Ring parameter hooks, presumably reached by "ethtool -g" / "-G" */
.get_ringparam = e1000_get_ringparam,
.set_ringparam = e1000_set_ringparam,
.get_pauseparam = e1000_get_pauseparam,
.set_pauseparam = e1000_set_pauseparam,
.self_test = e1000_diag_test,
.get_strings = e1000_get_strings,
.set_phys_id = e1000_set_phys_id,
.get_ethtool_stats = e1000_get_ethtool_stats,
.get_sset_count = e1000_get_sset_count,
.get_coalesce = e1000_get_coalesce,
.set_coalesce = e1000_set_coalesce,
/* Uses an ethtool_op_* function rather than an e1000_* one */
.get_ts_info = ethtool_op_get_ts_info,
.get_link_ksettings = e1000_get_link_ksettings,
.set_link_ksettings = e1000_set_link_ksettings,
};
5、用户态如何与网卡驱动的 ioctl 关联起来?
用户空间通过 socket() 创建控制套接字(优先 AF_INET/SOCK_DGRAM,失败时改用 AF_NETLINK),再对该套接字的文件描述符调用 ioctl();因此 socket_file_ops 中的 ioctl 回调(unlocked_ioctl / compat_ioctl)就是 ethtool 命令进入内核的入口,如下:
/*
 * File operations for socket file descriptors. An ioctl() issued on a socket
 * is dispatched through the unlocked_ioctl member or, when CONFIG_COMPAT is
 * defined, through compat_ioctl.
 */
static const struct file_operations socket_file_ops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.read_iter = sock_read_iter,
.write_iter = sock_write_iter,
.poll = sock_poll,
.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_sock_ioctl, /* socket ioctl entry point followed in this walkthrough */
#endif
.mmap = sock_mmap,
.release = sock_close,
.fasync = sock_fasync,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
.splice_read = sock_splice_read,
};
compat_sock_ioctl() 经 compat_sock_ioctl_trans() 调用 ethtool_ioctl(),后者再调用 dev_ioctl();调用关系如下:
/*
 * compat_sock_ioctl - compat ioctl entry point for socket files
 * @file: socket file; its private_data holds the struct socket
 * @cmd: ioctl request code
 * @arg: raw ioctl argument
 *
 * Gives the socket's own compat_ioctl handler the first chance. Only when
 * no handler exists, or the handler reports -ENOIOCTLCMD, is the request
 * passed to the generic translation layer.
 *
 * Returns the result of whichever handler served the request.
 */
static long compat_sock_ioctl(struct file *file, unsigned int cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int ret = -ENOIOCTLCMD;
	struct sock *sk;
	struct net *net;

	sk = sock->sk;
	net = sock_net(sk);

	if (sock->ops->compat_ioctl)
		ret = sock->ops->compat_ioctl(sock, cmd, arg);

	/*
	 * Fall back to the generic translation only for unhandled requests,
	 * so the protocol handler's result is not discarded.
	 */
	if (ret == -ENOIOCTLCMD)
		ret = compat_sock_ioctl_trans(file, sock, cmd, arg);

	return ret;
}
static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
unsigned int cmd, unsigned long arg)
{
void __user *argp = compat_ptr(arg);
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
return compat_ifr_data_ioctl(net, cmd, argp);
switch (cmd) {
case SIOCSIFBR:
case SIOCGIFBR:
return old_bridge_ioctl(argp);
case SIOCGIFNAME:
return dev_ifname32(net, argp);
case SIOCGIFCONF:
return dev_ifconf(net, argp);
case SIOCETHTOOL:
return ethtool_ioctl(net, argp); /* ethtool ioctl () 入口 */
case SIOCWANDEV:
return compat_siocwandev(net, argp);
case SIOCGIFMAP:
case SIOCSIFMAP:
return compat_sioc_ifmap(net, cmd, argp);
case SIOCBONDENSLAVE:
case SIOCBONDRELEASE:
case SIOCBONDSETHWADDR:
case SIOCBONDCHANGEACTIVE:
return bond_ioctl(net, cmd, argp);
case SIOCADDRT:
case SIOCDELRT:
return routing_ioctl(net, sock, cmd, argp);
}
return -ENOIOCTLCMD;
}
/*
 * ethtool_ioctl - compat front end for SIOCETHTOOL
 * @net: network namespace passed on to dev_ioctl()
 * @ifr32: compat ifreq supplied by the caller
 *
 * Reads the data pointer out of the compat ifreq, reads the ethtool command
 * word it points at, then forwards the request to dev_ioctl(). Returns
 * -EFAULT when either user-space read fails, otherwise 0.
 *
 * NOTE(review): abridged excerpt. `data`, `ethcmd` and `ret` are used
 * without a visible declaration, `ifr` is passed to dev_ioctl() without a
 * visible assignment, and the value stored in `ret` is not returned.
 * Presumably the omitted code covers these - confirm against the kernel
 * source before relying on this listing.
 */
static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
{
struct compat_ethtool_rxnfc __user *compat_rxnfc;
bool convert_in = false, convert_out = false;
size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
struct ethtool_rxnfc __user *rxnfc;
struct ifreq __user *ifr;
/* Fetch the user data pointer stored in the compat ifreq */
if (get_user(data, &ifr32->ifr_ifru.ifru_data))
return -EFAULT;
compat_rxnfc = compat_ptr(data);
/* Fetch the ethtool command word from the start of the user buffer */
if (get_user(ethcmd, &compat_rxnfc->cmd))
return -EFAULT;
/* Hand the request over to the generic device ioctl path */
ret = dev_ioctl(net, SIOCETHTOOL, ifr);
return 0;
}
dev_ioctl( ) 函数内容如下:
/*
 * dev_ioctl - network device ioctl dispatcher (abridged)
 * @net: network namespace the request applies to
 * @cmd: ioctl request code
 * @arg: user-space pointer to a struct ifreq
 *
 * Copies the ifreq in from user space, dispatches the request and, on
 * success, copies the ifreq back out.
 *
 * Returns the handler's result, -EFAULT when a user-space copy fails, or
 * -ENOTTY for request codes not handled here.
 */
int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* The request must be copied in before ifr is read below */
	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/*
	 * Cut the name at the first ':' for the lookup; colon stays NULL
	 * when the name contains none.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	switch (cmd) {
	case SIOCETHTOOL:
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ethtool(net, &ifr);	/* dispatches to the device's ethtool_ops */
		rtnl_unlock();
		if (!ret) {
			/* Put the ':' back before returning the ifreq */
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	default:
		if (cmd == SIOCWANDEV ||
		    cmd == SIOCGHWTSTAMP ||
		    (cmd >= SIOCDEVPRIVATE &&
		     cmd <= SIOCDEVPRIVATE + 15)) {
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			if (!ret && copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
				ret = -EFAULT;
			return ret;
		}
		return -ENOTTY;
	}
}
dev_ethtool 函数如下:
/*
 * dev_ethtool - dispatch one ethtool command to the named device (abridged)
 * @net: network namespace used to look the device up
 * @ifr: kernel copy of the request; ifr_name selects the device and
 *       ifr_data is the user-space address of the ethtool command buffer
 *
 * Returns the result of the selected ethtool helper, or -EOPNOTSUPP for a
 * command code with no case in the switch.
 *
 * NOTE(review): the "......." lines mark code left out of this listing.
 * `rc` and `old_features` have no visible declaration, `ethcmd` has no
 * visible assignment, and `dev` is dereferenced without a visible NULL
 * check; presumably the omitted code covers these - confirm against the
 * kernel source.
 */
int dev_ethtool(struct net *net, struct ifreq *ifr)
{
/* Look the device up by name; its ethtool_ops is what e1000 set during probe */
struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
void __user *useraddr = ifr->ifr_data;
u32 ethcmd, sub_cmd;
.......
/* Optional driver hook run before the command; a negative result aborts */
if (dev->ethtool_ops->begin) {
rc = dev->ethtool_ops->begin(dev);
if (rc < 0)
return rc;
}
/* Remember the feature set so a change can be announced afterwards */
old_features = dev->features;
/* Each case calls a helper; for e1000 these presumably reach e1000_ethtool_ops */
switch (ethcmd) {
case ETHTOOL_GSET:
rc = ethtool_get_settings(dev, useraddr);
break;
case ETHTOOL_SSET:
rc = ethtool_set_settings(dev, useraddr);
break;
case ETHTOOL_GDRVINFO:
rc = ethtool_get_drvinfo(dev, useraddr);
break;
case ETHTOOL_GREGS:
rc = ethtool_get_regs(dev, useraddr);
break;
.......
case ETHTOOL_SFECPARAM:
rc = ethtool_set_fecparam(dev, useraddr);
break;
default:
rc = -EOPNOTSUPP;
}
/* Notify the stack when the command changed the device features */
if (old_features != dev->features)
netdev_features_change(dev);
return rc;
}
由以上分析可见,用户空间与内核之间的调用关系是连贯的;而前面借用的那张框架图在函数对应关系上与源码略有出入,请读者自行验证,欢迎反馈验证结果。