背景
在 Linux 的网络驱动中,loopback 驱动算是最简单的一个,用处却很大。随便找一个 Linux 系统,在 shell 里执行 "ifconfig" 或者 "ip -s link list" 命令,就可以看到:
# ifconfig
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
UP LOOPBACK RUNNING MTU:65536 Metric:1
RX packets:593365 errors:0 dropped:0 overruns:0 frame:0
TX packets:593365 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1
RX bytes:86365483 (82.3 MiB) TX bytes:86365483 (82.3 MiB)
# 或
# ip -s link list
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
RX: bytes packets errors dropped overrun mcast
197217691 2769876 0 0 0 0
TX: bytes packets errors dropped carrier collsns
197217691 2769876 0 0 0 0
从上面的结果看,RX 和 TX 的统计值完全相同,而且收发的网络报文还是挺多的。只要是发给 "127.0.0.1" 这个 IP 的报文都会走这个网口,很多本地的 IP 服务或者进程间通信都会用到它。
源码
基本原理是先用 alloc_netdev 函数申请一个 struct net_device 对象,申请成功后会回调 loopback_setup,在里面初始化好各种操作的回调函数,然后再用 register_netdev 函数注册这个对象。其中发送的回调函数是 ".ndo_start_xmit = loopback_xmit",在这个函数里面会把要发送的内容放到自己的接收队列中,调用的接口是 "netif_rx"。源码及注释如下:
/*
 * Per-CPU loopback statistics: number of packets and number of bytes.
 *
 * loopback_xmit() updates the counters of the current CPU between
 * u64_stats_update_begin() and u64_stats_update_end() on syncp, and
 * loopback_get_stats64() reads them inside a
 * u64_stats_fetch_begin_irq()/u64_stats_fetch_retry_irq() loop on the
 * same syncp, retrying the read whenever the retry helper asks for it.
 */
struct pcpu_lstats {
u64 packets; /* packets counted by loopback_xmit() on this CPU */
u64 bytes; /* bytes counted by loopback_xmit() on this CPU */
struct u64_stats_sync syncp; /* sync handle shared by the writer and the reader */
};
/*
 * Transmit callback of the loopback device: the skb that was handed in for
 * transmission is passed straight to netif_rx(), i.e. queued for reception.
 *
 * The higher levels take care of making this non-reentrant (it's
 * called with bh's disabled).
 *
 * The per-CPU packet/byte counters are only updated when netif_rx()
 * returns NET_RX_SUCCESS. The function itself always returns NETDEV_TX_OK.
 */
static netdev_tx_t loopback_xmit(struct sk_buff *skb,
				 struct net_device *dev)
{
	struct pcpu_lstats *cpu_stats;
	int rx_status;
	int nbytes;

	/* Orphan the skb before it is looped back. */
	skb_orphan(skb);

	/*
	 * Before queueing this packet to netif_rx(),
	 * make sure dst is refcounted.
	 */
	skb_dst_force(skb);

	/* Store the protocol that eth_type_trans() reports for this frame. */
	skb->protocol = eth_type_trans(skb, dev);

	/* it's OK to use this_cpu_ptr() because BHs are off */
	cpu_stats = this_cpu_ptr(dev->lstats);

	/* Sample the length before the skb is handed over to netif_rx(). */
	nbytes = skb->len;

	rx_status = netif_rx(skb);
	if (likely(rx_status == NET_RX_SUCCESS)) {
		/* Bracket the update so readers using syncp can detect it. */
		u64_stats_update_begin(&cpu_stats->syncp);
		cpu_stats->packets++;
		cpu_stats->bytes += nbytes;
		u64_stats_update_end(&cpu_stats->syncp);
	}

	return NETDEV_TX_OK;
}
/*
 * Fill @stats with the device totals.
 *
 * The packet and byte counters of every possible CPU are summed up; the
 * resulting totals are reported both as RX and as TX values.
 *
 * Returns @stats.
 */
static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev,
						      struct rtnl_link_stats64 *stats)
{
	u64 total_packets = 0;
	u64 total_bytes = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct pcpu_lstats *cpu_stats = per_cpu_ptr(dev->lstats, cpu);
		u64 snap_bytes, snap_packets;
		unsigned int seq;

		/*
		 * Read both counters, and repeat the read for as long as
		 * u64_stats_fetch_retry_irq() asks for a retry.
		 */
		do {
			seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
			snap_bytes = cpu_stats->bytes;
			snap_packets = cpu_stats->packets;
		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));

		total_packets += snap_packets;
		total_bytes += snap_bytes;
	}

	/* The same totals are reported for both directions. */
	stats->tx_packets = total_packets;
	stats->rx_packets = total_packets;
	stats->tx_bytes = total_bytes;
	stats->rx_bytes = total_bytes;

	return stats;
}
/*
 * Link-state callback installed as .get_link in loopback_ethtool_ops.
 * Ignores @dev and unconditionally returns 1.
 */
static u32 always_on(struct net_device *dev)
{
	const u32 link_up = 1;

	return link_up;
}
/*
 * ethtool operations of the loopback device. Only the link-state query is
 * provided, and it is answered by always_on(), which always returns 1.
 */
static const struct ethtool_ops loopback_ethtool_ops = {
.get_link = always_on,
};
/*
 * .ndo_init callback: allocate the per-CPU statistics (struct pcpu_lstats)
 * of @dev and store them in dev->lstats.
 *
 * Returns 0 on success, or -ENOMEM when the allocation fails.
 */
static int loopback_dev_init(struct net_device *dev)
{
	dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);

	return dev->lstats ? 0 : -ENOMEM;
}
// 注销设备,回收资源
static void loopback_dev_free(struct net_device *dev)
{
dev_net(dev)->loopback_dev = NULL;
free_percpu(dev->lstats);
free_netdev(dev);
}
/*
 * net_device operations of the loopback device: per-CPU statistics setup,
 * MAC address change via eth_mac_addr, transmit and statistics readout.
 */
static const struct net_device_ops loopback_ops = {
	.ndo_init		= loopback_dev_init,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_start_xmit		= loopback_xmit,
	.ndo_get_stats64	= loopback_get_stats64,
};
/*
* The loopback device is special. There is only one instance
* per network namespace.
*/
static void loopback_setup(struct net_device *dev)
{
dev->mtu = 64 * 1024; // 与最大数据帧有关
dev->hard_header_len = ETH_HLEN; /* 14 */
dev->min_header_len = ETH_HLEN; /* 14 */
dev->addr_len = ETH_ALEN; /* 6 */
dev->type = ARPHRD_LOOPBACK; /* 0x0001*/
dev->flags = IFF_LOOPBACK;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
netif_keep_dst(dev);
dev->hw_features = NETIF_F_GSO_SOFTWARE;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
| NETIF_F_GSO_SOFTWARE
| NETIF_F_HW_CSUM
| NETIF_F_RXCSUM
| NETIF_F_SCTP_CRC
| NETIF_F_HIGHDMA
| NETIF_F_LLTX
| NETIF_F_NETNS_LOCAL
| NETIF_F_VLAN_CHALLENGED
| NETIF_F_LOOPBACK;
dev->ethtool_ops = &loopback_ethtool_ops; // ethtool 是用于查询和设置网卡参数的命令
dev->header_ops = ð_header_ops;
dev->netdev_ops = &loopback_ops; // 网络设备的操作
dev->destructor = loopback_dev_free; // 注销设备,回收资源
}
/*
 * Setup and register the loopback device.
 *
 * Allocates a net_device named "lo" (configured by loopback_setup()),
 * attaches it to @net, registers it and records it in net->loopback_dev.
 *
 * Returns 0 on success or a negative error code. A failure for the initial
 * network namespace (init_net) ends in panic().
 */
static __net_init int loopback_net_init(struct net *net)
{
	struct net_device *lo;
	int rc = -ENOMEM;

	/* loopback_setup() is supplied as the setup callback for the device. */
	lo = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup);
	if (!lo)
		goto fail;

	dev_net_set(lo, net);

	rc = register_netdev(lo);
	if (rc) {
		free_netdev(lo);
		goto fail;
	}

	BUG_ON(lo->ifindex != LOOPBACK_IFINDEX);
	net->loopback_dev = lo;
	return 0;

fail:
	if (net_eq(net, &init_net))
		panic("loopback: Failed to register netdevice: %d\n", rc);
	return rc;
}
/*
 * Registered in net/core/dev.c
 *
 * Per-network-namespace operations of the loopback driver: the .init hook
 * is loopback_net_init(), which sets up and registers the "lo" device for
 * the struct net it is given.
 */
struct pernet_operations __net_initdata loopback_net_ops = {
.init = loopback_net_init,
};
end