协议处理函数
每种协议都会有一个初始化函数。如果协议被静态编译进内核,初始化函数在引导期间执行;如果被编译成模块,则在模块加载时执行。
设备驱动接收到一个报文后,将其保存在sk_buff结构内,然后对protocol字段初始化。
/* Driver receive path: store the protocol ID returned by eth_type_trans() in the skb. */
skb->protocol = eth_type_trans(skb, nic->netdev);
protocol字段被前面章节出现过的__netif_receive_skb函数使用,找到合适的L3处理函数并调用deliver_skb函数将skb传递到上层进行处理。
一个报文可以被传递给多个处理函数。比如当报文嗅探程序运行时。
协议处理函数组织
ptype_base是一个由16个元素组成的hash表,使用protocol字段作为hash表的key来查找注册的报文处理函数。
ptype_all是一个链表,ETH_P_ALL协议被组织到这个链表中。
/* Handlers for specific protocol types; ptype_head() picks the bucket as ntohs(type) & PTYPE_HASH_MASK. */
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
/* Handlers registered with type ETH_P_ALL and no bound device (see ptype_head()). */
struct list_head ptype_all __read_mostly; /* Taps */
协议处理函数注册
内核调用dev_add_pack注册一个协议。这个函数的唯一参数是指向struct packet_type的指针。
struct packet_type结构是ptype_base hash表和ptype_all链表的元素。
packet_type结构的dev是网卡设备指针,意思是这个协议为该设备开启。置为NULL时,就是为所有设备开启。比如tcpdump -i eth0 命令通过PF_PACKET套接字建立一个packet_type,并将dev指针初始化为eth0相关联的net_device实例。
func是上层协议的处理函数指针。
/*
 * struct packet_type - one registered protocol handler.
 *
 * An instance is handed to dev_add_pack(), which links it through @list
 * onto the list selected from @type and @dev.
 */
struct packet_type {
__be16 type; /* This is really htons(ether_type). */
struct net_device *dev; /* NULL is wildcarded here */
/*
 * Per-packet receive handler (ip_rcv for IPv4).
 * NOTE(review): the two net_device arguments are presumably the receiving
 * device and the original device - confirm against callers.
 */
int (*func) (struct sk_buff *,
struct net_device *,
struct packet_type *,
struct net_device *);
/* Handler variant that takes a list_head (ip_list_rcv for IPv4). */
void (*list_func) (struct list_head *,
struct packet_type *,
struct net_device *);
/* NOTE(review): presumably lets a socket test whether this entry belongs to it - confirm. */
bool (*id_match)(struct packet_type *ptype,
struct sock *sk);
/* NOTE(review): opaque pointer, presumably private data for AF_PACKET - confirm. */
void *af_packet_priv;
/* Node that dev_add_pack() inserts into the chosen handler list. */
struct list_head list;
};
当有多个packet_type实例关联到相同的协议时,匹配该type的输入报文会依次调用每个实例的func函数,即报文会被转交给所有这些协议处理函数实例。
前面提到协议注册通过dev_add_pack函数,这个函数就是将packet_type放到ptype_base hash表或者ptype_all链表上。
/*
* Add a protocol ID to the list. Now that the input handler is
* smarter we can dispense with all the messy stuff that used to be
* here.
*
* BEWARE!!! Protocol handlers, mangling input packets,
* MUST BE last in hash buckets and checking protocol handlers
* MUST start from promiscuous ptype_all chain in net_bh.
* It is true now, do not change it.
* Explanation follows: if protocol handler, mangling packet, will
* be the first on list, it is not able to sense, that packet
* is cloned and should be copied-on-write, so that it will
* change it and subsequent readers will get broken packet.
* --ANK (980803)
*/
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL))
return pt->dev ? &pt->dev->ptype_all : &ptype_all;
else
return pt->dev ? &pt->dev->ptype_specific :
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
/*
 * dev_add_pack - register a packet handler
 * @pt: handler entry to register
 *
 * Looks up the destination list with ptype_head() and inserts pt->list
 * into it with list_add_rcu() while holding ptype_lock.
 */
void dev_add_pack(struct packet_type *pt)
{
	struct list_head *target;

	/* The destination list is resolved before the lock is taken. */
	target = ptype_head(pt);

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, target);
	spin_unlock(&ptype_lock);
}
IPV4协议注册
IPV4协议在inet_init函数注册协议处理函数。
/*
 * IPv4 handler entry: type ETH_P_IP, with ip_rcv as the per-packet
 * handler and ip_list_rcv as the list handler.  .dev is not set, so it
 * is NULL, which struct packet_type documents as the wildcard.
 */
static struct packet_type ip_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IP),
.func = ip_rcv,
.list_func = ip_list_rcv,
};
/* Registration call; per the surrounding notes it is issued from inet_init(). */
dev_add_pack(&ip_packet_type);
设置报文类型
驱动程序使用eth_type_trans函数设置报文类型,这个函数主要完成两个功能:
- 根据报文目的MAC地址设置sk_buff的pkt_type字段。
- 取出L3层协议字段。
/*
 * Values stored in skb->pkt_type.  eth_type_trans() assigns the three
 * non-zero values after comparing the destination MAC address with the
 * device's broadcast address and own address.
 */
#define PACKET_HOST 0 /* To us */
#define PACKET_BROADCAST 1 /* To all */
#define PACKET_MULTICAST 2 /* To group */
#define PACKET_OTHERHOST 3 /* To someone else */
/**
 * eth_type_trans - determine the packet's protocol ID.
 * @skb: received socket data
 * @dev: receiving network device
 *
 * Binds @skb to @dev, resets the MAC header, and pulls ETH_HLEN bytes off
 * the front of the buffer.  skb->pkt_type is set for broadcast, multicast
 * and foreign-destination frames and left untouched when the destination
 * equals dev->dev_addr.
 *
 * The rule here is that we assume 802.3 if the type field is short enough
 * to be a length.  This is normal practice and works for any 'now in use'
 * protocol.
 *
 * Return: protocol ID in network byte order - ETH_P_XDSA when
 * netdev_uses_dsa() is true for @dev, the header's h_proto when
 * eth_proto_is_802_3() accepts it, ETH_P_802_3 when the first two payload
 * bytes are 0xFFFF, ETH_P_802_2 otherwise.
 */
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
{
	const struct ethhdr *hdr;
	const unsigned char *dst;
	const unsigned short *llc;
	unsigned short llc_copy;

	skb->dev = dev;
	skb_reset_mac_header(skb);

	/* Capture the header pointer before the pull advances skb->data. */
	hdr = (struct ethhdr *)skb->data;
	skb_pull_inline(skb, ETH_HLEN);
	dst = hdr->h_dest;

	if (unlikely(is_multicast_ether_addr_64bits(dst))) {
		skb->pkt_type = ether_addr_equal_64bits(dst, dev->broadcast) ?
				PACKET_BROADCAST : PACKET_MULTICAST;
	} else if (unlikely(!ether_addr_equal_64bits(dst, dev->dev_addr))) {
		skb->pkt_type = PACKET_OTHERHOST;
	}

	/*
	 * Some variants of DSA tagging don't have an ethertype field
	 * at all, so we check here whether one of those tagging
	 * variants has been configured on the receiving interface,
	 * and if so, set skb->protocol without looking at the packet.
	 */
	if (unlikely(netdev_uses_dsa(dev)))
		return htons(ETH_P_XDSA);

	if (likely(eth_proto_is_802_3(hdr->h_proto)))
		return hdr->h_proto;

	/*
	 * This is a magic hack to spot IPX packets. Older Novell breaks
	 * the protocol design and runs IPX over 802.3 without an 802.2 LLC
	 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
	 * won't work for fault tolerant netware but does for the rest.
	 */
	llc = skb_header_pointer(skb, 0, sizeof(*llc), &llc_copy);
	if (llc && *llc == 0xFFFF)
		return htons(ETH_P_802_3);

	/*
	 * Real 802.2 LLC
	 */
	return htons(ETH_P_802_2);
}