我们简单了解下L4层协议和Raw IP是如何与IP层进行交互的。
L4层协议
L4层协议可以通过静态编译和模块配置两种方式加入内核。
比较重要的协议如TCP、UDP、ICMP通常是静态编译至内核。
一些不常用的或者比较特殊的协议,则是通过内核配置加入内核。如IGMP,SCTP,IPIP等等。
L4层协议的注册
L4层协议有net_protocol结构定义:
/* This is used to register protocols. */
struct net_protocol {
int (*handler)(struct sk_buff *skb); //由协议注册的,用于处理入口封包的函数
//由ICMP协议处理函数所用的函数,当收到ICMP UNREACHABLE 时,通知L4层
void (*err_handler)(struct sk_buff *skb, u32 info);
int (*gso_send_check)(struct sk_buff *skb);
struct sk_buff *(*gso_segment)(struct sk_buff *skb,
int features);
struct sk_buff **(*gro_receive)(struct sk_buff **head,
struct sk_buff *skb);
int (*gro_complete)(struct sk_buff *skb);
unsigned int no_policy:1, //使协议免于IPsec检查
netns_ok:1;
};
/*
* Add a protocol handler to the hash tables
*/
int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
int hash, ret;
hash = protocol & (MAX_INET_PROTOS - 1);
spin_lock_bh(&inet_proto_lock);
if (inet_protos[hash]) {
ret = -1;
} else {
inet_protos[hash] = prot;
ret = 0;
}
spin_unlock_bh(&inet_proto_lock);
return ret;
}
/*
* Remove a protocol from the hash tables.
*/
int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
{
int hash, ret;
hash = protocol & (MAX_INET_PROTOS - 1);
spin_lock_bh(&inet_proto_lock);
if (inet_protos[hash] == prot) {
inet_protos[hash] = NULL;
ret = 0;
} else {
ret = -1;
}
spin_unlock_bh(&inet_proto_lock);
synchronize_net();
return ret;
}
EXPORT_SYMBOL(inet_add_protocol);
EXPORT_SYMBOL(inet_del_protocol);
我们看下具体的例子:
下面是TCP、UDP、ICMP协议结构体的初始化,这要先完成:
static const struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.gso_send_check = tcp_v4_gso_send_check,
.gso_segment = tcp_tso_segment,
.gro_receive = tcp4_gro_receive,
.gro_complete = tcp4_gro_complete,
.no_policy = 1,
.netns_ok = 1,
};
static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.gso_send_check = udp4_ufo_send_check,
.gso_segment = udp4_ufo_fragment,
.no_policy = 1,
.netns_ok = 1,
};
static const struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
.no_policy = 1,
.netns_ok = 1,
};
协议结构体初始化结束后,在inet_init中,在把各个协议加入内核。
static int __init inet_init(void)
{
struct sk_buff *dummy_skb;
struct inet_protosw *q;
struct list_head *r;
int rc = -EINVAL;
BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
rc = proto_register(&tcp_prot, 1);
if (rc)
goto out;
rc = proto_register(&udp_prot, 1);
if (rc)
goto out_unregister_tcp_proto;
rc = proto_register(&raw_prot, 1);
if (rc)
goto out_unregister_udp_proto;
/*
* Tell SOCKET that we are alive...
*/
(void)sock_register(&inet_family_ops);
#ifdef CONFIG_SYSCTL
ip_static_sysctl_init();
#endif
/*
* Add all the base protocols.
*/
if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
#ifdef CONFIG_IP_MULTICAST
if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
#endif
/* Register the socket-side information for inet_create. */
for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
INIT_LIST_HEAD(r);
for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
inet_register_protosw(q);
/*
* Set the ARP module up
*/
arp_init();
/*
* Set the IP module up
*/
ip_init();
tcp_v4_init();
/* Setup TCP slab cache for open requests. */
tcp_init();
/* Setup UDP memory threshold */
udp_init();
/* Add UDP-Lite (RFC 3828) */
udplite4_register();
/*
* Set the ICMP layer up
*/
if (icmp_init() < 0)
panic("Failed to create the ICMP control socket.\n");
/*
* Initialise the multicast router
*/
#if defined(CONFIG_IP_MROUTE)
if (ip_mr_init())
printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
#endif
/*
* Initialise per-cpu ipv4 mibs
*/
if (init_ipv4_mibs())
printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");
ipv4_proc_init();
ipfrag_init();
dev_add_pack(&ip_packet_type);
rc = 0;
out:
return rc;
out_unregister_udp_proto:
proto_unregister(&udp_prot);
out_unregister_tcp_proto:
proto_unregister(&tcp_prot);
goto out;
}
从代码中可以看出,TCP、UDP、ICMP等是直接静态编译至内核的。而IGMP只有内核配置了组播才会以模块的方式加入内核。
L3到L4的封包传递:ip_local_deliver_finish
Raw套接字和Raw IP
我们要知道,并不是所有的L4层处理,都是在内核实现的。应用程序可以通过Raw 套接字和Raw IP跳过L4层协议,直接与IP层进行交互。