Raw Socket 创建过程分析
1、用户态接口
socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL))
系统调用依次:
/*
 * Kernel-side entry reached from the socket() call above (abridged excerpt).
 * Creates the socket with sock_create() and, on success, passes it to
 * sock_map_fd(), whose result is returned (presumably the new file
 * descriptor — confirm against the full source).
 * NOTE(review): the declarations of `retval` and `flags`, and how `flags`
 * is derived, are elided in this excerpt.
 */
int __sys_socket(int family, int type, int protocol)
{
struct socket *sock;
retval = sock_create(family, type, protocol, &sock);
/* A negative result from sock_create() is returned unchanged. */
if (retval < 0)
return retval;
/* Only the O_CLOEXEC and O_NONBLOCK bits of flags are passed on. */
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}
2、sock_create分析
/*
 * Thin wrapper around __sock_create(): supplies the current task's network
 * namespace (current->nsproxy->net_ns) as the first argument and 0 as the
 * last argument (the `kern` parameter of __sock_create), forwarding
 * family/type/protocol/res unchanged.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
参数family、type、protocol分别为PF_PACKET、SOCK_RAW、htons(ETH_P_ALL)。
current->nsproxy->net_ns指向当前进程所在网络命名空间的struct net结构体,作为第一个参数传给__sock_create。
/*
 * Core socket creation routine (abridged excerpt).
 * NOTE(review): the declaration of `err`, the error checks after each call
 * and the final return statement are elided in this excerpt.
 *
 * net      - network namespace passed to the family's create hook
 * family   - index into net_families[] selecting the protocol family
 * type     - stored into sock->type
 * protocol - forwarded to the security hooks and to pf->create()
 * res      - out parameter that receives the new socket
 * kern     - forwarded to the security hooks and to pf->create()
 */
int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern)
{
struct socket *sock;
const struct net_proto_family *pf;
/* Security hook called before the socket is allocated. */
err = security_socket_create(family, type, protocol, kern);
sock = sock_alloc();
sock->type = type;
/* Fetch the operations registered for this family via rcu_dereference(). */
pf = rcu_dereference(net_families[family]);
/* Family-specific construction through the registered create hook. */
err = pf->create(net, sock, protocol, kern);
/* Security hook called after the family has set the socket up. */
err = security_socket_post_create(sock, family, type, protocol, kern);
/* Hand the socket back to the caller through the out parameter. */
*res = sock;
}
主要关注下面这条语句:
pf = rcu_dereference(net_families[family]);
各协议族在初始化时,会通过sock_register把自己的net_proto_family操作集注册到net_families这个指针数组中,例如:
packet初始化:
/*
 * Protocol-family descriptor for PF_PACKET: its create hook is
 * packet_create. Passed to sock_register() by packet_init().
 */
const struct net_proto_family packet_family_ops = {
.family = PF_PACKET,
.create = packet_create,
.owner = THIS_MODULE,
};
/*
 * Init routine of the packet family (abridged excerpt): registers
 * packet_family_ops with sock_register().
 * NOTE(review): the declaration of `rc`, its error handling and the return
 * statement are elided in this excerpt.
 */
int __init packet_init(void)
{
rc = sock_register(&packet_family_ops);
}
inet初始化:
/*
 * Protocol-family descriptor for PF_INET: its create hook is inet_create.
 * Passed to sock_register() by inet_init().
 */
static const struct net_proto_family inet_family_ops = {
.family = PF_INET,
.create = inet_create,
.owner = THIS_MODULE,
};
/*
 * Init routine of the inet family (abridged excerpt): registers
 * inet_family_ops with sock_register().
 * NOTE(review): the rest of the function, including its return statement,
 * is elided in this excerpt.
 */
int __init inet_init(void)
{
/* The result of sock_register() is explicitly discarded here. */
(void)sock_register(&inet_family_ops);
}
因此首先根据传入的family参数查找net_families里对应的操作集。
对于RAW Socket来说,传入的是PF_PACKET,因此pf指向packet_family_ops(即pf = &packet_family_ops),随后的pf->create调用的就是packet_create。
3、packet_create分析
调用如下:
/*
 * create hook of the PF_PACKET family (abridged excerpt); it is the
 * .create member of packet_family_ops.
 * NOTE(review): the allocation of `sk`, the derivation of `proto` from
 * `protocol`, and the return statement are not shown in this excerpt —
 * confirm against the full source.
 */
int packet_create(struct net *net, struct socket *sock, int protocol, int kern)
{
struct sock *sk;
struct packet_sock *po;
sock->state = SS_UNCONNECTED;
/* Install the packet socket operation table on the socket. */
sock->ops = &packet_ops;
sock_init_data(sock, sk);
/* po is the packet_sock obtained from sk through pkt_sk(). */
po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
/* Transmit function and receive handler used by this socket. */
po->xmit = dev_queue_xmit;
po->prot_hook.func = packet_rcv;
// Attach sk to af_packet_priv.
po->prot_hook.af_packet_priv = sk;
/* The hook is registered here only when proto is non-zero. */
if (proto) {
po->prot_hook.type = proto;
__register_prot_hook(sk);
}
}
1、设置socket的操作集ops
sock->ops = &packet_ops;
2、sock_init_data设置了sk的一些属性:
将socket挂在sk上,并设置数据可读时的回调函数sock_def_readable,同时把收发超时设为最大值:
sk->sk_socket = sock;
sk->sk_data_ready = sock_def_readable;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; //极大值
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
3、po由pkt_sk(sk)得到,是sk对应的packet_sock;设置po的发送和接收方法。
po->xmit = dev_queue_xmit;
po->prot_hook.func = packet_rcv;
4、proto非0时,__register_prot_hook(sk)会调用dev_add_pack,将po的prot_hook(packet_type类型)挂到对应链表上(对ETH_P_ALL即ptype_all链表)。
dev_add_pack(&po->prot_hook);
struct list_head *head = ptype_head(pt);
/*
 * Select the list head on which the packet_type `pt` is linked.
 * A type equal to htons(ETH_P_ALL) selects a ptype_all list; any other
 * type selects a ptype_specific list or a ptype_base bucket. When pt->dev
 * is non-NULL the device's own lists are used instead of the global ones.
 */
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL))
return pt->dev ? &pt->dev->ptype_all : &ptype_all;
else
/* Global bucket index: ntohs(pt->type) masked with PTYPE_HASH_MASK. */
return pt->dev ? &pt->dev->ptype_specific :
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
可见,未绑定设备(pt->dev为空)时,ETH_P_ALL类型的packet_type会挂到全局的ptype_all,其他类型按协议号哈希后挂到ptype_base;绑定了设备时,则分别挂到该设备的ptype_all和ptype_specific。
在链路层接收数据包时,会用到这些链表。
ptype_all是一个双向链表,ptype_base是双向链表数组,也是哈希表,如下:
RAW Socket创建过程的结构体关系如下: