linux内核协议栈阅读,linux内核tcp协议栈走读记录(一)

先看[1]中的两段代码(注意:这里的sock_create_kern调用多传了第一个参数&init_net,而下文列出的sock_create_kern定义只有四个参数,两者应出自不同的内核版本):

ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, 0, &sock);

ret = sock->ops->connect(sock, (struct sockaddr *)&s_addr, sizeof(s_addr), 0);

sock是怎么创建的?sock->ops中有多少操作?

/*
 * Excerpt of the AF_INET init routine; the article elides the rest of the
 * body. The only step shown registers inet_family_ops via sock_register(),
 * discarding its return value.
 */
int __init inet_init(void)

{

(void)sock_register(&inet_family_ops);

}

//af_inet.c

/*
 * Protocol-family descriptor for PF_INET; this is the object passed to
 * sock_register() in inet_init().
 */
static const struct net_proto_family inet_family_ops = {

.family = PF_INET,

/* reached through pf->create in __sock_create() */
.create = inet_create,

/* .owner = THIS_MODULE,*/

};

/*
 * Thin wrapper around __sock_create(): forwards family/type/protocol/res
 * unchanged, supplies &init_net as the network namespace and passes 1 for
 * the trailing `kern` argument.
 */
int sock_create_kern(int family, int type, int protocol, struct socket **res)

{

return __sock_create(&init_net, family, type, protocol, res, 1);

}

/*
 * Excerpt of the common socket-creation path. Only the dispatch to the
 * protocol family's create hook is shown; the declarations and setup of
 * err, pf and sock are elided by the article.
 */
int __sock_create(struct net *net, int family, int type, int protocol,

struct socket **res, int kern)

{

/* family-specific constructor; for PF_INET this is inet_create */
err = pf->create(net, sock, protocol, kern);

}

//这里的pf->create指向inet_create函数

接下来就需要分析inet_create(af_inet.c)的处理流程。

/*
 * Excerpt of the PF_INET create hook (inet_family_ops.create). The lookup
 * that sets `answer` is elided; per the surrounding article it points at one
 * entry of inetsw_array. The steps shown wire that entry into the new socket.
 */
static int inet_create(struct net *net, struct socket *sock, int protocol,

int kern)

{

/* socket-level operations table comes from the selected entry */
sock->ops = answer->ops;

answer_prot = answer->prot;

/* allocate the struct sock, handing it the selected struct proto */
sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);

sock_init_data(sock, sk);

}

/*
 * Excerpt of the struct sock allocator; allocation itself is elided.
 * The line shown stores the caller-supplied `prot` in both sk_prot and
 * sk_prot_creator.
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,

struct proto *prot)

{

sk->sk_prot = sk->sk_prot_creator = prot;

}

代码中的answer指针,是根据socket的类型(sock->type,即应用层调用socket()函数时传入的type参数)以及protocol参数,从inetsw_array中选出的某一项。type对应三种数据包的处理方式,主要有SOCK_STREAM(TCP)、SOCK_DGRAM(UDP)、SOCK_RAW(原始socket)。根据上面的代码可知:sock->ops = answer->ops。

/*
 * Table of inet_protosw entries. Each entry ties a socket type and protocol
 * number to a struct proto (.prot) and a proto_ops table (.ops); inet_create()
 * copies those two pointers from the entry it selects.
 */
static struct inet_protosw inetsw_array[] =

{

/* SOCK_STREAM / IPPROTO_TCP */
{

.type = SOCK_STREAM,

.protocol = IPPROTO_TCP,

.prot = &tcp_prot,

.ops = &inet_stream_ops,

.no_check = 0,

.flags = INET_PROTOSW_PERMANENT |

INET_PROTOSW_ICSK,

},

/* SOCK_DGRAM / IPPROTO_UDP */
{

.type = SOCK_DGRAM,

.protocol = IPPROTO_UDP,

.prot = &udp_prot,

.ops = &inet_dgram_ops,

.no_check = UDP_CSUM_DEFAULT,

.flags = INET_PROTOSW_PERMANENT,

},

/* SOCK_DGRAM / IPPROTO_ICMP, backed by ping_prot */
{

.type = SOCK_DGRAM,

.protocol = IPPROTO_ICMP,

.prot = &ping_prot,

.ops = &inet_dgram_ops,

.no_check = UDP_CSUM_DEFAULT,

.flags = INET_PROTOSW_REUSE,

},

/* SOCK_RAW; IPPROTO_IP is used as a wildcard protocol here */
{

.type = SOCK_RAW,

.protocol = IPPROTO_IP, /* wild card */

.prot = &raw_prot,

.ops = &inet_sockraw_ops,

.no_check = UDP_CSUM_DEFAULT,

.flags = INET_PROTOSW_REUSE,

}

};

以tcp为例,这里的ops指针中内容是:

/*
 * Socket-level operations table referenced by the SOCK_STREAM entry of
 * inetsw_array. After inet_create() runs, sock->ops points here, so a call
 * such as sock->ops->connect(...) lands in inet_stream_connect.
 * Several members (.owner, .poll, .mmap, .splice_read) were commented out
 * in the quoted excerpt and are left that way.
 */
const struct proto_ops inet_stream_ops = {

.family = PF_INET,

/*.owner = THIS_MODULE,*/

.release = inet_release,

.bind = inet_bind,

.connect = inet_stream_connect,

.socketpair = sock_no_socketpair,

.accept = inet_accept,

.getname = inet_getname,

// .poll = tcp_poll,

.ioctl = inet_ioctl,

.listen = inet_listen,

.shutdown = inet_shutdown,

.setsockopt = sock_common_setsockopt,

.getsockopt = sock_common_getsockopt,

.sendmsg = inet_sendmsg,

.recvmsg = inet_recvmsg,

// .mmap = sock_no_mmap,

.sendpage = inet_sendpage,

// .splice_read = tcp_splice_read,

/* compat entry points are only present when CONFIG_COMPAT is defined */
#ifdef CONFIG_COMPAT

.compat_setsockopt = compat_sock_common_setsockopt,

.compat_getsockopt = compat_sock_common_getsockopt,

.compat_ioctl = inet_compat_ioctl,

#endif

};

回到文首提到的sock->ops->connect操作,就是执行了inet_stream_connect。在上面提到sk_alloc函数, sk->sk_prot指向的就是answer->prot。

/*
 * Excerpt of the .connect handler of inet_stream_ops. The part shown simply
 * forwards all four arguments to __inet_stream_connect(); anything around
 * that call (and the use of err) is elided by the article.
 */
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,

int addr_len, int flags)

{

err = __inet_stream_connect(sock, uaddr, addr_len, flags);

}

/*
 * Excerpt of the stream-connect worker. The step shown hands the request
 * down to the protocol-level connect hook stored in sk->sk_prot; how sk is
 * obtained from sock is elided. Note that `flags` is not passed on.
 */
int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,

int addr_len, int flags)

{

/* for a TCP socket sk_prot is tcp_prot, so this calls tcp_v4_connect */
err = sk->sk_prot->connect(sk, uaddr, addr_len);

}

如果是tcp协议,answer->prot就是指向了tcp_prot。结构体tcp_prot(tcp_ipv4.c)中的内容:

/*
 * Protocol-level operations and parameters for TCP over IPv4. Referenced by
 * the SOCK_STREAM entry of inetsw_array and installed as sk->sk_prot by
 * sk_alloc(), so sk->sk_prot->connect resolves to tcp_v4_connect.
 * .owner and .slab_flags were commented out in the quoted excerpt.
 */
struct proto tcp_prot = {

.name = "TCP",

//.owner = THIS_MODULE,

.close = tcp_close,

/* target of sk->sk_prot->connect in __inet_stream_connect() */
.connect = tcp_v4_connect,

.disconnect = tcp_disconnect,

.accept = inet_csk_accept,

.ioctl = tcp_ioctl,

.init = tcp_v4_init_sock,

.destroy = tcp_v4_destroy_sock,

.shutdown = tcp_shutdown,

.setsockopt = tcp_setsockopt,

.getsockopt = tcp_getsockopt,

.recvmsg = tcp_recvmsg,

.sendmsg = tcp_sendmsg,

.sendpage = tcp_sendpage,

.backlog_rcv = tcp_v4_do_rcv,

.release_cb = tcp_release_cb,

.mtu_reduced = tcp_v4_mtu_reduced,

.hash = inet_hash,

.unhash = inet_unhash,

.get_port = inet_csk_get_port,

.enter_memory_pressure = tcp_enter_memory_pressure,

.stream_memory_free = tcp_stream_memory_free,

.sockets_allocated = &tcp_sockets_allocated,

.orphan_count = &tcp_orphan_count,

.memory_allocated = &tcp_memory_allocated,

.memory_pressure = &tcp_memory_pressure,

.sysctl_mem = sysctl_tcp_mem,

.sysctl_wmem = sysctl_tcp_wmem,

.sysctl_rmem = sysctl_tcp_rmem,

.max_header = MAX_TCP_HEADER,

/* per-socket object size is that of struct tcp_sock */
.obj_size = sizeof(struct tcp_sock),

// .slab_flags = SLAB_DESTROY_BY_RCU,

.twsk_prot = &tcp_timewait_sock_ops,

.rsk_prot = &tcp_request_sock_ops,

.h.hashinfo = &tcp_hashinfo,

.no_autobind = true,

/* compat sockopt handlers are only present when CONFIG_COMPAT is defined */
#ifdef CONFIG_COMPAT

.compat_setsockopt = compat_tcp_setsockopt,

.compat_getsockopt = compat_tcp_getsockopt,

#endif

/* cgroup hooks are only present when CONFIG_MEMCG_KMEM is defined */
#ifdef CONFIG_MEMCG_KMEM

.init_cgroup = tcp_init_cgroup,

.destroy_cgroup = tcp_destroy_cgroup,

.proto_cgroup = tcp_proto_cgroup,

#endif

};

sk->sk_prot->connect实际执行的就是tcp_v4_connect函数。

博客[5]会继续分析tcp_v4_connect之后的操作,比如源端口的分配。端口的分配与函数inet_hash_connect相关,基于博客[6]中描述的原则。

如果用户已经绑定了端口,就使用绑定的端口。

如果用户没有绑定端口,则让系统自动选取,策略如下:

1. 获取端口的取值区间,以及区间内端口的个数。

2. 根据初始偏移量,从端口区间内的某个端口开始,遍历整个区间。

2.1 如果端口是保留的,直接跳过。

2.2 如果端口已经被使用了。

2.2.1 不允许复用已经被bind()的端口。

2.2.2 检查端口是否能被重用,可以的话就重用此端口。

2.3 如果端口没有被使用过,就选择此端口。

/*
 * Excerpt of the TCP/IPv4 connect handler (tcp_prot.connect). Two calls are
 * shown: ip_route_connect() (presumably the route lookup -- confirm against
 * the kernel source) and inet_hash_connect(). Declarations of rt, fl4,
 * nexthop, inet, orig_sport, orig_dport and err are elided by the article.
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)

{

rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,

RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,

IPPROTO_TCP,

orig_sport, orig_dport, sk);

/* per the surrounding article, source-port allocation happens via this call */
err = inet_hash_connect(&tcp_death_row, sk);

}

/*
 * Wrapper around __inet_hash_connect(): passes death_row and sk through,
 * computes the port offset with inet_sk_port_offset(sk), and supplies
 * __inet_check_established and __inet_hash_nolisten as the two callbacks.
 */
int inet_hash_connect(struct inet_timewait_death_row *death_row,

struct sock *sk)

{

return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),

__inet_check_established, __inet_hash_nolisten);

}

/*
 * Excerpt of the connect-time hashing/port-selection worker. Takes the
 * timewait death row, the socket, a starting port offset, and two callbacks
 * (check_established, hash). Almost all of the body is elided: snum, tb and
 * port are not declared in this excerpt.
 */
int __inet_hash_connect(struct inet_timewait_death_row *death_row,

struct sock *sk, u32 port_offset,

int (*check_established)(struct inet_timewait_death_row *,

struct sock *, __u16, struct inet_timewait_sock **),

int (*hash)(struct sock *sk, struct inet_timewait_sock *twp))

{

/* NOTE(review): snum is presumably the already-bound local port; zero means none -- confirm */
if (!snum) {

// port allocation

}

inet_bind_hash(sk, tb, port);

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值