tcp/ip 协议栈实现1-概述

源码基于linux 3.0.35, imx6,refer and tks
linux 内核源代码剖析-tcpip实现,深入浅出linux tcpip,tcpip详解1

1.协议栈架构,一图概括



2.协议栈初始化流程
core_initcall(sock_init)-----------net/socket.c---------socket文件系统的初始化
subsys_initcall(proto_init)---net/core/sock.c---注册/proc/net/protocols(展示各struct proto的信息),并不是TCP本身的初始化;TCP的初始化在inet_init中完成

subsys_initcall(net_dev_init)---net/core/dev.c---设备无关接口的初始化

fs_initcall(inet_init)----------net/ipv4/af_inet.c---协议无关接口的初始化(部分协议栈的初始化),上图的INET
module_init(dm9000_init)-----------drivers/net/dm9000.c---driver
上面的这5个层次的初始化,哪个先执行?看3

3.内核怎么初始化initcalls段的?
init/main.c
start_kernel -> rest_init -> kernel_init -> do_basic_setup -> do_initcalls
此函数会依次调用下面各个段的函数
include/asm-generic/vmlinux.lds.h
/*
 * Linker-script fragment listing the .initcall*.init input sections in a
 * fixed order: the "early" section first, then levels 0..7, each level
 * followed by its "s" variant, with the rootfs section placed between
 * 5s and 6.
 * The symbol __early_initcall_end is assigned the location right after
 * the .initcallearly.init entries.
 */
#define INITCALLS							\
	*(.initcallearly.init)						\
	VMLINUX_SYMBOL(__early_initcall_end) = .;			\
  	*(.initcall0.init)						\
  	*(.initcall0s.init)						\
  	*(.initcall1.init)						\
  	*(.initcall1s.init)						\
  	*(.initcall2.init)						\
  	*(.initcall2s.init)						\
  	*(.initcall3.init)						\
  	*(.initcall3s.init)						\
  	*(.initcall4.init)						\
  	*(.initcall4s.init)						\
  	*(.initcall5.init)						\
  	*(.initcall5s.init)						\
	*(.initcallrootfs.init)						\
  	*(.initcall6.init)						\
  	*(.initcall6s.init)						\
  	*(.initcall7.init)						\
  	*(.initcall7s.init)
include/linux/init.h
/*
 * Per-level initcall registration macros. Each one forwards fn to
 * __define_initcall together with a level string ("0".."7", "rootfs",
 * or an "s"-suffixed variant) and a matching id token.
 */
#define pure_initcall(fn)		__define_initcall("0",fn,0)

#define core_initcall(fn)		__define_initcall("1",fn,1)
#define core_initcall_sync(fn)		__define_initcall("1s",fn,1s)
#define postcore_initcall(fn)		__define_initcall("2",fn,2)
#define postcore_initcall_sync(fn)	__define_initcall("2s",fn,2s)
#define arch_initcall(fn)		__define_initcall("3",fn,3)
#define arch_initcall_sync(fn)		__define_initcall("3s",fn,3s)
#define subsys_initcall(fn)		__define_initcall("4",fn,4)
#define subsys_initcall_sync(fn)	__define_initcall("4s",fn,4s)
#define fs_initcall(fn)			__define_initcall("5",fn,5)
#define fs_initcall_sync(fn)		__define_initcall("5s",fn,5s)
#define rootfs_initcall(fn)		__define_initcall("rootfs",fn,rootfs)
#define device_initcall(fn)		__define_initcall("6",fn,6)
#define device_initcall_sync(fn)	__define_initcall("6s",fn,6s)
#define late_initcall(fn)		__define_initcall("7",fn,7)
#define late_initcall_sync(fn)		__define_initcall("7s",fn,7s)
/*
 * Defines a static initcall_t variable named __initcall_<fn><id>,
 * initialised to fn and placed in the section ".initcall<level>.init"
 * (the adjacent string literals are concatenated into one section name).
 * NOTE(review): __used presumably keeps the otherwise unreferenced
 * variable from being discarded -- confirm against the compiler headers.
 */
#define __define_initcall(level,fn,id) \
	static initcall_t __initcall_##fn##id __used \
	__attribute__((__section__(".initcall" level ".init"))) = fn

所以,经 core_initcall修饰的sock_init最先执行


4.协议栈send,recv流程大致走一遍

TCP消息下行(应用-->驱动):
1 sendto   应用层
2 sys_sendto net/socket.c 系统调用接口(socket文件系统)
3 sock_sendmsg net/socket.c 
4 __sock_sendmsg_nosec net/socket.c 
5 sock->ops->sendmsg net/socket.c 
6 inet_sendmsg (inet_stream_ops) net/ipv4/af_inet.c 协议无关接口
7 sk->sk_prot->sendmsg net/ipv4/af_inet.c 
8 tcp_sendmsg (tcp_prot) net/ipv4/tcp.c TCP层
9 tcp_push net/ipv4/tcp.c 
10 __tcp_push_pending_frames net/ipv4/tcp_output.c 
11 tcp_write_xmit net/ipv4/tcp_output.c 
12 tcp_transmit_skb net/ipv4/tcp_output.c 
13 icsk->icsk_af_ops->queue_xmit net/ipv4/tcp_output.c 
14 ip_queue_xmit (ipv4_specific) net/ipv4/ip_output.c IP层
15 ip_local_out net/ipv4/ip_output.c 
16 dst_output include/net/dst.h 
17 skb_dst(skb)->output include/net/dst.h 
18 ip_output net/ipv4/ip_output.c 
19 ip_finish_output net/ipv4/ip_output.c 
20 ip_finish_output2 net/ipv4/ip_output.c 
21 neigh->output net/ipv4/ip_output.c 
22 dev_queue_xmit net/core/dev.c 设备无关接口
23 dev_hard_start_xmit net/core/dev.c 
24 ops->ndo_start_xmit net/core/dev.c 
25 dm9000_start_xmit drivers/net/dm9000.c 链路层,driver

TCP消息上行(应用<--驱动):
28 recvfrom   应用层
27 sys_recvfrom net/socket.c 系统调用接口(socket文件系统)
26 sock_recvmsg net/socket.c  
25 __sock_recvmsg_nosec net/socket.c 
24 sock->ops->recvmsg net/socket.c  
23 inet_recvmsg (inet_stream_ops) net/ipv4/af_inet.c 协议无关接口
22 sk->sk_prot->recvmsg net/ipv4/af_inet.c 
21 tcp_recvmsg (tcp_prot) net/ipv4/tcp.c TCP层
20 skb_queue_walk net/ipv4/tcp.c 
19 __skb_queue_tail(加入队列) net/ipv4/tcp_input.c 
18 tcp_rcv_established net/ipv4/tcp_input.c 
17 tcp_v4_do_rcv net/ipv4/tcp_ipv4.c 
16 tcp_v4_rcv net/ipv4/tcp_ipv4.c 
15 ipprot->handler net/ipv4/ip_input.c IP层
14 ip_local_deliver_finish net/ipv4/ip_input.c 
13 ip_local_deliver net/ipv4/ip_input.c 
12 skb_dst(skb)->input include/net/dst.h 
11 dst_input include/net/dst.h 
10 ip_rcv_finish net/ipv4/ip_input.c 
9 ip_rcv net/ipv4/ip_input.c 
8 deliver_skb net/core/dev.c 设备无关接口
7 __netif_receive_skb net/core/dev.c 
6 process_backlog net/core/dev.c 
5 net_rx_action(软中断) net/core/dev.c 
4 enqueue_to_backlog net/core/dev.c 
3 netif_rx (Non NAPI) net/core/dev.c 
2 dm9000_rx drivers/net/dm9000.c driver
1 dm9000_interrupt drivers/net/dm9000.c 


5.协议栈socket系统调用走一遍
应用层执行int fd = socket(AF_INET, SOCK_STREAM, 0)时,内核做了什么?
先看原型
int socket(int domain, int type, int protocol);
domain 目前可以取值AF_UNIX, AF_LOCAL, AF_INET, AF_INET6, AF_IPX, AF_NETLINK, AF_BLUETOOTH, AF_CAN 等
type 目前可以取值SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET, SOCK_RAW, SOCK_PACKET 等
protocol:
tcp/ip协议栈一般取值0,由内核按type选择默认协议(也可显式指定IPPROTO_TCP、IPPROTO_UDP等) ;
bluez协议栈有实现之,可取值 BTPROTO_L2CAP, BTPROTO_HCI, BTPROTO_SCO, BTPROTO_RFCOMM, BTPROTO_BNEP, BTPROTO_CMTP, BTPROTO_HIDP, BTPROTO_AVDTP等

内核流程:
1.sys_socket(net/socket.c)
2.sock_create (net/socket.c)
3.__sock_create (net/socket.c)

此函数中根据domain = AF_INET,通过语句pf = rcu_dereference(net_families[family]);
找到结构体inet_family_ops (net/ipv4/af_inet.c)
/*
 * net_proto_family instance for PF_INET. Its .create hook is inet_create,
 * which is what pf->create(...) resolves to once this entry has been
 * looked up for the requested family.
 */
static const struct net_proto_family inet_family_ops = {
.family = PF_INET,
.create = inet_create,
.owner = THIS_MODULE,
};
4.pf->create(net, sock, protocol, kern); (net/socket.c)
5.即调用inet_family_ops 的inet_create (net/ipv4/af_inet.c)
6.创建socket sock
此函数中根据type = SOCK_STREAM,从静态数组struct inet_protosw inetsw_array (net/ipv4/af_inet.c)中找到对应SOCK_STREAM的结构体,
将结构体的
struct proto_ops ops和struct proto prot赋给sock,最终会形成
sock->ops = &inet_stream_ops (inet_stream_ops位于net/ipv4/af_inet.c)
sock->sk->sk_prot = &tcp_prot (tcp_prot位于net/ipv4/tcp_ipv4.c)

可见,重要的几个结构体是上面红色字体标记的:
struct net_proto_family
struct proto_ops
struct proto
struct inet_protosw主要是为了方便绑定proto_ops 和 proto 


6.协议无关接口的作用是:

不管是什么协议,应用层都可以通过send,recv系统调用操作数据。比如
net/ipv4/af_inet.c来处理tcpip协议的协议无关接口
net/bluetooth/af_bluetooth.c来处理bluez协议的协议无关接口

而实现协议无关,只需要定义一个socket调用接口,如下
1.实现一个结构体struct net_proto_family,标记上是family 协议比如PF_INET,PF_BLUETOOTH 并注册到内核上。在应用层执行socket系统调用的时候会指定哪种family ,然后内核会根据family 来找具体的那个net_proto_family,然后调用这个结构体的create
2.实现struct proto_ops的各个函数指针,在应用层执行系统调用send,recv时,会调用proto_ops结构体的sendmsg和recvmsg


7.tcp中几个重要的结构体实例
//net/ipv4/af_inet.c

/*
 * Table of inet_protosw entries. Each entry ties a socket type and
 * protocol number to the struct proto (.prot) and struct proto_ops (.ops)
 * that implement it, plus checksum (.no_check) and flag settings.
 */
static struct inet_protosw inetsw_array[] =
{
	/* SOCK_STREAM / IPPROTO_TCP: tcp_prot with inet_stream_ops. */
	{
		.type =       SOCK_STREAM,
		.protocol =   IPPROTO_TCP,
		.prot =       &tcp_prot,
		.ops =        &inet_stream_ops,
		.no_check =   0,
		.flags =      INET_PROTOSW_PERMANENT |
			      INET_PROTOSW_ICSK,
	},

	/* SOCK_DGRAM / IPPROTO_UDP: udp_prot with inet_dgram_ops. */
	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_UDP,
		.prot =       &udp_prot,
		.ops =        &inet_dgram_ops,
		.no_check =   UDP_CSUM_DEFAULT,
		.flags =      INET_PROTOSW_PERMANENT,
       },

       /* SOCK_DGRAM / IPPROTO_ICMP: ping_prot, sharing inet_dgram_ops. */
       {
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_ICMP,
		.prot =       &ping_prot,
		.ops =        &inet_dgram_ops,
		.no_check =   UDP_CSUM_DEFAULT,
		.flags =      INET_PROTOSW_REUSE,
       },

       /* SOCK_RAW: raw_prot with inet_sockraw_ops; protocol is a wild card. */
       {
	       .type =       SOCK_RAW,
	       .protocol =   IPPROTO_IP,	/* wild card */
	       .prot =       &raw_prot,
	       .ops =        &inet_sockraw_ops,
	       .no_check =   UDP_CSUM_DEFAULT,
	       .flags =      INET_PROTOSW_REUSE,
       }
};
//net/ipv4/af_inet.c

/*
 * proto_ops table for PF_INET stream sockets: the socket-level entry
 * points (bind, connect, accept, sendmsg, recvmsg, ...) reached through
 * sock->ops. Most hooks are inet_* functions; poll and splice_read point
 * directly at tcp_* functions.
 * NOTE(review): the sock_no_* entries (socketpair, mmap) look like
 * "operation not supported" stubs -- confirm in net/core/sock.c.
 */
const struct proto_ops inet_stream_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,
	.connect	   = inet_stream_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = inet_accept,
	.getname	   = inet_getname,
	.poll		   = tcp_poll,
	.ioctl		   = inet_ioctl,
	.listen		   = inet_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = inet_sendpage,
	.splice_read	   = tcp_splice_read,
	/* The compat_* hooks exist only when CONFIG_COMPAT is defined. */
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_sock_common_setsockopt,
	.compat_getsockopt = compat_sock_common_getsockopt,
	.compat_ioctl	   = inet_compat_ioctl,
#endif
};
//net/ipv4/af_inet.c

/*
 * net_protocol descriptor for TCP: the hooks the IP layer uses to hand
 * packets to TCP. .handler is tcp_v4_rcv (the receive entry point) and
 * .err_handler is tcp_v4_err; the gso_ and gro_ members supply the TCP
 * segmentation/receive-offload callbacks.
 */
static const struct net_protocol tcp_protocol = {
	.handler =	tcp_v4_rcv,
	.err_handler =	tcp_v4_err,
	.gso_send_check = tcp_v4_gso_send_check,
	.gso_segment =	tcp_tso_segment,
	.gro_receive =	tcp4_gro_receive,
	.gro_complete =	tcp4_gro_complete,
	.no_policy =	1,
	.netns_ok =	1,
};
//net/ipv4/tcp_ipv4.c
/*
 * struct proto instance named "TCP": the transport-layer operations
 * reached through sk->sk_prot (sendmsg -> tcp_sendmsg,
 * recvmsg -> tcp_recvmsg, connect -> tcp_v4_connect, ...), together with
 * pointers to TCP's memory-accounting variables and sysctl arrays.
 * obj_size is sizeof(struct tcp_sock).
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	/* The compat_* hooks exist only when CONFIG_COMPAT is defined. */
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};


linux 用socket结构体描述套接字接口,关键成员是sk(其sk->sk_prot这组ops指向传输层)和ops(此ops代表用户接口层)
/*
 * Describes one socket as seen by the socket interface layer. The two
 * members that connect the layers are sk (whose sk->sk_prot holds the
 * transport-layer operations) and ops (the user-interface-layer
 * operations).
 */
struct socket {
 socket_state  state;

 /* type is bracketed by kmemcheck bitfield begin/end annotations. */
 kmemcheck_bitfield_begin(type);
 short   type;
 kmemcheck_bitfield_end(type);

 unsigned long  flags;

 /* Pointer annotated __rcu. */
 struct socket_wq __rcu *wq;

 struct file  *file;
 /* Link to the struct sock; sk->sk_prot points at the transport layer. */
 struct sock  *sk;
 /* User-interface-layer operations table (struct proto_ops). */
 const struct proto_ops *ops;
};



  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值