MyTCP<二>

上一节学习了socket(int,int,int)系统调用如何传入内核,并将创建工作最终交由内核函数inet_create()进行处理。

进一步阅读inet_create()可以发现,这个函数完成的事情很多。类似于面向对象中类的构造函数,该函数不仅向内核申请创建了socket,sock等结构体对象,还完成了相当多重要的初始化工作。本文我们将以TCP协议为例,分析最基本的数据收发过程:从应用层数据的发送开始,直至应用层数据的接收。当然,其中最为重要的就是TCP传输层与上下层之间的接口。

首先,给出socket 和sock的数据结构。

/*
 * struct socket - socket object as shown in this excerpt.
 *
 * Holds a pointer to its struct sock (@sk) and a pointer to its
 * struct proto_ops operation table (@ops).
 */
struct socket {
	socket_state		state;

	kmemcheck_bitfield_begin(type);
	short			type;
	kmemcheck_bitfield_end(type);

	unsigned long		flags;
	/*
	 * Please keep fasync_list & wait fields in the same cache line
	 */
	struct fasync_struct	*fasync_list;
	wait_queue_head_t	wait;

	struct file		*file;
	struct sock		*sk;	/* the struct sock paired with this socket */
	const struct proto_ops	*ops;	/* operation table, e.g. &inet_stream_ops */
};

struct sock {
/*
* Now struct inet_timewait_sock also uses sock_common, so please just
* don't add nothing before this first member (__sk_common) --acme
*/
struct sock_common__sk_common;
#define sk_node __sk_common.skc_node
#define sk_nulls_node __sk_common.skc_nulls_node
#define sk_refcnt __sk_common.skc_refcnt
#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping


#define sk_copy_start __sk_common.skc_hash
#define sk_hash __sk_common.skc_hash
#define sk_family __sk_common.skc_family
#define sk_state __sk_common.skc_state
#define sk_reuse __sk_common.skc_reuse
#define sk_bound_dev_if __sk_common.skc_bound_dev_if
#define sk_bind_node __sk_common.skc_bind_node
#define sk_prot __sk_common.skc_prot
#define sk_net __sk_common.skc_net
kmemcheck_bitfield_begin(flags);
unsigned int sk_shutdown  : 2,
sk_no_check  : 2,
sk_userlocks : 4,
sk_protocol  : 8,
sk_type      : 16;
kmemcheck_bitfield_end(flags);
int sk_rcvbuf;
socket_lock_t sk_lock;
/*
* The backlog queue is special, it is always used with
* the per-socket spinlock held and requires low latency
* access. Therefore we special case it's implementation.
*/
struct {
struct sk_buff *head;
struct sk_buff *tail;
int len;
} sk_backlog;
wait_queue_head_t*sk_sleep;
struct dst_entry*sk_dst_cache;
#ifdef CONFIG_XFRM
struct xfrm_policy*sk_policy[2];
#endif
rwlock_t sk_dst_lock;
atomic_t sk_rmem_alloc;
atomic_t sk_wmem_alloc;
atomic_t sk_omem_alloc;
int sk_sndbuf;
struct sk_buff_headsk_receive_queue;
struct sk_buff_headsk_write_queue;
#ifdef CONFIG_NET_DMA
struct sk_buff_headsk_async_wait_queue;
#endif
int sk_wmem_queued;
int sk_forward_alloc;
gfp_t sk_allocation;
int sk_route_caps;
int sk_gso_type;
unsigned int sk_gso_max_size;
int sk_rcvlowat;
unsigned long sk_flags;
unsigned long        sk_lingertime;
struct sk_buff_headsk_error_queue;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
int sk_err,
sk_err_soft;
atomic_t sk_drops;
unsigned shortsk_ack_backlog;
unsigned shortsk_max_ack_backlog;
__u32 sk_priority;
struct ucred sk_peercred;
long sk_rcvtimeo;
long sk_sndtimeo;
struct sk_filter      *sk_filter;
void *sk_protinfo;
struct timer_listsk_timer;
ktime_t sk_stamp;
struct socket *sk_socket;
void *sk_user_data;
struct page *sk_sndmsg_page;
struct sk_buff*sk_send_head;
__u32 sk_sndmsg_off;
int sk_write_pending;
#ifdef CONFIG_SECURITY
void *sk_security;
#endif
__u32 sk_mark;
/* XXX 4 bytes hole on 64 bit */
void (*sk_state_change)(struct sock *sk);
void (*sk_data_ready)(struct sock *sk, int bytes);
void (*sk_write_space)(struct sock *sk);
void (*sk_error_report)(struct sock *sk);
  int (*sk_backlog_rcv)(struct sock *sk,
 struct sk_buff *skb);  
void                    (*sk_destruct)(struct sock *sk);
};


通过观察发现,socket数据结构中含有sock结构体的指针(sk),以及struct proto_ops的指针(ops);

sock数据结构中含有socket结构体的指针(sk_socket),以及struct proto的指针(sk_prot,即__sk_common.skc_prot)。由此可知,对于某一个套接字而言,sock,struct proto,socket,struct proto_ops四者之间都可以相互进行访问。对于TCP协议而言,依据其所属的协议族,struct proto和struct proto_ops对象都是唯一的,分别对应于tcp_prot和inet_stream_ops。二者的定义如下:

/*
 * tcp_prot - the struct proto instance for TCP.
 *
 * Every member is bound to a TCP/inet handler or to a TCP-wide
 * variable; the inetsw_array entry for SOCK_STREAM/IPPROTO_TCP
 * references this object through its .prot member.
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,

	/* connection handling */
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,

	/* hashing and port selection */
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,

	/* memory accounting */
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,

	/* object layout */
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};

const struct proto_ops inet_stream_ops = {
.family   = PF_INET,
.owner   = THIS_MODULE,
.release   = inet_release,
.bind   = inet_bind,
.connect   = inet_stream_connect,
.socketpair   = sock_no_socketpair,
.accept   = inet_accept,
.getname   = inet_getname,
.poll   = tcp_poll,
.ioctl   = inet_ioctl,
.listen   = inet_listen,
.shutdown   = inet_shutdown,
.setsockopt   = sock_common_setsockopt,
.getsockopt   = sock_common_getsockopt,
.sendmsg   = tcp_sendmsg,
.recvmsg   = sock_common_recvmsg,
.mmap   = sock_no_mmap,
.sendpage   = tcp_sendpage,
.splice_read   = tcp_splice_read,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};

在内核中,TCP协议属于L4层,其协议代号为IPPROTO_TCP。为了使内核能够将TCP相关的处理操作与协议代号IPPROTO_TCP关联起来,内核定义了数据结构struct inet_protosw,该结构体定义如下:

/*
 * This is used to register socket interfaces for IP protocols.
 *
 * Each entry pairs a (type, protocol) lookup key with the struct proto
 * and struct proto_ops objects to use for that kind of socket.
 */
struct inet_protosw {
	struct list_head list;

	/* These two fields form the lookup key.  */
	unsigned short	 type;	   /* This is the 2nd argument to socket(2). */
	unsigned short	 protocol; /* This is the L4 protocol number.  */

	struct proto	 *prot;
	const struct proto_ops *ops;

	char             no_check;   /* checksum on rcv/xmit/none? */
	unsigned char	 flags;      /* See INET_PROTOSW_* below.  */
};

各协议对应的inet_protosw对象在全局数组inetsw_array中进行定义,如下所示:

static struct inet_protosw inetsw_array[] =
{
{
.type =       SOCK_STREAM,
.protocol =   IPPROTO_TCP,
.prot =       &tcp_prot,
.ops =        &inet_stream_ops,
.no_check =   0,
.flags =      INET_PROTOSW_PERMANENT |
     INET_PROTOSW_ICSK,
},
{
.type =       SOCK_DGRAM,
.protocol =   IPPROTO_UDP,
.prot =       &udp_prot,
.ops =        &inet_dgram_ops,
.no_check =   UDP_CSUM_DEFAULT,
.flags =      INET_PROTOSW_PERMANENT,
       },
       {
      .type =       SOCK_RAW,
      .protocol =   IPPROTO_IP,/* wild card */
      .prot =       &raw_prot,
      .ops =        &inet_sockraw_ops,
      .no_check =   UDP_CSUM_DEFAULT,
      .flags =      INET_PROTOSW_REUSE,
       }
};


那么当TCP想向IP层递交报文时,它是如何找到与IP层的接口函数的呢?与IP层的接口函数名字叫ip_queue_xmit(),该函数被登记在ipv4_specific对象的queue_xmit成员中,如下:

/*
 * ipv4_specific - the inet_connection_sock_af_ops table for IPv4.
 * Its .queue_xmit member is bound to ip_queue_xmit.
 */
const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit		= ip_queue_xmit,
	.send_check		= tcp_v4_send_check,
	.rebuild_header		= inet_sk_rebuild_header,
	.conn_request		= tcp_v4_conn_request,
	.syn_recv_sock		= tcp_v4_syn_recv_sock,
	.remember_stamp		= tcp_v4_remember_stamp,
	.net_header_len		= sizeof(struct iphdr),
	.setsockopt		= ip_setsockopt,
	.getsockopt		= ip_getsockopt,
	.addr2sockaddr		= inet_csk_addr2sockaddr,
	.sockaddr_len		= sizeof(struct sockaddr_in),
	.bind_conflict		= inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_ip_setsockopt,
	.compat_getsockopt	= compat_ip_getsockopt,
#endif
};

而将该对象指定给套接字(icsk->icsk_af_ops = &ipv4_specific)的操作,位于函数static int tcp_v4_init_sock(struct sock *sk)中:

/*
 * tcp_v4_init_sock - fill in the TCP-specific state of an IPv4 sock.
 * @sk: the sock to initialise
 *
 * Calls the queue/timer/prequeue init helpers, seeds the RTO and
 * congestion-control fields, installs ipv4_specific as the af-ops
 * table, optionally allocates cookie_values, takes the buffer sizes
 * from sysctl_tcp_wmem[1]/sysctl_tcp_rmem[1] and bumps
 * tcp_sockets_allocated.
 *
 * Always returns 0.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	/* Both start from the same initial timeout constant. */
	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/*
	 * So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/*
	 * See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	/* This is where the sock is tied to the IPv4 af-ops table. */
	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	/* TCP Cookie Transactions */
	if (sysctl_tcp_cookie_size > 0) {
		/* Default, cookies without s_data_payload. */
		tp->cookie_values = kzalloc(sizeof(*tp->cookie_values),
					    sk->sk_allocation);
		/* A failed allocation is tolerated: the pointer stays NULL. */
		if (tp->cookie_values)
			kref_init(&tp->cookie_values->kref);
	}
	/*
	 * Presumed zeroed, in order of appearance:
	 *	cookie_in_always, cookie_out_never,
	 *	s_data_constant, s_data_in, s_data_out
	 */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	/* The counter update is bracketed by bottom-half disable/enable. */
	local_bh_disable();
	percpu_counter_inc(&tcp_sockets_allocated);
	local_bh_enable();

	return 0;
}

那么最终static int tcp_v4_init_sock(struct sock *sk)又是如何被调用的呢?经过寻找,我们在tcp_prot中发现该函数被赋给了init成员。

/*
 * tcp_prot - the struct proto instance for TCP (shown again to point
 * out the .init member, which is bound to tcp_v4_init_sock).
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	/*
	 * NOTE(review): presumably invoked as sk->sk_prot->init(sk) while
	 * the socket is being created (inet_create) - confirm against the
	 * kernel source.
	 */
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};

至此,数据的发送过程已经打通,接下来我们分析如何打通数据的接收过程。





评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值