服务器收到客户端的syn包后,接收数据包的入口函数为tcp_rcv,下面是该函数中处理连接请求的代码。具体的处理函数是tcp_conn_request,该函数处理syn包并回复syn+ack包,新建的socket进入syn_recv状态,至此完成第一、第二次握手。
// Listening socket: the incoming segment may be a SYN, i.e. a connection request.
// Every path through this branch releases sk and returns 0.
if(sk->state==TCP_LISTEN)
{ // A listening socket has nothing outstanding to be acknowledged, so an ACK is answered with a reset
if(th->ack) /* These use the socket TOS.. might want to be the received TOS */
tcp_reset(daddr,saddr,th,sk->prot,opt,dev,sk->ip_tos, sk->ip_ttl);
/*
* We don't care for RST, and non SYN are absorbed (old segments)
* Broadcast/multicast SYN isn't allowed. Note - bug if you change the
* netmask on a running connection it can go broadcast. Even Sun's have
* this problem so I'm ignoring it
*/
// Drop everything that is not a plain SYN addressed to this host. A segment
// with ACK set (already reset above) also falls through to here and is freed.
if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR)
{
kfree_skb(skb, FREE_READ);
release_sock(sk);
return 0;
}
/*
* Guess we need to make a new socket up
*/
// A valid SYN: hand it to tcp_conn_request together with a fresh initial sequence number
tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq());
/*
* Now we have several options: In theory there is nothing else
* in the frame. KA9Q has an option to send data with the syn,
* BSD accepts data with the syn up to the [to be] advertised window
* and Solaris 2.1 gives you a protocol error. For now we just ignore
* it, that fits the spec precisely and avoids incompatibilities. It
* would be nice in future to drop through and process the data.
*/
release_sock(sk);
return 0;
}
/*
 * Handle a SYN received on a listening socket (first and second step of
 * the three-way handshake).
 *
 * Allocates a new sock cloned from the listener, resets its per-connection
 * state, puts it into TCP_SYN_RECV, transmits a SYN+ACK carrying an MSS
 * option, and finally queues the original SYN skb (with skb->sk pointing
 * at the new sock) on the listener's receive_queue.
 *
 *	sk	listening socket the SYN arrived on
 *	skb	the received SYN segment; freed on every failure path,
 *		queued on sk->receive_queue on success
 *	daddr	destination address of the SYN (becomes newsk->saddr)
 *	saddr	source address of the SYN (becomes newsk->daddr)
 *	opt	passed through to tcp_reset()
 *	dev	receiving device; its mtu caps the new sock's mtu
 *	seq	initial sequence number for our side of the connection
 *
 * Returns nothing. Every failure path increments
 * tcp_statistics.TcpAttemptFails.
 */
static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
		unsigned long daddr, unsigned long saddr,
		struct options *opt, struct device *dev, unsigned long seq)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	unsigned char *ptr;
	struct sock *newsk;
	struct tcphdr *th;
	struct device *ndev=NULL;
	int tmp;
	struct rtable *rt;

	/* TCP header of the incoming SYN; used for every header read below. */
	th = skb->h.th;

	/* If the socket is dead, don't accept the connection. */
	if (!sk->dead)
	{
		/*
		 * NOTE(review): the callback runs before the request has been
		 * checked against the backlog or queued, so a waiter may be
		 * notified although nothing is available yet. Kept as is.
		 */
		sk->data_ready(sk,0);
	}
	else
	{
		/* The listener is already being torn down: answer with a reset. */
		if(sk->debug)
			printk("Reset on %p: Connect on dead socket.\n",sk);
		tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 * Make sure we can accept more. This will prevent a
	 * flurry of syns from eating up all our memory.
	 * ack_backlog is incremented once per request queued at the end of
	 * this function; at the limit the SYN is silently dropped.
	 */
	if (sk->ack_backlog >= sk->max_ack_backlog)
	{
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 * We need to build a new sock struct.
	 * It is sort of bad to have a socket without an inode attached
	 * to it, but the wake_up's will just wake up the listening socket,
	 * and if the listening socket is destroyed before this is taken
	 * off of the queue, this will take care of it.
	 */
	newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
	if (newsk == NULL)
	{
		/* just ignore the syn. It will get retransmitted. */
		tcp_statistics.TcpAttemptFails++;
		kfree_skb(skb, FREE_READ);
		return;
	}

	/* Start from a copy of the listener, then reset per-connection fields. */
	memcpy(newsk, sk, sizeof(*newsk));
	skb_queue_head_init(&newsk->write_queue);
	skb_queue_head_init(&newsk->receive_queue);
	newsk->send_head = NULL;
	newsk->send_tail = NULL;
	skb_queue_head_init(&newsk->back_log);
	newsk->rtt = 0;		/*TCP_CONNECT_TIME<<3*/
	newsk->rto = TCP_TIMEOUT_INIT;
	newsk->mdev = 0;
	newsk->max_window = 0;
	newsk->cong_window = 1;
	newsk->cong_count = 0;
	newsk->ssthresh = 0;
	newsk->backoff = 0;
	newsk->blog = 0;
	newsk->intr = 0;
	newsk->proc = 0;
	newsk->done = 0;
	newsk->partial = NULL;
	newsk->pair = NULL;
	newsk->wmem_alloc = 0;
	newsk->rmem_alloc = 0;
	newsk->localroute = sk->localroute;
	newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
	newsk->err = 0;
	newsk->shutdown = 0;
	newsk->ack_backlog = 0;

	/*
	 * The SYN occupies one sequence number, so the next byte expected
	 * from the peer (and the next byte the application may read) is
	 * seq+1. These are set once, before the sock is registered below.
	 */
	newsk->acked_seq = th->seq+1;
	newsk->copied_seq = th->seq+1;
	newsk->fin_seq = th->seq;

	/* First handshake step received: enter SYN_RECV. */
	newsk->state = TCP_SYN_RECV;
	newsk->timeout = 0;
	newsk->ip_xmit_timeout = 0;

	/* Our own sequence space starts at the supplied initial sequence number. */
	newsk->write_seq = seq;
	newsk->window_seq = newsk->write_seq;
	newsk->rcv_ack_seq = newsk->write_seq;
	newsk->urg_data = 0;
	newsk->retransmits = 0;
	newsk->linger=0;
	newsk->destroy = 0;

	/* The timers were copied from the listener; re-initialise them for newsk. */
	init_timer(&newsk->timer);
	newsk->timer.data = (unsigned long)newsk;
	newsk->timer.function = &net_timer;
	init_timer(&newsk->retransmit_timer);
	newsk->retransmit_timer.data = (unsigned long)newsk;
	newsk->retransmit_timer.function=&retransmit_timer;

	/* Record the ports from our point of view: the SYN's dest is our source. */
	newsk->dummy_th.source = th->dest;
	newsk->dummy_th.dest = th->source;

	/*
	 * Swap these two, they are from our point of view.
	 */
	newsk->daddr = saddr;
	newsk->saddr = daddr;

	/*
	 * Register the new sock under newsk->num (copied from the listener).
	 * Presumably this inserts it into the protocol's socket lookup
	 * table - confirm against put_sock().
	 */
	put_sock(newsk->num,newsk);

	/* Template header used for later segments: all flags cleared. */
	newsk->dummy_th.res1 = 0;
	newsk->dummy_th.doff = 6;
	newsk->dummy_th.fin = 0;
	newsk->dummy_th.syn = 0;
	newsk->dummy_th.rst = 0;
	newsk->dummy_th.psh = 0;
	newsk->dummy_th.ack = 0;
	newsk->dummy_th.urg = 0;
	newsk->dummy_th.res2 = 0;
	newsk->socket = NULL;

	/*
	 * Grab the ttl and tos values and use them
	 */
	newsk->ip_ttl=sk->ip_ttl;
	newsk->ip_tos=skb->ip_hdr->tos;

	/*
	 * Use 512 or whatever user asked for
	 */

	/*
	 * Note use of sk->user_mss, since user has no direct access to newsk
	 */
	rt=ip_rt_route(saddr, NULL,NULL);

	/* A route may carry a window clamp; otherwise none is applied. */
	if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
		newsk->window_clamp = rt->rt_window;
	else
		newsk->window_clamp = 0;

	/* MTU preference: user setting, then route MSS, then a default. */
	if (sk->user_mss)
		newsk->mtu = sk->user_mss;
	else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
		newsk->mtu = rt->rt_mss - HEADER_SIZE;
	else
	{
		/* Peer outside our (sub)net: fall back to 576 minus headers. */
#ifdef CONFIG_INET_SNARL	/* Sub Nets Are Local */
		if ((saddr ^ daddr) & default_mask(saddr))
#else
		if ((saddr ^ daddr) & dev->pa_mask)
#endif
			newsk->mtu = 576 - HEADER_SIZE;
		else
			newsk->mtu = MAX_WINDOW;
	}

	/*
	 * But not bigger than device MTU
	 */
	newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);

	/*
	 * This will min with what arrived in the packet
	 */
	tcp_options(newsk,th);

	/* Allocate the buffer that will carry the SYN+ACK. */
	buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL)
	{
		sk->err = ENOMEM;
		newsk->dead = 1;
		newsk->state = TCP_CLOSE;
		/* And this will destroy it */
		release_sock(newsk);
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}

	/* TCP header plus 4 bytes for the MSS option written below. */
	buff->len = sizeof(struct tcphdr)+4;
	buff->sk = newsk;
	buff->localroute = newsk->localroute;
	t1 =(struct tcphdr *) buff->data;

	/*
	 * Put in the IP header and routing stuff.
	 */
	tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
			IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);

	/*
	 * Something went wrong.
	 */
	if (tmp < 0)
	{
		/*
		 * tmp is negative here, while sk->err is stored as a positive
		 * value on the ENOMEM path above; negate so both agree.
		 * NOTE(review): assumes build_header fails with a negated
		 * errno - confirm against the protocol's build_header.
		 */
		sk->err = -tmp;
		buff->free = 1;
		kfree_skb(buff,FREE_WRITE);
		newsk->dead = 1;
		newsk->state = TCP_CLOSE;
		release_sock(newsk);
		skb->sk = sk;
		kfree_skb(skb, FREE_READ);
		tcp_statistics.TcpAttemptFails++;
		return;
	}

	/* tmp is the size of the headers build_header wrote; TCP follows them. */
	buff->len += tmp;
	t1 =(struct tcphdr *)((char *)t1 +tmp);

	/* Start from the peer's header, then overwrite what differs. */
	memcpy(t1, th, sizeof(*t1));
	buff->h.seq = newsk->write_seq;

	/*
	 * Swap the send and the receive.
	 */
	t1->dest = th->source;
	t1->source = newsk->dummy_th.source;

	/*
	 * Our SYN consumes one sequence number, hence the post-increment.
	 * ntohl/ntohs are applied to host-order values here; the byte swap
	 * is the same one a host-to-network conversion performs.
	 */
	t1->seq = ntohl(newsk->write_seq++);

	/* SYN+ACK: second step of the handshake. */
	t1->ack = 1;
	newsk->window = tcp_select_window(newsk);
	newsk->sent_seq = newsk->write_seq;
	t1->window = ntohs(newsk->window);
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->psh = 0;
	t1->syn = 1;
	t1->ack_seq = ntohl(th->seq+1);

	/* Header length in 32-bit words, plus one word for the MSS option. */
	t1->doff = sizeof(*t1)/4+1;

	/* MSS option: kind 2, length 4, then the 16-bit value, high byte first. */
	ptr =(unsigned char *)(t1+1);
	ptr[0] = 2;
	ptr[1] = 4;
	ptr[2] = ((newsk->mtu) >> 8) & 0xff;
	ptr[3] =(newsk->mtu) & 0xff;

	tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);

	/* Transmit the SYN+ACK and arm the write timer with the initial timeout. */
	newsk->prot->queue_xmit(newsk, ndev, buff, 0);
	reset_xmit_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);

	/*
	 * Tie the SYN skb to the new sock. The skb is queued on the listener
	 * below, so whoever dequeues it can reach newsk through skb->sk
	 * (presumably the accept path - confirm against the caller).
	 */
	skb->sk = newsk;

	/*
	 * Charge the sock_buff to newsk.
	 */
	sk->rmem_alloc -= skb->mem_len;
	newsk->rmem_alloc += skb->mem_len;

	/* Queue the request on the listener and count it against the backlog. */
	skb_queue_tail(&sk->receive_queue,skb);
	sk->ack_backlog++;
	release_sock(newsk);
	tcp_statistics.TcpOutSegs++;
}
服务器收到客户端第三次握手的ack,即完成连接的建立,处理代码流程是tcp_rcv->tcp_ack,具体代码如下。
// The socket is still in SYN_RECV when this code is reached: move it to
// ESTABLISHED, which completes the handshake on this side.
if(sk->state==TCP_SYN_RECV)
{
tcp_set_state(sk, TCP_ESTABLISHED);
// Process the TCP options carried by this segment
tcp_options(sk,th);
// Record the peer's port as the destination of the template header
sk->dummy_th.dest=th->source;
// Nothing has been read by the application yet: start reading at the next expected byte
sk->copied_seq = sk->acked_seq;
// Run the state_change callback unless the socket is dead
// (presumably wakes processes waiting for the connection - confirm against the callback)
if(!sk->dead)
sk->state_change(sk);
// No window has been recorded from the peer: fall back to a small value
// and derive the mss from it, capped by the mtu
if(sk->max_window==0)
{
sk->max_window=32; /* Sanity check */
sk->mss=min(sk->max_window,sk->mtu);
}
}