Linux中TCP协议相关的代码,主要位于 net/ipv4/tcp* 目录下, 这里只针对ipv4.
具体有:
net/ipv4/af_inet.c --- 这是网络协议层的接口,由他调用下面的TCP,UDP等协议层代码。
net/ipv4/tcp.c --- TCP协议的普通代码,如初始化,close等
net/ipv4/tcp_output.c ---TCP协议输出处理,封包相关代码
net/ipv4/tcp_input.c --- TCP协议输入处理,解析包相关代码。
net/ipv4/tcp_minisocks.c --- 一些微处理,如timewait等。
net/ipv4/tcp_ipv4.c --- 和ip层交互的接口代码。
另外还有一些具体协议处理相关的代码,如tcp_timer.c, tcp_westwood.c 等
1. af_inet.c 中的inet_ioctl 处理:
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
int err = 0;
struct tcp_sock *tp = tcp_sk(sk);
switch (cmd) {
case SIOCGSTAMP:
err = sock_get_timestamp(sk, (struct timeval __user *)arg);
break;
case SIOCGSTAMPNS:
err = sock_get_timestampns(sk, (struct timespec __user *)arg);
break;
case SIOCADDRT:
case SIOCDELRT:
case SIOCRTMSG:
err = ip_rt_ioctl(cmd, (void __user *)arg);
break;
case SIOCDARP:
case SIOCGARP:
case SIOCSARP:
err = arp_ioctl(cmd, (void __user *)arg);
break;
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFBRDADDR:
case SIOCSIFBRDADDR:
case SIOCGIFNETMASK:
case SIOCSIFNETMASK:
case SIOCGIFDSTADDR:
case SIOCSIFDSTADDR:
case SIOCSIFPFLAGS:
case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
err = devinet_ioctl(cmd, (void __user *)arg);
break;
case SIOCSETSEQACK:
http_seq_ack_info=(struct HTTP_SEQ_ACK_INFO *)arg;
#ifdef VEEX_DEBUG
printk("before changing.\n");
printk("snd_nxt:%08x\n",tp->snd_nxt);
printk("rcv_nxt:%08x\n",tp->rcv_nxt);
#endif
tp->snd_nxt=http_seq_ack_info->seq;
tp->rcv_nxt=http_seq_ack_info->ack;
#ifdef VEEX_DEBUG
printk("after changing.\n");
printk("snd_nxt:%08x\n",tp->snd_nxt);
printk("rcv_nxt:%08x\n",tp->rcv_nxt);
#endif
break;
default:
if (sk->sk_prot->ioctl)
err = sk->sk_prot->ioctl(sk, cmd, arg);
else
err = -ENOIOCTLCMD;
break;
}
return err;
}
应用层可以调用到这个ioctl, 而且可以自定义添加自己想要的ioctl处理,如上面的SIOCSETSEQACK 就是自己定义的一个ioctl处理。
默认情况下,这些ioctl处理都回跳到default下面,执行具体某协议相关的ioctl动作。如TCP,UDP,ICMP等。
2. tcp.c 中的tcp_close() 处理:
void tcp_close(struct sock *sk, long timeout)
{
struct sk_buff *skb;
int data_was_unread = 0;
int state;
lock_sock(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
/* Special case. */
inet_csk_listen_stop(sk);
goto adjudge_to_death;
}
/* We need to flush the recv. buffs. We do this only on the
* descriptor close, not protocol-sourced closes, because the
* reader process may not have drained the data yet!
*/
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
tcp_hdr(skb)->fin;
data_was_unread += len;
__kfree_skb(skb);
}
sk_stream_mem_reclaim(sk);
/* As outlined in RFC 2525, section 2.17, we send a RST here because
* data was lost. To witness the awful effects of the old behavior of
* always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
* GET in an FTP client, suspend the process, wait for the client to
* advertise a zero window, then kill -9 the FTP client, wheee...
* Note: timeout is always zero in such a case.
*/
if (data_was_unread) {
/* Unread data was tossed, zap the connection. */
NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_KERNEL);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
} else if (tcp_close_state(sk)) {
/* We FIN if the application ate all the data before
* zapping the connection.
*/
/* RED-PEN. Formally speaking, we have broken TCP state
* machine. State transitions:
*
* TCP_ESTABLISHED -> TCP_FIN_WAIT1
* TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible)
* TCP_CLOSE_WAIT -> TCP_LAST_ACK
*
* are legal only when FIN has been sent (i.e. in window),
* rather than queued out of window. Purists blame.
*
* F.e. "RFC state" is ESTABLISHED,
* if Linux state is FIN-WAIT-1, but FIN is still not sent.
*
* The visible declinations are that sometimes
* we enter time-wait state, when it is not required really
* (harmless), do not send active resets, when they are
* required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
* they look as CLOSING or LAST_ACK for Linux)
* Probably, I missed some more holelets.
* --ANK
*/
tcp_send_fin(sk);
}
sk_stream_wait_close(sk, timeout);
adjudge_to_death:
state = sk->sk_state;
sock_hold(sk);
sock_orphan(sk);
atomic_inc(sk->sk_prot->orphan_count);
/* It is the last release_sock in its life. It will remove backlog. */
release_sock(sk);
/* Now socket is owned by kernel and we acquire BH lock
to finish close. No need to check for user refs.
*/
local_bh_disable();
bh_lock_sock(sk);
BUG_TRAP(!sock_owned_by_user(sk));
/* Have we already been destroyed by a softirq or backlog? */
if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
goto out;
/* This is a (useful) BSD violating of the RFC. There is a
* problem with TCP as specified in that the other end could
* keep a socket open forever with no application left this end.
* We use a 3 minute timeout (about the same as BSD) then kill
* our end. If they send after that then tough - BUT: long enough
* that we won't make the old 4*rto = almost no time - whoops
* reset mistake.
*
* Nope, it was not mistake. It is really desired behaviour
* f.e. on http servers, when such sockets are useless, but
* consume significant resources. Let's do it with special
* linger2 option. --ANK
*/
if (sk->sk_state == TCP_FIN_WAIT2) {
struct tcp_sock *tp = tcp_sk(sk);
if (tp->linger2 < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
} else {
const int tmo = tcp_fin_time(sk);
if (tmo > TCP_TIMEWAIT_LEN) {
inet_csk_reset_keepalive_timer(sk,
tmo - TCP_TIMEWAIT_LEN);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
goto out;
}
}
}
if (sk->sk_state != TCP_CLOSE) {
sk_stream_mem_reclaim(sk);
if (tcp_too_many_orphans(sk,
atomic_read(sk->sk_prot->orphan_count))) {
if (net_ratelimit())
printk(KERN_INFO "TCP: too many of orphaned "
"sockets\n");
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
}
}
if (sk->sk_state == TCP_CLOSE)
inet_csk_destroy_sock(sk);
/* Otherwise, socket is reprieved until protocol close. */
out:
bh_unlock_sock(sk);
local_bh_enable();
sock_put(sk);
}
TCP close会有服务端和客户端的4次握手,主要是会跳到tcp_send_fin(sk)中去。
3. tcp_output.c 中的 tcp_send_fin(), 这是通知另一端结束tcp信号的函数:
/* Send a fin. The caller locks the socket for us. This cannot be
* allowed to fail queueing a FIN frame under any circumstances.
*/
void tcp_send_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb = tcp_write_queue_tail(sk);
int mss_now;
/* Optimization, tack on the FIN if we have a queue of
* unsent frames. But be careful about outgoing SACKS
* and IP options.
*/
mss_now = tcp_current_mss(sk, 1);
if (tcp_send_head(sk) != NULL) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
TCP_SKB_CB(skb)->end_seq++;
tp->write_seq++;
} else {
/* Socket is locked, keep trying until memory is available. */
for (;;) {
skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
if (skb)
break;
yield();
}
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, MAX_TCP_HEADER);
skb->csum = 0;
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
TCP_SKB_CB(skb)->sacked = 0;
skb_shinfo(skb)->gso_segs = 1;
skb_shinfo(skb)->gso_size = 0;
skb_shinfo(skb)->gso_type = 0;
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
TCP_SKB_CB(skb)->seq = tp->write_seq;
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
tcp_queue_skb(sk, skb);
}
__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
}
/* Push out any pending frames which were held back due to
* TCP_CORK or attempt at coalescing tiny packets.
* The socket must be locked by the caller.
*/
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
int nonagle)
{
struct sk_buff *skb = tcp_send_head(sk);
if (skb) {
if (tcp_write_xmit(sk, cur_mss, nonagle))
tcp_check_probe_timer(sk);
}
}
/* This routine writes packets to the network. It advances the
* send_head. This happens as incoming acks open up the remote
* window for us.
*
* Returns 1, if no segments are in flight and we have queued segments, but
* cannot send anything now because of SWS or another problem.
*/
static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
unsigned int tso_segs, sent_pkts;
int cwnd_quota;
int result;
/* If we are closed, the bytes will have to remain here.
* In time closedown will finish, we empty the write queue and all
* will be happy.
*/
if (unlikely(sk->sk_state == TCP_CLOSE))
return 0;
sent_pkts = 0;
/* Do MTU probing. */
if ((result = tcp_mtu_probe(sk)) == 0) {
return 0;
} else if (result > 0) {
sent_pkts = 1;
}
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota)
break;
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
break;
if (tso_segs == 1) {
if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
(tcp_skb_is_last(sk, skb) ?
nonagle : TCP_NAGLE_PUSH))))
break;
} else {
if (tcp_tso_should_defer(sk, skb))
break;
}
limit = mss_now;
if (tso_segs > 1) {
limit = tcp_window_allows(tp, skb,
mss_now, cwnd_quota);
if (skb->len < limit) {
unsigned int trim = skb->len % mss_now;
if (trim)
limit = skb->len - trim;
}
}
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now)))
break;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
break;
/* Advance the send_head. This one is sent out.
* This call will increment packets_out.
*/
update_send_head(sk, skb);
tcp_minshall_update(tp, mss_now, skb);
sent_pkts++;
}
if (likely(sent_pkts)) {
tcp_cwnd_validate(sk);
return 0;
}
return !tp->packets_out && tcp_send_head(sk);
}
最终会通过tcp_transmit_skb()函数来发送数据包:
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
* All SKB's seen here are completely headerless. It is our
* job to build the TCP header, and pass the packet down to
* IP so it can do the same plus pass the packet off to the
* device.
*
* We are working here with either a clone of the original
* SKB, or a fresh unique copy made by the retransmit engine.
*/
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet;
struct tcp_sock *tp;
struct tcp_skb_cb *tcb;
int tcp_header_size;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *md5;
__u8 *md5_hash_location;
#endif
struct tcphdr *th;
int sysctl_flags;
int err;
BUG_ON(!skb || !tcp_skb_pcount(skb));
/* If congestion control is doing timestamping, we must
* take such a timestamp before we potentially clone/copy.
*/
if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
__net_timestamp(skb);
if (likely(clone_it)) {
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
else
skb = skb_clone(skb, gfp_mask);
if (unlikely(!skb))
return -ENOBUFS;
}
inet = inet_sk(sk);
tp = tcp_sk(sk);
tcb = TCP_SKB_CB(skb);
tcp_header_size = tp->tcp_header_len;
#define SYSCTL_FLAG_TSTAMPS 0x1
#define SYSCTL_FLAG_WSCALE 0x2
#define SYSCTL_FLAG_SACK 0x4
sysctl_flags = 0;
if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
if (sysctl_tcp_timestamps) {
tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
}
if (sysctl_tcp_window_scaling) {
tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
sysctl_flags |= SYSCTL_FLAG_WSCALE;
}
if (sysctl_tcp_sack) {
sysctl_flags |= SYSCTL_FLAG_SACK;
if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
}
} else if (unlikely(tp->rx_opt.eff_sacks)) {
/* A SACK is 2 pad bytes, a 2 byte header, plus
* 2 32-bit sequence numbers for each SACK block.
*/
tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
(tp->rx_opt.eff_sacks *
TCPOLEN_SACK_PERBLOCK));
}
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
#ifdef CONFIG_TCP_MD5SIG
/*
* Are we doing MD5 on this segment? If so - make
* room for it.
*/
md5 = tp->af_specific->md5_lookup(sk, sk);
if (md5)
tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
#endif
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
skb_set_owner_w(skb, sk);
/* Build TCP header and checksum it. */
th = tcp_hdr(skb);
th->source = inet->sport;
th->dest = inet->dport;
th->seq = htonl(tcb->seq);
th->ack_seq = htonl(tp->rcv_nxt);
*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
tcb->flags);
if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
/* RFC1323: The window in SYN & SYN/ACK segments
* is never scaled.
*/
th->window = htons(min(tp->rcv_wnd, 65535U));
} else {
th->window = htons(tcp_select_window(sk));
}
th->check = 0;
th->urg_ptr = 0;
if (unlikely(tp->urg_mode &&
between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
th->urg_ptr = htons(tp->snd_up-tcb->seq);
th->urg = 1;
}
if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
tcp_syn_build_options((__be32 *)(th + 1),
tcp_advertise_mss(sk),
(sysctl_flags & SYSCTL_FLAG_TSTAMPS),
(sysctl_flags & SYSCTL_FLAG_SACK),
(sysctl_flags & SYSCTL_FLAG_WSCALE),
tp->rx_opt.rcv_wscale,
tcb->when,
tp->rx_opt.ts_recent,
#ifdef CONFIG_TCP_MD5SIG
md5 ? &md5_hash_location :
#endif
NULL);
} else {
tcp_build_and_update_options((__be32 *)(th + 1),
tp, tcb->when,
#ifdef CONFIG_TCP_MD5SIG
md5 ? &md5_hash_location :
#endif
NULL);
TCP_ECN_send(sk, skb, tcp_header_size);
}
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
if (md5) {
tp->af_specific->calc_md5_hash(md5_hash_location,
md5,
sk, NULL, NULL,
tcp_hdr(skb),
sk->sk_protocol,
skb->len);
}
#endif
icsk->icsk_af_ops->send_check(sk, skb->len, skb);
if (likely(tcb->flags & TCPCB_FLAG_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
if (skb->len != tcp_header_size)
tcp_event_data_sent(tp, skb, sk);
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
TCP_INC_STATS(TCP_MIB_OUTSEGS);
err = icsk->icsk_af_ops->queue_xmit(skb, 0);
if (likely(err <= 0))
return err;
tcp_enter_cwr(sk, 1);
return net_xmit_eval(err);
#undef SYSCTL_FLAG_TSTAMPS
#undef SYSCTL_FLAG_WSCALE
#undef SYSCTL_FLAG_SACK
}
4. tcp_ipv4.c 中的 输入处理,解析包等工作:
int tcp_v4_rcv(struct sk_buff *skb)
{
const struct iphdr *iph;
struct tcphdr *th;
struct sock *sk;
int ret;
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
/* Count it even if it's bad */
TCP_INC_STATS_BH(TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr) / 4)
goto bad_packet;
if (!pskb_may_pull(skb, th->doff * 4))
goto discard_it;
/* An explanation is required here, I think.
* Packet length and doff are validated by header prediction,
* provided case of th->doff==0 is eliminated.
* So, we defer the checks. */
if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
goto bad_packet;
th = tcp_hdr(skb);
iph = ip_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
TCP_SKB_CB(skb)->flags = iph->tos;
TCP_SKB_CB(skb)->sacked = 0;
sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
iph->daddr, th->dest, inet_iif(skb));
if (!sk)
goto no_tcp_socket;
process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
nf_reset(skb);
if (sk_filter(sk, skb))
goto discard_and_relse;
skb->dev = NULL;
bh_lock_sock_nested(sk);
ret = 0;
if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
struct tcp_sock *tp = tcp_sk(sk);
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
tp->ucopy.dma_chan = get_softnet_dma();
if (tp->ucopy.dma_chan)
ret = tcp_v4_do_rcv(sk, skb);
else
#endif
{
if (!tcp_prequeue(sk, skb))
ret = tcp_v4_do_rcv(sk, skb);
}
} else
sk_add_backlog(sk, skb);
bh_unlock_sock(sk);
sock_put(sk);
return ret;
no_tcp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
TCP_INC_STATS_BH(TCP_MIB_INERRS);
} else {
tcp_v4_send_reset(NULL, skb);
}
discard_it:
/* Discard frame. */
kfree_skb(skb);
return 0;
discard_and_relse:
sock_put(sk);
goto discard_it;
do_time_wait:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
TCP_INC_STATS_BH(TCP_MIB_INERRS);
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
inet_twsk_put(inet_twsk(sk));
sk = sk2;
goto process;
}
/* Fall through to ACK */
}
case TCP_TW_ACK:
tcp_v4_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
goto no_tcp_socket;
case TCP_TW_SUCCESS:;
}
goto discard_it;
}
一般会跳到:tcp_v4_do_rcv()
/* The socket must have it's spinlock held when we get
* here.
*
* We have a potential double-lock case here, so even when
* doing backlog processing we use the BH locking scheme.
* This is because we cannot sleep with the original spinlock
* held.
*/
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
/*
* We really want to reject the packet as early as possible
* if:
* o We're expecting an MD5'd packet and this is no MD5 tcp option
* o There is an MD5 option and we're not expecting one
*/
if (tcp_v4_inbound_md5_hash(sk, skb))
goto discard;
#endif
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
TCP_CHECK_TIMER(sk);
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
}
TCP_CHECK_TIMER(sk);
return 0;
}
if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
struct sock *nsk = tcp_v4_hnd_req(sk, skb);
if (!nsk)
goto discard;
if (nsk != sk) {
if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk;
goto reset;
}
return 0;
}
}
TCP_CHECK_TIMER(sk);
if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
}
TCP_CHECK_TIMER(sk);
return 0;
reset:
tcp_v4_send_reset(rsk, skb);
discard:
kfree_skb(skb);
/* Be careful here. If this function gets more complicated and
* gcc suffers from register pressure on the x86, sk (in %ebx)
* might be destroyed here. This current version compiles correctly,
* but you have been warned.
*/
return 0;
csum_err:
TCP_INC_STATS_BH(TCP_MIB_INERRS);
goto discard;
}
5. 然后再跳到 tcp_input.c 中的 tcp_rcv_state_process(),进行相应处理。