TCP在发送数据的时候会携带ACK标记,但这里我们要研究的是TCP在收到数据时发送不带数据的ACK报文的情况,这时ACK的发送是通过tcp_send_ack函数完成的:
/* Build and transmit a pure ACK (an skb with no payload). If the skb cannot
 * be allocated, fall back to the delayed-ACK timer instead of sending now.
 */
3027 void tcp_send_ack(struct sock *sk)
3028 {
3029 struct sk_buff *buff;
3030
3031 /* If we have been reset, we may not send again. */
3032 if (sk->sk_state == TCP_CLOSE)
3033 return;
3034
3035 /* We are not putting this on the write queue, so
3036 * tcp_transmit_skb() will set the ownership to this
3037 * sock.
3038 */
3039 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); // allocate an skb sized MAX_TCP_HEADER: room for headers only, no payload
3040 if (buff == NULL) { // allocation failed
3041 inet_csk_schedule_ack(sk);
3042 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; // reset the delayed-ACK timeout estimate (ato) to its minimum
3043 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3044 TCP_DELACK_MAX, TCP_RTO_MAX); // arm the delayed-ACK timer so the ACK is sent from the timer path later
3045 return;
3046 }
3047
3048 /* Reserve space for headers and prepare control bits. */
3049 skb_reserve(buff, MAX_TCP_HEADER);
3050 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); // control skb carrying only the ACK flag
3051
3052 /* Send it off, this clears delayed acks for us. */
3053 TCP_SKB_CB(buff)->when = tcp_time_stamp; // record the send time in the skb control block
3054 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); // transmit the data-less ACK (clone_it = 0)
3055 }
tcp_transmit_skb函数会将ACK标记写入TCP报头,并设置ack_seq:
828 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
829 gfp_t gfp_mask)
830 {
831 const struct inet_connection_sock *icsk = inet_csk(sk);
...
899 th->ack_seq = htonl(tp->rcv_nxt);
900 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
901 tcb->tcp_flags);
...
899:tp->rcv_nxt是“下次希望接收的数据的序列号”,tp->rcv_nxt - 1则是“已经按序接收到的最后一个字节的序列号”。
发送ACK的时机有:
(1)进程将TCP收到的数据读取(通过tcp_recvmsg、tcp_splice、DMA等等)完毕,会调用tcp_cleanup_rbuf函数发送ACK来通知数据发送端更新窗口:
/* Decide, after the reader has consumed received data, whether an ACK should
 * be sent right away, and send it via tcp_send_ack() if so. An ACK is sent
 * either because a scheduled ACK is overdue, or because the read opened the
 * receive window significantly.
 *
 * copied: presumably the number of bytes the reader just consumed; the code
 * only tests whether it is > 0 -- confirm against callers.
 */
1323 void tcp_cleanup_rbuf(struct sock *sk, int copied)
1324 {
1325 struct tcp_sock *tp = tcp_sk(sk);
1326 bool time_to_ack = false;
1327
1328 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
1329
1330 WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
1331 "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
1332 tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
1333
1334 if (inet_csk_ack_scheduled(sk)) { // an ACK is pending (has been scheduled)
1335 const struct inet_connection_sock *icsk = inet_csk(sk);
1336 /* Delayed ACKs frequently hit locked sockets during bulk
1337 * receive. */
1338 if (icsk->icsk_ack.blocked || // the delayed-ACK timer fired while the process held the socket lock, so the ACK could not be sent
1339 /* Once-per-two-segments ACK was not sent by tcp_input.c */
1340 tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || // more than one rcv_mss of data lies past rcv_wup and has not been ACKed yet
1341 /*
1342 * If this read emptied read buffer, we send ACK, if
1343 * connection is not bidirectional, user drained
1344 * receive buffer and there was a small segment
1345 * in queue.
1346 */
1347 (copied > 0 && // the reader copied at least one byte
1348 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
1349 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
1350 !icsk->icsk_ack.pingpong)) && // socket is not in interactive (pingpong) mode; interactive mode allows the ACK to be delayed
1351 !atomic_read(&sk->sk_rmem_alloc))) // no receive memory is charged to the socket, i.e. the receive buffer is empty
1352 time_to_ack = true;
1353 }
1354
1355 /* We send an ACK if we can now advertise a non-zero window
1356 * which has been raised "significantly".
1357 *
1358 * Even if window raised up to infinity, do not send window open ACK
1359 * in states, where we will not receive more. It is useless.
1360 */
1361 if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1362 __u32 rcv_window_now = tcp_receive_window(tp); // current receive window, i.e. the send window as the peer computes it
1363
1364 /* Optimize, __tcp_select_window() is not cheap. */
1365 if (2*rcv_window_now <= tp->window_clamp) { // current window is at most half of window_clamp (the maximum receive window)
1366 __u32 new_window = __tcp_select_window(sk); // window that would actually be advertised given the current buffer state
1367
1368 /* Send ACK now, if this read freed lots of space
1369 * in our buffer. Certainly, new_window is new window.
1370 * We can advertise it now, if it is not less than current one.
1371 * "Lots" means "at least twice" here.
1372 */
1373 if (new_window && new_window >= 2 * rcv_window_now) // new window is non-zero and at least twice what the peer currently sees
1374 time_to_ack = true;
1375 }
1376 }
1377 if (time_to_ack) // an immediate ACK was decided on
1378 tcp_send_ack(sk);
1379 }
1348-1349:看一下icsk->icsk_ack.pend