上一章主要描述了sack协商过程以及接收端接收到乱序数据包后回复sack的过程,本章主要描述发送端在收到接收端回复的sack信息后的处理流程。首先看几个概念:
/*
 * Excerpt of struct tcp_sock: only the sender-side counters and SACK
 * bookkeeping fields discussed in this article are shown.
 */
struct tcp_sock {
// Number of packets sent but not yet ACKed. SACKed packets are still
// counted here; the count only drops once the peer really ACKs the data
// and the sender can remove it from the write queue.
u32 packets_out; /* Packets which are "in flight" */
// Number of retransmitted packets that have not been acknowledged yet.
u32 retrans_out; /* Retransmitted packets out */
// Number of packets currently marked as lost.
u32 lost_out; /* Lost packets */
// Number of packets currently marked as SACKed.
u32 sacked_out; /* SACK'd packets */
// Number of packets between snd_una and the most recent (highest) SACK,
// i.e. the packet count of the range [snd_una, highest_sack].
// NOTE(review): the original note states fackets_out = sacked_out + lost_out;
// that presumably only holds when every un-SACKed packet in the range is
// already marked lost -- confirm before relying on it.
u32 fackets_out; /* FACK'd packets */
struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
struct tcp_sack_block recv_sack_cache[4];
// Tracks the highest SACKed position in the write queue.
struct sk_buff *highest_sack; /* skb just after the highest
* skb with SACKed bit set
* (validity guaranteed only if
* sacked_out > 0)
*/
};
发送端在检测数据包乱序是否超过乱序阈值(默认为3,可通过proc下的tcp_reordering配置)时会用到fackets_out和sacked_out。两者的含义如下图所示:fackets_out表示从snd_una到收到的最大sack之间的包个数,sacked_out表示已被接收方sack确认的包个数;
tcp_ack
tcp收到ack的入口函数,首先判断是否是慢路径,如果是,则进入tcp_sacktag_write_queue流程对skb的发送队列做sack标记;标记完成后进入tcp_clean_rtx_queue根据最新的ack号清理发送队列的skb,然后进入tcp_fastretrans_alert,修改当前的拥塞状态机状态,并判断是否有丢包情况,如果有,则进入tcp_update_scoreboard对发送队列的skb标记loss状态,等待重传。
tcp_fastretrans_alert
/*
 * Run the congestion-control state machine (icsk->icsk_ca_state) for an
 * incoming ACK and, when loss is suspected, mark skbs as lost and start
 * retransmitting them.
 *
 * @sk:             socket being processed
 * @acked:          forwarded to tcp_try_undo_partial(); its exact meaning is
 *                  not visible in this excerpt
 * @prior_unsacked: forwarded to tcp_try_undo_partial(), tcp_try_to_open() and
 *                  tcp_cwnd_reduction(); its exact meaning is not visible here
 * @is_dupack:      true when the ACK being processed is a duplicate ACK
 * @flag:           FLAG_* bits describing the ACK; this function tests
 *                  FLAG_DATA_SACKED, FLAG_ECE and FLAG_SND_UNA_ADVANCED
 *
 * Several paths return early (SACK reneging detected, a successful undo,
 * still in Loss state, not yet time to recover, MTU probe failure); on those
 * paths the loss marking and retransmission at the bottom are skipped.
 */
static void tcp_fastretrans_alert(struct sock *sk, const int acked,
const int prior_unsacked,
bool is_dupack, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
// Loss is suspected in two cases:
// 1. is_dupack is true, i.e. this ACK is a duplicate ACK (e.g. caused by
//    reordering);
// 2. this ACK carried SACK information (FLAG_DATA_SACKED) and
//    tcp_fackets_out() exceeds the reordering threshold tp->reordering.
// tcp_update_scoreboard() then marks skbs in the write queue as lost,
// depending on the mode:
// 1. reno: one skb per call;
// 2. fack: fackets_out - reordering skbs per call;
// 3. sack: sacked_out - reordering skbs per call.
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
int fast_rexmit = 0;
// Sanity checks: repair counters that are inconsistent with each other
// (SACKed packets without anything in flight, FACKed without SACKed).
if (WARN_ON(!tp->packets_out && tp->sacked_out))
tp->sacked_out = 0;
if (WARN_ON(!tp->sacked_out && tp->fackets_out))
tp->fackets_out = 0;
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
if (flag & FLAG_ECE)
tp->prior_ssthresh = 0;
/* B. In all the states check for reneging SACKs. */
if (tcp_check_sack_reneging(sk, flag))
return;
/* C. Check consistency of the current state. */
tcp_verify_left_out(tp);
/* D. Check state exit conditions. State can be terminated
* when high_seq is ACKed. */
if (icsk->icsk_ca_state == TCP_CA_Open) {
WARN_ON(tp->retrans_out != 0);
tp->retrans_stamp = 0;
} else if (!before(tp->snd_una, tp->high_seq)) {
switch (icsk->icsk_ca_state) {
case TCP_CA_CWR:
/* CWR is to be held something *above* high_seq
* is ACKed for CWR bit to reach receiver. */
if (tp->snd_una != tp->high_seq) {
tcp_end_cwnd_reduction(sk);
tcp_set_ca_state(sk, TCP_CA_Open);
}
break;
case TCP_CA_Recovery:
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
if (tcp_try_undo_recovery(sk))
return;
tcp_end_cwnd_reduction(sk);
break;
}
}
/* E. Process state. */
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
if (tcp_try_undo_partial(sk, acked, prior_unsacked))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
tcp_fackets_out(tp) > tp->reordering;
}
if (tcp_try_undo_dsack(sk)) {
tcp_try_keep_open(sk);
return;
}
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, is_dupack);
if (icsk->icsk_ca_state != TCP_CA_Open)
return;
/* Fall through to processing in Open state. */
default:
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
if (is_dupack)
tcp_add_reno_sack(sk);
}
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
if (!tcp_time_to_recover(sk, flag)) {
tcp_try_to_open(sk, flag, prior_unsacked);
return;
}
/* MTU probe failure: don't reduce cwnd */
if (icsk->icsk_ca_state < TCP_CA_CWR &&
icsk->icsk_mtup.probe_size &&
tp->snd_una == tp->mtu_probe.probe_seq_start) {
tcp_mtup_probe_failed(sk);
/* Restores the reduction we did in tcp_mtup_probe() */
tp->snd_cwnd++;
tcp_simple_retransmit(sk);
return;
}
/* Otherwise enter Recovery state */
tcp_enter_recovery(sk, (flag & FLAG_ECE));
// Recovery was entered by this ACK; tell the helpers below.
fast_rexmit = 1;
}
// Loss is suspected: mark skbs in the write queue as lost.
if (do_lost)
tcp_update_scoreboard(sk, fast_rexmit);
tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
tcp_xmit_retransmit_queue(sk);
}
从tcp_fastretrans_alert可以看出,有两种情况会被认定为有丢包:
1)、检查到有重复ack(is_dupack为true);
2)、本次ack携带了新的sack信息,且tcp_fackets_out()返回的包个数超过乱序阈值时;
/*
 * Packet count that callers compare against tp->reordering when deciding
 * whether loss should be suspected.
 *
 * Reno mode:     sacked_out + 1
 * Non-Reno mode: fackets_out
 *
 * NOTE(review): in Reno mode sacked_out presumably counts duplicate ACKs
 * (see tcp_add_reno_sack() callers); confirm the rationale for the "+ 1".
 *
 * In non-Reno mode the amount that later gets marked lost depends on the
 * variant, as implemented in tcp_update_scoreboard():
 *   - SACK: sacked_out  - reordering
 *   - FACK: fackets_out - reordering
 */
static inline int tcp_fackets_out(const struct tcp_sock *tp)
{
	if (tcp_is_reno(tp))
		return tp->sacked_out + 1;

	return tp->fackets_out;
}
最终标记lost的函数在tcp_mark_head_lost,这里就是根据不同的模式对write_queue队列标记lost,函数最终走到tcp_skb_mark_lost,对需要标记重传的skb设置loss标志位TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
/*
 * Decide how many skbs should be marked lost and delegate the actual
 * marking to tcp_mark_head_lost().
 *
 * @sk:          socket whose scoreboard is updated
 * @fast_rexmit: non-zero when the caller has just entered recovery; only
 *               consulted in plain SACK mode when the SACKed count is still
 *               below the reordering threshold
 *
 * The third argument of tcp_mark_head_lost() is passed as 1 only when a
 * single head packet is requested (presumably a "mark head" switch --
 * confirm against its definition).
 */
static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int excess;

	/* Reno: exactly one packet is marked per call. */
	if (tcp_is_reno(tp)) {
		tcp_mark_head_lost(sk, 1, 1);
		return;
	}

	/*
	 * FACK: mark everything beyond the reordering window below the
	 * highest SACK, but never fewer than one packet.
	 */
	if (tcp_is_fack(tp)) {
		excess = tp->fackets_out - tp->reordering;
		tcp_mark_head_lost(sk, excess > 0 ? excess : 1, 0);
		return;
	}

	/*
	 * Plain SACK: the number of SACKed packets has to reach the
	 * reordering threshold before marking starts; below it, a single
	 * packet is marked only on fast retransmit.
	 */
	excess = tp->sacked_out - tp->reordering;
	if (excess >= 0)
		tcp_mark_head_lost(sk, excess, 0);
	else if (fast_rexmit)
		tcp_mark_head_lost(sk, 1, 1);
}
标记完lost后进入重传发送函数tcp_xmit_retransmit_queue,这里会遍历write_queue队列的skb,如果skb带有lost标记,则对该skb进行重传。