TCP连接保活检测Keepalive

TCP连接的keepalive定时器用于定期检测连接是否正常。

Keepalive初始化

默认情况下,连接空闲两个小时之后触发Keepalive探测,触发之后,最多发送9次探测报文,每个报文的间隔为75秒钟。即在9次探测都没有接收到对端的回复之后,认为连接已经断开。

#define TCP_KEEPALIVE_TIME  (120*60*HZ) /* two hours */
#define TCP_KEEPALIVE_PROBES    9       /* Max of 9 keepalive probes    */
#define TCP_KEEPALIVE_INTVL (75*HZ)

static int __net_init tcp_sk_init(struct net *net)
{
    net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
    net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
    net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

在函数inet_csk_init_xmit_timers中初始化keepalive定时器。

void tcp_init_xmit_timers(struct sock *sk)
{
    inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
                  &tcp_keepalive_timer);

keepalive设置

可通过PROC文件调整Keepalive的相关参数。这些参数保存在网络命名空间(struct net)中,对该命名空间内所有未单独设置套接口选项的TCP套接口生效。

static struct ctl_table ipv4_net_table[] = {
    {
        .procname   = "tcp_keepalive_time",
        .data       = &init_net.ipv4.sysctl_tcp_keepalive_time,
        .maxlen     = sizeof(int),
    },
    {
        .procname   = "tcp_keepalive_probes",
        .data       = &init_net.ipv4.sysctl_tcp_keepalive_probes,
        .maxlen     = sizeof(int),
    },
    {
        .procname   = "tcp_keepalive_intvl",
        .data       = &init_net.ipv4.sysctl_tcp_keepalive_intvl,
        .maxlen     = sizeof(int),
    },

默认值如下:

$ cat /proc/sys/net/ipv4/tcp_keepalive_time
7200
$ cat /proc/sys/net/ipv4/tcp_keepalive_probes 
9
$ cat /proc/sys/net/ipv4/tcp_keepalive_intvl 
75

也可以通过套接口选项调整Keepalive参数,仅对操作的套接口生效。注意对于TCP_KEEPIDLE选项,如果套接口开启了SOCK_KEEPOPEN标志,并且不处于CLOSE或LISTEN状态,内核将根据keepalive的剩余时长重新设置定时器的超时时间。

static int do_tcp_setsockopt(struct sock *sk, int level,
        int optname, char __user *optval, unsigned int optlen)
{

    switch (optname) {
    case TCP_KEEPIDLE:
        if (val < 1 || val > MAX_TCP_KEEPIDLE)
            err = -EINVAL;
        else {
            tp->keepalive_time = val * HZ;
            if (sock_flag(sk, SOCK_KEEPOPEN) && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
                u32 elapsed = keepalive_time_elapsed(tp);
                if (tp->keepalive_time > elapsed)
                    elapsed = tp->keepalive_time - elapsed;
                else
                    elapsed = 0;
                inet_csk_reset_keepalive_timer(sk, elapsed);
            }
        }
        break;
    case TCP_KEEPINTVL:
        if (val < 1 || val > MAX_TCP_KEEPINTVL)
            err = -EINVAL;
        else
            tp->keepalive_intvl = val * HZ;
        break;
    case TCP_KEEPCNT:
        if (val < 1 || val > MAX_TCP_KEEPCNT)
            err = -EINVAL;
        else
            tp->keepalive_probes = val;
        break;

对于keepalive的三个参数,内核做了如下的限制。keepalive_time时长不能超过32767秒(约9.1小时);keepalive_probes次数不能超过127次;最后,keepalive_intvl的时长也不能超过32767秒(约9.1小时)。三个参数的最小值均为1。

#define MAX_TCP_KEEPIDLE    32767
#define MAX_TCP_KEEPINTVL   32767
#define MAX_TCP_KEEPCNT     127

另外,最重要的套接口层的keepalive开关SO_KEEPALIVE在如下函数sock_setsockopt中设置,具体实现由注册的tcp_set_keepalive函数完成。

int sock_setsockopt(struct socket *sock, int level, int optname,
            char __user *optval, unsigned int optlen)
{
    switch (optname) { 
    case SO_KEEPALIVE:
        if (sk->sk_prot->keepalive)
            sk->sk_prot->keepalive(sk, valbool);
        sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
        break;
		
struct proto tcp_prot = {
    .name           = "TCP",
    .keepalive      = tcp_set_keepalive,
}

如果套接口处于CLOSE或LISTEN状态,不作处理。如果val为真(即开启keepalive功能),并且套接口之前未开启过,则启动keepalive定时器;否则,如果val为零,关闭keepalive定时器。

/*
 * Start or stop the keepalive timer when keepalive is switched on or off
 * for a socket; val is non-zero to enable and zero to disable.
 */
void tcp_set_keepalive(struct sock *sk, int val)
{
    /* Nothing to do for sockets in the CLOSE or LISTEN state. */
    if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
        return;

    /*
     * Arm the timer only on an off-to-on transition: SOCK_KEEPOPEN still
     * holds the previous setting here, because sock_setsockopt() updates
     * the flag after calling this hook. Enabling an already-enabled socket
     * therefore leaves the running timer untouched.
     */
    if (val && !sock_flag(sk, SOCK_KEEPOPEN))
        inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
    else if (!val)
        inet_csk_delete_keepalive_timer(sk);
}

如下,如果监听套接口开启了keepalive功能,其子套接口也将开启keepalive,并在创建时启动keepalive定时器。

struct sock *tcp_create_openreq_child(const struct sock *sk,
                      struct request_sock *req, struct sk_buff *skb)
{
    if (sock_flag(newsk, SOCK_KEEPOPEN))
        inet_csk_reset_keepalive_timer(newsk, keepalive_time_when(newtp));

Keepalive定时器超时

如果此时套接口被用户调用占用,将keepalive定时器延迟50毫秒。如果套接口处于监听状态不作处理。

static void tcp_keepalive_timer (struct timer_list *t)
{
    struct sock *sk = from_timer(sk, t, sk_timer);
    struct inet_connection_sock *icsk = inet_csk(sk);
    struct tcp_sock *tp = tcp_sk(sk);

    /* Only process if socket is not in use. */
    bh_lock_sock(sk);
    if (sock_owned_by_user(sk)) {
        /* Try again later. */
        inet_csk_reset_keepalive_timer (sk, HZ/20);
        goto out;
    }
    if (sk->sk_state == TCP_LISTEN) {
        pr_err("Hmm... keepalive on a LISTEN ???\n");
        goto out;
    }

如果套接口没有启用Keepalive功能,或者套接口处于CLOSE或者SYN_SENT状态,不进行处理。如果网络中存在已发出但还没有确认的报文(packets_out),或者套接口发送队列不为空,说明连接上仍有数据在传输,不需要进行keepalive探测,直接启动下一个Keepalive周期。

    if (!sock_flag(sk, SOCK_KEEPOPEN) ||
        ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
        goto out;

    elapsed = keepalive_time_when(tp);

    /* It is alive without keepalive 8) */
    if (tp->packets_out || !tcp_write_queue_empty(sk))
        goto resched;

如果连接的空闲时长(keepalive_time_elapsed)达到或超过设定的keepalive时间,分成两种情况处理。第一,使能了UTO(TCP_USER_TIMEOUT)时,如果空闲时长达到或超出UTO时长,并且本地已经发送过探测报文,还是没有收到响应,则判定此连接已经出错。第二,UTO没有使能的情况下,如果keepalive的probe发送次数已经达到设定值(默认9次),同样判定此连接已经出错。这两种情况下都发送TCP复位报文,终止连接。

如果以上两种情况都不成立,由函数tcp_write_wakeup发送探测报文,并且增加probe计数,将下一次超时设定为间隔值(默认为75秒)。如果由于本地拥塞导致probe报文未能发送,将下一次超时设置为500毫秒之后(TCP_RESOURCE_PROBE_INTERVAL)。

    elapsed = keepalive_time_elapsed(tp);
    
    if (elapsed >= keepalive_time_when(tp)) {
        /* If the TCP_USER_TIMEOUT option is enabled, use that
         * to determine when to timeout instead.
         */
        if ((icsk->icsk_user_timeout != 0 &&
            elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
            icsk->icsk_probes_out > 0) ||

            (icsk->icsk_user_timeout == 0 &&
            icsk->icsk_probes_out >= keepalive_probes(tp))) {
            tcp_send_active_reset(sk, GFP_ATOMIC);
            tcp_write_err(sk);
            goto out;
        }
        if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
            icsk->icsk_probes_out++;
            elapsed = keepalive_intvl_when(tp);
        } else {
            /* If keepalive was lost due to local congestion, try harder.
             */ 
            elapsed = TCP_RESOURCE_PROBE_INTERVAL;
        }

最后,如果空闲时长尚未达到设置的keepalive时间(例如定时器启动之后又收到过对端的报文),则将定时器重新设置为剩余的时长。

    } else {
        /* It is tp->rcv_tstamp + keepalive_time_when(tp) */
        elapsed = keepalive_time_when(tp) - elapsed;
    }
    
    sk_mem_reclaim(sk);
        
resched:
    inet_csk_reset_keepalive_timer (sk, elapsed);
    goto out;

探测报文

如下函数tcp_write_wakeup,由上节的介绍可知,在发送keepalive探测报文时,套接口的发送队列是空的,所以内核使用tcp_xmit_probe_skb函数发送探测报文,如果当前SND.UNA未确认报文包含紧急指针SND.UP,第二个参数设置为1。

注意TCP的窗口探测机制与keepalive共同使用此函数,这里去掉了窗口探测相关代码。

int tcp_write_wakeup(struct sock *sk, int mib)
{
    struct tcp_sock *tp = tcp_sk(sk);

    if (sk->sk_state == TCP_CLOSE) return -1;

    skb = tcp_send_head(sk);
    if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
        ...
    } else {
        if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
            tcp_xmit_probe_skb(sk, 1, mib);
        return tcp_xmit_probe_skb(sk, 0, mib);

如下发送函数tcp_xmit_probe_skb,发送ACK报文,如果紧急指针urgent为真,ACK报文序号为SND.UNA,否则,如果urgent不为真,ACK报文序号为SND.UNA减去1(ACK报文不占用新序号)。

static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
{
    struct tcp_sock *tp = tcp_sk(sk);

    /* We don't queue it, tcp_transmit_skb() sets ownership. */
    skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
    if (!skb) return -1;

    /* Reserve space for headers and set control bits. */
    skb_reserve(skb, MAX_TCP_HEADER);

    /* Use a previous sequence.  This should cause the other
     * end to send an ack.  Don't queue or clone SKB, just send it.
     */
    tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
    NET_INC_STATS(sock_net(sk), mib);
    return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);

keepalive与窗口探测

内核使用icsk_probes_out保存keepalive的探测计数,并且,在接收到对端ACK报文之后,清空此计数。窗口探测同样使用icsk_probes_out变量做计数,但是由于在套接口发送队列有报文时,才进行零窗口探测,而相反,只有在发送队列为空时,才会进行keepalive处理,所以两者互不影响。

static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
    /* We passed data and got it acked, remove any soft error
     * log. Something worked...
     */
    sk->sk_err_soft = 0;
    icsk->icsk_probes_out = 0;
    tp->rcv_tstamp = tcp_jiffies32;

Keepalive与FIN_WAIT2

内核中keepalive和套接口FIN_WAIT2状态共用一个定时器,以上介绍了在超时处理中,keepalive的处理部分,以下为FIN_WAIT2部分的处理,后者优先于keepalive功能,先行处理。如果linger2时间大于等于零,并且tcp_fin_time的时长超过TCP_TIMEWAIT_LEN(60秒),TCP套接口进入TIME_WAIT状态,第三个超时参数tmo设置为减去TCP_TIMEWAIT_LEN之后的剩余时长。否则,如果linger2小于零,或者减去TCP_TIMEWAIT_LEN的剩余时长值小于等于零,不再需要进入TIME_WAIT状态,发送复位报文,断开连接。

static void tcp_keepalive_timer (struct timer_list *t)
{
    struct sock *sk = from_timer(sk, t, sk_timer);
    struct inet_connection_sock *icsk = inet_csk(sk);
    struct tcp_sock *tp = tcp_sk(sk);

    tcp_mstamp_refresh(tp);
    if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
        if (tp->linger2 >= 0) {
            const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;

            if (tmo > 0) {
                tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
                goto out;
            }
        }
        tcp_send_active_reset(sk, GFP_ATOMIC);
        goto death;
    }

内核版本 5.0

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值