tcp_abort_on_overflow
英文翻译理解:溢出中止
所以,溢出指的是什么溢出?
Linux中会维护socket全连接队列,所以这里的溢出指的是全连接队列的溢出。
全连接队列是什么?
全连接队列指的就是TCP三次握手中的第三次握手成功后,内核会把连接从半连接队列(Listen队列)移除,并将其添加到 accept 队列,等待应用进程传输数据。
所以,tcp_abort_on_overflow
- tcp_abort_on_overflow=0,server会丢弃client的ack或者等待队列空闲重发ack+syn。
- tcp_abort_on_overflow=1,server 会发送 reset 包给 client。
如何改变队列大小?
半连接队列大小,长度由tcp_max_syn_backlog和net.core.somaxconn和业务tcp调用listen(fd, backlog)的backlog三者最小值决定
全连接队列大小,长度由net.core.somaxconn和listen(fd, backlog)的backlog两者最小值决定
/*
* Perform a listen. Basically, we allow the protocol to do anything
* necessary for a listen, and if that works, we mark the socket as
* ready for listening.
*/
int sysctl_somaxconn = SOMAXCONN;
asmlinkage long sys_listen(int fd, int backlog)
{
struct socket *sock;
int err, fput_needed;
if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
if ((unsigned) backlog > sysctl_somaxconn)
backlog = sysctl_somaxconn;
err = security_socket_listen(sock, backlog);
if (!err)
err = sock->ops->listen(sock, backlog);
fput_light(sock->file, fput_needed);
}
return err;
}
内核源码流程
- 当新的TCP连接请求到达时,内核会调用tcp_v4_syn_recv_sock()函数来处理连接请求。(tcp_check_req函数中调用了该函数tcp_v4_syn_recv_sock)
/*
* The three way handshake has completed - we got a valid synack -
* now create the new socket.
*/
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
struct inet_request_sock *ireq;
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
if (sk_acceptq_is_full(sk))//判断是否已满
goto exit_overflow;
if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
goto exit;
newsk = tcp_create_openreq_child(sk, req, skb);
if (!newsk)
goto exit;
newsk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(newsk, dst);
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
newinet->daddr = ireq->rmt_addr;
newinet->rcv_saddr = ireq->loc_addr;
newinet->saddr = ireq->loc_addr;
newinet->opt = ireq->opt;
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = skb->nh.iph->ttl;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newinet->opt)
inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
newinet->id = newtp->write_seq ^ jiffies;
tcp_mtup_init(newsk);
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
tcp_initialize_rcv_mss(newsk);
__inet_hash(&tcp_hashinfo, newsk, 0);
__inet_inherit_port(&tcp_hashinfo, sk, newsk);
return newsk;
exit_overflow:
NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
dst_release(dst);
return NULL;
}
- 在tcp_v4_syn_recv_sock()函数中,会首先检查全连接队列是否已满。
static inline int sk_acceptq_is_full(struct sock *sk)
{
return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
}
/*
* Move a socket into listening state.
*/
int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
int err;
lock_sock(sk);
err = -EINVAL;
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto out;
old_state = sk->sk_state;
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto out;
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
if (old_state != TCP_LISTEN) {
err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
if (err)
goto out;
}
sk->sk_max_ack_backlog = backlog;//队列大小
err = 0;
out:
release_sock(sk);
return err;
}
const struct proto_ops inet_stream_ops = {
...
/*
在这里注册
被sys_listen中的sock->ops->listen(sock, backlog)调用注册的函数
*/
.listen = inet_listen,
...
};
- 如果全连接队列未满,会将连接请求放入队列中进行处理;如果全连接队列已满,则根据tcp_abort_on_overflow参数的设置来决定处理方式。
/*
* Process an incoming packet for SYN_RECV sockets represented
* as a request_sock.
*/
struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
struct request_sock *req,
struct request_sock **prev)
{
......
/* OK, ACK is valid, create big socket and
* feed this segment to it. It will repeat all
* the tests. THIS SEGMENT MUST MOVE SOCKET TO
* ESTABLISHED STATE. If it will be dropped after
* socket is created, wait for troubles.
*/
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
req, NULL);//此处调用的就是tcp_v4_syn_recv_sock
if (child == NULL)
goto listen_overflow;
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
inet_csk_reqsk_queue_add(sk, req, child);
return child;
listen_overflow:
if (!sysctl_tcp_abort_on_overflow) {
inet_rsk(req)->acked = 1;
return NULL;
}
......
}
- 如果tcp_abort_on_overflow=1,表示立即终止新的连接请求。这时,内核会调用tcp_abort()函数发送RST包给客户端,拒绝连接请求。
- 如果tcp_abort_on_overflow=0,表示延迟处理连接请求。内核会等待全连接队列有空闲位置后再处理连接请求。
命令查看
-t:表示只显示TCP协议的套接字连接。
-l:表示只显示监听状态的套接字连接,即处于监听(LISTEN)状态的套接字连接。
-n:表示使用数字形式显示IP地址和端口号,而不进行域名解析。
# 半连接队列 无法直接查看
# 全连接队列
ss -tln
# TCP记录
netstat -as | sed -n '/TcpExt/,$p' | sed '/IpExt/q'
REF:半连接队列和全连接队列
总结
tcp_abort_on_overflow控制TCP连接过载时的行为,默认值为0(关闭),开启则直接RST终止连接。