注:本文分析基于3.10.0-693.el7内核版本,即CentOS 7.4
1、函数原型
int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen);
参数说明:
sockfd:套接字的文件描述符,socket()系统调用返回的fd
addr:指向存放地址信息的结构体的首地址
addrlen:存放地址信息的结构体的大小,其实也就是sizeof(struct sockaddr)
可以看出,connect()系统调用的入参和bind()系统调用入参是一致的。
2、内核实现
/*
 * connect() system call entry point.
 *
 * Looks up the struct socket behind @fd, copies the user-space address
 * into kernel space, runs the LSM security hook, and then dispatches to
 * the protocol-specific connect handler (inet_stream_connect for TCP).
 */
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
		int, addrlen)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err, fput_needed;

	/* Map the fd to its socket; bind() and listen() start the same way. */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;
	/* Copy the user-supplied sockaddr into kernel space. */
	err = move_addr_to_kernel(uservaddr, addrlen, &address);
	if (err < 0)
		goto out_put;
	/* Security module (LSM) hook, e.g. SELinux. */
	err = security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
	if (err)
		goto out_put;
	/* Main connect handler; for TCP sockets this is inet_stream_connect(). */
	err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
				 sock->file->f_flags);
out_put:
	fput_light(sock->file, fput_needed);
out:
	return err;
}
和bind(),listen()一样,第一步肯定是通过fd获取到socket结构。接下来sock->ops指向的是inet_stream_ops,因此sock->ops->connect调用的就是inet_stream_connect()。inet_stream_connect()对__inet_stream_connect()做了一个简单的封装。
/*
 * Lock-holding wrapper around __inet_stream_connect(): serializes the
 * connect against other operations on the same sock, then delegates.
 */
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __inet_stream_connect(sock, uaddr, addr_len, flags);
	release_sock(sk);

	return ret;
}
/*
 * Core of the stream-socket connect path: validate state, call the
 * protocol connect handler, then (for blocking sockets) wait for the
 * three-way handshake to complete.
 *
 * NOTE(review): the body is elided ("...") in this excerpt; only the
 * state handling and the wait logic are shown here.
 */
int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			  int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	int err;
	long timeo;

	if (addr_len < sizeof(uaddr->sa_family))
		return -EINVAL;

	/* By convention, AF_UNSPEC means "disconnect". */
	if (uaddr->sa_family == AF_UNSPEC) {
		err = sk->sk_prot->disconnect(sk, flags);
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		goto out;
	}

	/* Dispatch on the socket-layer (not TCP) state. */
	switch (sock->state) {
	default:
		err = -EINVAL;
		goto out;
	case SS_CONNECTED:
		err = -EISCONN;
		goto out;
	case SS_CONNECTING:
		err = -EALREADY;
		/* Fall out of switch with err, set for this state */
		break;
	case SS_UNCONNECTED: /* not yet connected: the normal first-connect path */
		err = -EISCONN;
		if (sk->sk_state != TCP_CLOSE)
			goto out;
		/* Protocol connect; for TCP this ends up in tcp_v4_connect(). */
		err = sk->sk_prot->connect(sk, uaddr, addr_len);
		if (err < 0)
			goto out;
		sock->state = SS_CONNECTING;
		/* Just entered SS_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		/* This is the error code a non-blocking connect returns. */
		err = -EINPROGRESS;
		break;
	}

	/* 0 for a non-blocking socket; otherwise the send timeout,
	 * configurable via the SO_SNDTIMEO socket option. */
	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
				tcp_sk(sk)->fastopen_req &&
				tcp_sk(sk)->fastopen_req->data ? 1 : 0;
		/* Error code is set above */
		/* Non-blocking (timeo == 0): return immediately; otherwise
		 * sleep in inet_wait_for_connect() until the handshake
		 * finishes or the timeout expires. */
		if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
			goto out;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
	}
	...
}
sk->sk_prot指向tcp_prot,因此sk->sk_prot->connect最终调用的就是tcp_v4_connect()。
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
__be16 orig_sport, orig_dport;
__be32 daddr, nexthop;
struct flowi4 *fl4;
struct rtable *rt;
int err;
struct ip_options_rcu *inet_opt;
...
nexthop = daddr = usin->sin_addr.s_addr;//赋值下一跳地址和目的地址,
inet_opt = rcu_dereference_protected(inet->inet_opt,
sock_owned_by_user(sk));
if (inet_opt && inet_opt->opt.srr) {
if (!daddr)
return -EINVAL;
nexthop = inet_opt->opt.faddr;
}
orig_sport = inet->inet_sport;//源地址
orig_dport = usin->sin_port;//源端口
fl4 = &inet->cork.fl.u.ip4;
//根据当前信息,查找路由,并新建路由缓存
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_TCP,
orig_sport, orig_dport, sk);
...
if (!inet->inet_saddr)
//如果socket没有绑定ip地址,使用路由查询返回的结果
inet->inet_saddr = fl4->saddr;
//inet_rcv_saddr表示的是本地绑定的ip地址,也就是源地址
inet->inet_rcv_saddr = inet->inet_saddr;
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
/* Reset inherited state */
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
if (likely(!tp->repair))
tp->write_seq = 0;
}
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
tcp_fetch_timewait_stamp(sk, &rt->dst);
inet->inet_dport = usin->sin_port;//目的端口
inet->inet_daddr = daddr;//目的地址
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
tcp_set_state(sk, TCP_SYN_SENT);//socket进入SYN-SENT状态
//绑定ip和端口号,并将sock加入哈希表中
err = inet_hash_connect(&tcp_death_row, sk);
if (err)
goto failure;
sk_set_txhash(sk);
//使用新的端口号再次做路由查询,
//因为如果客户端没有用bind()绑定IP地址和端口号,上面inet_hash_connect()
//就会自动选择一个端口号,因此源端口会不一样
rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
inet->inet_sport, inet->inet_dport, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
goto failure;
}
/* OK, now commit destination to socket. */
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->dst);
if (!tp->write_seq && likely(!tp->repair))
//生成序列号
tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
inet->inet_daddr,
inet->inet_sport,
usin->sin_port);
inet->inet_id = tp->write_seq ^ jiffies;
//由socket层转入TCP层,构造SYN报文并发送
err = tcp_connect(sk);
...
}
我们知道,一般情况客户端发起连接不会调用bind()绑定ip和端口号,这个动作交由系统自动处理,其实就是在调用connect()时,由inet_hash_connect()完成这一操作。
/*
* Bind a port for a connect operation and hash it.
*/
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
__inet_check_established, __inet_hash_nolisten);
}
/*
 * Pick (if necessary) and bind a local port for an outgoing connection,
 * then hash the socket into the established table.
 *
 * Two cases:
 *  - inet_num == 0: the caller never bind()ed, so walk the local port
 *    range looking for a usable port, checking the bind hash and then
 *    the established/timewait chains for conflicts.
 *  - inet_num != 0: a port is already bound; only the uniqueness of the
 *    full 4-tuple must be verified via check_established().
 */
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
			struct sock *sk, u32 port_offset,
			int (*check_established)(struct inet_timewait_death_row *,
				struct sock *, __u16, struct inet_timewait_sock **),
			int (*hash)(struct sock *sk, struct inet_timewait_sock *twp))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	const unsigned short snum = inet_sk(sk)->inet_num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;
	struct net *net = sock_net(sk);
	int twrefcnt = 1;

	if (!snum) { /* no local port bound yet */
		/* Port selection here closely mirrors what bind() does. */
		int i, remaining, low, high, port;
		static u32 hint;
		u32 offset = hint + port_offset;
		struct inet_timewait_sock *tw = NULL;

		inet_get_local_port_range(net, &low, &high); /* ephemeral port range */
		remaining = (high - low) + 1;

		/* By starting with offset being an even number,
		 * we tend to leave about 50% of ports for other uses,
		 * like bind(0).
		 */
		offset &= ~1;

		for (i = 0; i < remaining; i++) {
			port = low + (i + offset) % remaining;
			if (inet_is_reserved_local_port(port)) /* skip reserved ports */
				continue;
			head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)];
			spin_lock_bh(&head->lock);
			/* Does this port collide with an entry in the bind hash? */
			inet_bind_bucket_for_each(tb, &head->chain) {
				if (net_eq(ib_net(tb), net) && tb->port == port) {
					/* fastreuse/fastreuseport start at 0 and
					 * become > 0 via socket options, so a
					 * bucket with either >= 0 was created by
					 * bind() and must not be reused here.
					 * Buckets created below get both set to
					 * -1, marking them connect()-owned; bind()
					 * may still reuse such a port, but once
					 * bind() flips the flags to >= 0,
					 * connect() can no longer use it.
					 */
					if (tb->fastreuse >= 0 || tb->fastreuseport >= 0)
						goto next_port;
					WARN_ON(hlist_empty(&tb->owners));
					/* No bind-hash conflict: now check the
					 * established and time_wait entries. */
					if (!check_established(death_row, sk, port, &tw))
						goto ok;
					goto next_port;
				}
			}

			/* Port is free: create an inet_bind_bucket recording ip|port. */
			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
					net, head, port);
			if (!tb) {
				spin_unlock_bh(&head->lock);
				break;
			}
			/* -1 marks the bucket as allocated by connect(). */
			tb->fastreuse = -1;
			tb->fastreuseport = -1;
			goto ok;

next_port:
			spin_unlock_bh(&head->lock);
			cond_resched();
		}

		return -EADDRNOTAVAIL;

ok:
		hint += (i + 2) & ~1;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port); /* insert into the bind hash */
		if (sk_unhashed(sk)) {
			/* Record the chosen source port on the sock ... */
			inet_sk(sk)->inet_sport = htons(port);
			/* ... and, since it is not yet in the established hash,
			 * add it there via hash(), i.e. __inet_hash_nolisten(). */
			twrefcnt += hash(sk, tw);
		}
		if (tw)
			/* Unhash the displaced TIME_WAIT sock from the bind hash. */
			twrefcnt += inet_twsk_bind_unhash(tw, hinfo);
		spin_unlock(&head->lock);

		if (tw) {
			/* As in __inet_check_established(): drop the timewait
			 * sock from the bind and established hashes and cancel
			 * its timer.  NOTE(review): the original author
			 * questions why the two code paths stay separate. */
			inet_twsk_deschedule(tw, death_row);
			while (twrefcnt) {
				twrefcnt--;
				inet_twsk_put(tw);
			}
		}

		ret = 0;
		goto out;
	}

	/* A local port was already bound via bind(). */
	head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);
	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		/* This sock is the port's only owner, so no conflict is
		 * possible: hash it straight into the established table via
		 * hash(), i.e. __inet_hash_nolisten(). */
		hash(sk, NULL);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		/* Check for raw 4-tuple conflicts against the established and
		 * time_wait entries.  NOTE(review): the original author
		 * remarks that, unlike the port-selection path, reusability
		 * of the sks on the chain is not considered here --
		 * __inet_check_established() is called with twp == NULL. */
		ret = check_established(death_row, sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
绑定端口的时候就和bind()系统调用相似了,要考虑系统选择的端口是否会有冲突,这其中涉及几条链表的冲突判断。第一条自然是bind哈希表,然后是established哈希表;在后续的内核版本中time_wait被独立为一个哈希表,因此也要检查这个time_wait哈希表,但当前CentOS 7.4内核中established和time_wait仍共用同一条链表。这里就是通过__inet_check_established()来判断是否和established或者time_wait连接冲突的。
/*
 * Verify that the 4-tuple this socket is about to use is unique in the
 * established hash (which, in this kernel version, also holds the
 * TIME_WAIT socks).  On success the socket is inserted into the hash
 * with its source port recorded.
 *
 * Returns 0 when unique (possibly after displacing a reusable TIME_WAIT
 * sock, returned through @twp when non-NULL), -EADDRNOTAVAIL on conflict.
 */
static int __inet_check_established(struct inet_timewait_death_row *death_row,
				    struct sock *sk, __u16 lport,
				    struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
	/* Note the deliberate swap: established-hash lookups are keyed the
	 * way an incoming packet would see the addresses. */
	__be32 daddr = inet->inet_rcv_saddr;
	__be32 saddr = inet->inet_daddr;
	int dif = sk->sk_bound_dev_if;
	INET_ADDR_COOKIE(acookie, saddr, daddr)
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	struct net *net = sock_net(sk);
	unsigned int hash = inet_ehashfn(net, daddr, lport,
			saddr, inet->inet_dport);
	/* Established-hash bucket selected by the 4-tuple. */
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
	struct sock *sk2;
	const struct hlist_nulls_node *node;
	struct inet_timewait_sock *tw = NULL;
	int twrefcnt = 0;

	spin_lock(lock);

	sk_nulls_for_each(sk2, node, &head->chain) {
		if (sk2->sk_hash != hash)
			continue;
		/* Full 4-tuple (plus netns and device) comparison. */
		if (likely(INET_MATCH(sk2, net, acookie,
				saddr, daddr, ports, dif))) {
			/* Identical tuple: a TIME_WAIT sock may still be reusable. */
			if (sk2->sk_state == TCP_TIME_WAIT) {
				tw = inet_twsk(sk2);
				/* twsk_unique() decides whether we may take it over. */
				if (twsk_unique(sk, sk2, twp))
					break;
			}
			goto not_unique;
		}
	}

	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity.
	 */
	inet->inet_num = lport; /* bound port number (host order) */
	inet->inet_sport = htons(lport); /* source port (network order) */
	sk->sk_hash = hash;
	WARN_ON(!sk_unhashed(sk));
	/* NOTE(review): the original author questions inserting into the
	 * established hash already here, before the handshake completes --
	 * presumably this early insertion reserves the 4-tuple; confirm
	 * against later kernel versions. */
	__sk_nulls_add_node_rcu(sk, &head->chain);
	if (tw) {
		/* Remove the displaced TIME_WAIT sock from this chain.
		 * Later kernels split established and time_wait into separate
		 * tables; in this kernel (CentOS 7.4) they share one chain. */
		twrefcnt = inet_twsk_unhash(tw);
		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
	spin_unlock(lock);
	if (twrefcnt)
		inet_twsk_put(tw);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

	/* If the caller wants the TIME_WAIT sock, hand it back ... */
	if (twp) {
		*twp = tw;
	} else if (tw) {
		/* ... otherwise release it here: remove the tw sock from the
		 * bind and established hashes and cancel its timer. */
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, death_row);
		inet_twsk_put(tw);
	}

	return 0;

not_unique:
	spin_unlock(lock);
	return -EADDRNOTAVAIL;
}
这其中time_wait状态的socket是比较特殊的,因为即使TCP的四元组都相同,如果设置了tcp_tw_reuse参数的话,是可以复用的。这里就是通过twsk_unique()检查。
/*
 * Dispatch to the protocol's TIME_WAIT uniqueness check (tcp_twsk_unique
 * for TCP); a protocol without one treats the tuple as not reusable.
 */
static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	int (*unique)(struct sock *, struct sock *, void *);

	unique = sk->sk_prot->twsk_prot->twsk_unique;
	return unique ? unique(sk, sktw, twp) : 0;
}
因为sk->sk_prot指向tcp_prot,因此sk->sk_prot->twsk_prot指向的是tcp_timewait_sock_ops。
/* TIME_WAIT handling hooks for TCP, reached via sk->sk_prot->twsk_prot. */
static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,	/* may connect() reuse a tw sock? */
	.twsk_destructor= tcp_twsk_destructor,
};
因此最终通过tcp_twsk_unique()判断该time_wait状态socket能不能被connect复用。
/*
 * Decide whether an existing TIME_WAIT socket with the same 4-tuple may
 * be taken over by this new outgoing connection.
 *
 * Returns 1 (reusable) when the tw sock carries a recent TCP timestamp
 * and either the caller does not need the tw sock handed back
 * (twp == NULL) or tcp_tw_reuse is enabled and more than one second has
 * passed since the tw sock's most recent timestamp.
 */
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcptw->tw_ts_recent_stamp && /* tw sock used the timestamp option */
	    /* caller does not need the TIME_WAIT sock returned ... */
	    (twp == NULL ||
	     /* ... or tw reuse is enabled and > 1s elapsed since the tw
	      * sock's latest timestamp */
	     (sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		/* Start the new connection well past the old sequence space. */
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		/* Reusable: inherit the tw sock's timestamp state. */
		tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
绑定好ip和端口后,我们回到tcp_v4_connect()函数中。接下来就可以发送SYN报文了。这就是tcp_connect()干的事了。
/* Build a SYN and send it off. */
int tcp_connect(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
int err;
//初始化各种连接参数,比如初始RTO值为1HZ,即1s
tcp_connect_init(sk);
if (unlikely(tp->repair)) {
tcp_finish_connect(sk, NULL);
return 0;
}
//分配skb结构体
buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
if (unlikely(buff == NULL))
return -ENOBUFS;
/* Reserve space for headers. */
skb_reserve(buff, MAX_TCP_HEADER);
//初始化skb
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
tp->retrans_stamp = tcp_time_stamp;//记录发包时间
tcp_connect_queue_skb(sk, buff);
TCP_ECN_send_syn(sk, buff);
/* Send off SYN; include data in Fast Open. */
//如果开启快速重传,调用tcp_send_syn_data()发送syn报文,否则走tcp_transmit_skb()
err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
if (err == -ECONNREFUSED)
return err;
/* We change tp->snd_nxt after the tcp_transmit_skb() call
* in order to make this packet get counted in tcpOutSegs.
*/
tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq;
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
//启动重传定时器
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
return 0;
}
初始化连接的各种参数之后,分配一个sk_buff,用于承载SYN报文。这里我们先不考虑TCP快速打开(Fast Open)的情况,因此会调用tcp_transmit_skb()来接力对SYN报文的发送。
/*
 * Transmit one TCP segment: build the TCP header and options for @skb,
 * checksum it, and hand it to the IP layer via
 * icsk->icsk_af_ops->queue_xmit (ip_queue_xmit for IPv4).
 *
 * @clone_it: when set, transmit a clone/copy so the original skb stays
 * queued for possible retransmission.
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
			    gfp_t gfp_mask)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	struct tcp_out_options opts;
	unsigned int tcp_options_size, tcp_header_size;
	struct tcp_md5sig_key *md5;
	struct tcphdr *th;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));

	if (clone_it) {
		skb_mstamp_get(&skb->skb_mstamp);

		if (unlikely(skb_cloned(skb)))
			skb = pskb_copy(skb, gfp_mask);
		else
			skb = skb_clone(skb, gfp_mask);
		if (unlikely(!skb))
			return -ENOBUFS;
	}

	inet = inet_sk(sk);
	tp = tcp_sk(sk);
	tcb = TCP_SKB_CB(skb);
	memset(&opts, 0, sizeof(opts));

	if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
		/* Build the TCP options for a SYN segment. */
		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
	else
		/* Build the TCP options for an established connection. */
		tcp_options_size = tcp_established_options(sk, skb, &opts,
							   &md5);
	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);

	if (tcp_packets_in_flight(tp) == 0)
		/* Nothing from this connection is in flight, i.e. this is
		 * the first transmission (or a restart after idle). */
		tcp_ca_event(sk, CA_EVENT_TX_START);

	/* if no packet is in qdisc/device queue, then allow XPS to select
	 * another queue.
	 */
	skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
	skb_set_hash_from_sk(skb, sk);
	atomic_add(skb->truesize, &sk->sk_wmem_alloc);

	skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);

	/* Build TCP header and checksum it. */
	th = tcp_hdr(skb);
	th->source = inet->inet_sport; /* source port */
	th->dest = inet->inet_dport; /* destination port */
	th->seq = htonl(tcb->seq); /* sequence number */
	th->ack_seq = htonl(tp->rcv_nxt); /* acknowledgment number */
	/* Data offset (header length in 32-bit words) plus the flag bits. */
	*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
				      tcb->tcp_flags);

	if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
		/* RFC1323: The window in SYN & SYN/ACK segments
		 * is never scaled.
		 */
		th->window = htons(min(tp->rcv_wnd, 65535U));
	} else {
		th->window = htons(tcp_select_window(sk));
	}
	th->check = 0;
	th->urg_ptr = 0;

	/* The urg_mode check is necessary during a below snd_una win probe */
	if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
		if (before(tp->snd_up, tcb->seq + 0x10000)) {
			th->urg_ptr = htons(tp->snd_up - tcb->seq);
			th->urg = 1;
		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
			th->urg_ptr = htons(0xFFFF);
			th->urg = 1;
		}
	}

	/* Write the TCP options right after the fixed header. */
	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
		TCP_ECN_send(sk, skb, tcp_header_size);

#ifdef CONFIG_TCP_MD5SIG
	/* Calculate the MD5 hash, as we have all we need now */
	if (md5) {
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		tp->af_specific->calc_md5_hash(opts.hash_location,
					       md5, sk, NULL, skb);
	}
#endif

	/* Compute the checksum (tcp_v4_send_check for IPv4). */
	icsk->icsk_af_ops->send_check(sk, skb);

	if (likely(tcb->tcp_flags & TCPHDR_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));

	if (skb->len != tcp_header_size)
		tcp_event_data_sent(tp, sk);

	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
			      tcp_skb_pcount(skb));

	tp->segs_out += tcp_skb_pcount(skb);

	/* Our usage of tstamp should remain private */
	skb->tstamp.tv64 = 0;

	/* Hand the segment to the IP layer, i.e. ip_queue_xmit(). */
	err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);

	if (likely(err <= 0))
		return err;

	/* Local congestion (e.g. a qdisc drop): enter CWR. */
	tcp_enter_cwr(sk);

	return net_xmit_eval(err);
}
这里所做的就是构建TCP的头部信息,以及TCP选项等,这样TCP层的处理就完成了,调用icsk->icsk_af_ops->queue_xmit,发往ip层继续处理。icsk->icsk_af_ops指向ipv4_specific。
/*
 * IPv4 address-family operations for connection-oriented sockets;
 * icsk->icsk_af_ops points here for AF_INET TCP sockets.
 * NOTE(review): the initializer is partially elided ("...") in this excerpt.
 */
const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	= ip_queue_xmit,	/* hand a segment to the IP layer */
	.send_check	= tcp_v4_send_check,	/* compute the TCP checksum */
	.rebuild_header	= inet_sk_rebuild_header,
	.sk_rx_dst_set	= inet_sk_rx_dst_set,
	.conn_request	= tcp_v4_conn_request,	/* handle an incoming SYN */
	.syn_recv_sock	= tcp_v4_syn_recv_sock,	/* create the child sock */
	...
};
因此icsk->icsk_af_ops->queue_xmit最终是调用ip_queue_xmit()继续在IP层处理。这里就不再往下分析了,后面抽个时间把整个收发包流程梳理一遍。
但是connect的职责并没有就此结束,因为要等到三次握手的第二次握手成功后,connect才能算任务完成。这就涉及到SYN报文的接收以及SYN|ACK报文的发送,以及第三次握手的ACK的发送了。我们慢慢来。