/*
 *	Connect to a remote host. There is regrettably still a little
 *	TCP 'magic' in here.
 *
 *	@sock:     socket being connected; sock->sk is locked for the whole call
 *	@uaddr:    destination address; sa_family == AF_UNSPEC asks for a
 *	           disconnect instead of a connect
 *	@addr_len: length of @uaddr in bytes
 *	@flags:    file flags; (flags & O_NONBLOCK) is handed to sock_sndtimeo()
 *	           to pick the wait timeout
 *
 *	Returns 0 once sock->state has been set to SS_CONNECTED, otherwise a
 *	negative errno:
 *	  -EINVAL       @addr_len too short to hold sa_family, or sock->state
 *	                is not one of the states handled below
 *	  -EISCONN      already SS_CONNECTED, or SS_UNCONNECTED while
 *	                sk->sk_state is not TCP_CLOSE
 *	  -EINPROGRESS  connect just started and we did not (or could not) wait
 *	  -EALREADY     a connect was already in progress and we did not wait
 *	Errors from sk_prot->connect()/disconnect(), sock_intr_errno() and
 *	sock_error() are passed through unchanged.
 */
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	int err;
	long timeo;

	/*
	 * sa_family is inspected below, so the caller must have supplied at
	 * least that much of the address. The cast also rejects negative
	 * lengths, which an unsigned comparison would let through.
	 */
	if (addr_len < (int)sizeof(uaddr->sa_family))
		return -EINVAL;

	lock_sock(sk);

	if (uaddr->sa_family == AF_UNSPEC) {
		err = sk->sk_prot->disconnect(sk, flags);
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		goto out;
	}

	switch (sock->state) {
	default:
		err = -EINVAL;
		goto out;
	case SS_CONNECTED:
		err = -EISCONN;
		goto out;
	case SS_CONNECTING:
		err = -EALREADY;
		/* Fall out of switch with err, set for this state */
		break;
	/* sock->state is set to SS_UNCONNECTED in inet_create(). */
	case SS_UNCONNECTED:
		err = -EISCONN;
		/*
		 * Original author's note: sk->sk_state is initialised to
		 * TCP_CLOSE by sock_init_data() (called from inet_create()),
		 * and again by sk->sk_prot->init (tcp_v4_init_sock() for TCP),
		 * so that initialisation happens twice.
		 */
		if (sk->sk_state != TCP_CLOSE)
			goto out;

		/* For TCP this is tcp_prot.connect, i.e. tcp_v4_connect(). */
		err = sk->sk_prot->connect(sk, uaddr, addr_len);
		if (err < 0)
			goto out;

		/* The connect has been started: move to SS_CONNECTING. */
		sock->state = SS_CONNECTING;

		/* Just entered SS_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		err = -EINPROGRESS;
		break;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		/* Error code is set above */
		if (!timeo || !inet_wait_for_connect(sk, timeo))
			goto out;

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
	}

	/* Connection was closed by RST, timeout, ICMP error
	 * or another process disconnected us.
	 */
	if (sk->sk_state == TCP_CLOSE)
		goto sock_error;

	/* sk->sk_err may be not zero now, if RECVERR was ordered by user
	 * and error was received after socket entered established state.
	 * Hence, it is handled normally after connect() return successfully.
	 */
	sock->state = SS_CONNECTED;
	err = 0;
out:
	release_sock(sk);
	return err;

sock_error:
	/* Report the pending socket error, or -ECONNABORTED if there is none. */
	err = sock_error(sk) ? : -ECONNABORTED;
	sock->state = SS_UNCONNECTED;
	if (sk->sk_prot->disconnect(sk, flags))
		sock->state = SS_DISCONNECTING;
	goto out;
}
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct rtable *rt;
__be32 daddr, nexthop;
int tmp;
int err;
struct ip_options *inet_opt;
/* 检查一下地址长度和协议族类型 */
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
if (usin->sin_family != AF_INET)
return -EAFNOSUPPORT;
/× 将nexthop和daddr暂时初始化为目标地址 ×/
nexthop = daddr = usin->sin_addr.s_addr;
inet_opt = rcu_dereference(inet->opt);
/× 如果指定了源路由选项,则按照指定的下一跳进行处理 ×/
if (inet_opt && inet_opt->srr) {
if (!daddr)
return -EINVAL;
nexthop = inet_opt->faddr;
}
/× 本地发包需要确认源IP地址和源端口,如果没有提供源IP地址会通过路由查找过程中的inet_select_addr来选择源地址 ×/
/× 路由查找成功后会生成路由缓存项,可以加速后续报文的发送,下面函数返回路由缓存项,包含源IP地址 ×/
tmp = ip_route_connect(&rt, nexthop, inet->saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_TCP,
inet->sport, usin->sin_port, sk, 1);
if (tmp < 0) {
if (tmp == -ENETUNREACH)
IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return tmp;
}
/× 正常TCP连接查找到的都是单播路由 ×/
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
ip_rt_put(rt);
return -ENETUNREACH;
}
/* 使用查完路由后的目的地址更新临时daddr,有可能与用户传入的目的地址不同 */
if (!inet_opt || !inet_opt->srr)
daddr = rt->rt_dst;
/* 如果没有指定源地址,则使用路由选择的源地址 */
if (!inet->saddr)
inet->saddr = rt->rt_src;
inet->rcv_saddr = inet->saddr;
if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
/* Reset inherited state */
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
tp->write_seq = 0;
}
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
struct inet_peer *peer = rt_get_peer(rt);
/*
* VJ's idea. We save last timestamp seen from
* the destination in peer table, when entering state
* TIME-WAIT * and initialize rx_opt.ts_recent from it,
* when trying new connection.
*/
if (peer != NULL &&
peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
tp->rx_opt.ts_recent = peer->tcp_ts;
}
}
/* 记录目的端口和目的IP */
inet->dport = usin->sin_port;
inet->daddr = daddr;
/× 设置IP选项的长度 ×/
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->optlen;
/× 允许的最小MSS ×/
tp->rx_opt.mss_clamp = 536;
/* Socket identity is still unknown (sport may be zero).
* However we set state to SYN-SENT and not releasing socket
* lock select source port, enter ourselves into the hash tables and
* complete initialization after this.
*/
/× 马上要发送SYN了,所以可以更新sk->sk_state状态值了 ×/
tcp_set_state(sk, TCP_SYN_SENT);
/× 进行端口选择,并与sk绑定,下面会重点分析一下 ×/
err = inet_hash_connect(&tcp_death_row, sk);
if (err)
goto failure;
/× 使用sport更新路由缓存表 ×/
err = ip_route_newports(&rt, IPPROTO_TCP,
inet->sport, inet->dport, sk);
if (err)
goto failure;
/* OK, now commit destination to socket. */
/× 更新sock的路由缓存项,设置gso标签 ×/
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->u.dst);
/× 计算发送第一个报文的初始化序列号 ×/
if (!tp->write_seq)
tp->write_seq = secure_tcp_sequence_number(inet->saddr,
inet->daddr,
inet->sport,
usin->sin_port);
/× 计算IP报文ID ×/
inet->id = tp->write_seq ^ jiffies;
/× 发送SYN报文 ×/
err = tcp_connect(sk);
rt = NULL;
if (err)
goto failure;
return 0;
failure:
/*
* This unhashes the socket and releases the local port,
* if necessary.
*/
tcp_set_state(sk, TCP_CLOSE);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->dport = 0;
return err;
}
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct rtable *rt;
__be32 daddr, nexthop;
int tmp;
int err;
struct ip_options *inet_opt;
/* 检查一下地址长度和协议族类型 */
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
if (usin->sin_family != AF_INET)
return -EAFNOSUPPORT;
/* 将nexthop和daddr暂时初始化为目标地址 */
nexthop = daddr = usin->sin_addr.s_addr;
inet_opt = rcu_dereference(inet->opt);
/* 如果指定了源路由选项,则按照指定的下一跳进行处理 */
if (inet_opt && inet_opt->srr) {
if (!daddr)
return -EINVAL;
nexthop = inet_opt->faddr;
}
/* 本地发包需要确认源IP地址和源端口,如果没有提供源IP地址会通过路由查找过程中的inet_select_addr来选择源地址 */
/* 路由查找成功后会生成路由缓存项,可以加速后续报文的发送,下面函数返回路由缓存项,包含源IP地址 */
tmp = ip_route_connect(&rt, nexthop, inet->saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_TCP,
inet->sport, usin->sin_port, sk, 1);
if (tmp < 0) {
if (tmp == -ENETUNREACH)
IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return tmp;
}
/* 正常TCP连接查找到的都是单播路由 */
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
ip_rt_put(rt);
return -ENETUNREACH;
}
/* 使用查完路由后的目的地址更新临时daddr,有可能与用户传入的目的地址不同 */
if (!inet_opt || !inet_opt->srr)
daddr = rt->rt_dst;
/* 如果没有指定源地址,则使用路由选择的源地址 */
if (!inet->saddr)
inet->saddr = rt->rt_src;
inet->rcv_saddr = inet->saddr;
if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
/* Reset inherited state */
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
tp->write_seq = 0;
}
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
struct inet_peer *peer = rt_get_peer(rt);
/*
* VJ's idea. We save last timestamp seen from
* the destination in peer table, when entering state
* TIME-WAIT * and initialize rx_opt.ts_recent from it,
* when trying new connection.
*/
if (peer != NULL &&
peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
tp->rx_opt.ts_recent = peer->tcp_ts;
}
}
/* 记录目的端口和目的IP */
inet->dport = usin->sin_port;
inet->daddr = daddr;
/* 设置IP选项的长度 */
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->optlen;
/* 允许的最小MSS */
tp->rx_opt.mss_clamp = 536;
/* Socket identity is still unknown (sport may be zero).
* However we set state to SYN-SENT and not releasing socket
* lock select source port, enter ourselves into the hash tables and
* complete initialization after this.
*/
/* 马上要发送SYN了,所以可以更新sk->sk_state状态值了 */
tcp_set_state(sk, TCP_SYN_SENT);
/* 进行端口选择,并与sk绑定,下面会重点分析一下 */
err = inet_hash_connect(&tcp_death_row, sk);
if (err)
goto failure;
/* 使用sport更新路由缓存表 */
err = ip_route_newports(&rt, IPPROTO_TCP,
inet->sport, inet->dport, sk);
if (err)
goto failure;
/* OK, now commit destination to socket. */
/* 更新sock的路由缓存项,设置gso标签 */
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->u.dst);
/* 计算发送第一个报文的初始化序列号 */
if (!tp->write_seq)
tp->write_seq = secure_tcp_sequence_number(inet->saddr,
inet->daddr,
inet->sport,
usin->sin_port);
/* 计算IP报文ID */
inet->id = tp->write_seq ^ jiffies;
/* 发送SYN报文 */
err = tcp_connect(sk);
rt = NULL;
if (err)
goto failure;
return 0;
failure:
/*
* This unhashes the socket and releases the local port,
* if necessary.
*/
tcp_set_state(sk, TCP_CLOSE);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->dport = 0;
return err;
}