socket系列文章都是承接第一篇socket创建,因此这里的编号和内核版本都继承了第一篇文章。
2. SYSCALL_DEFINE3函数
bind系统调用的入口由SYSCALL_DEFINE3(bind, ...)定义,它通过sock->ops->bind调用各个协议各自的bind函数:
/*
 * bind() system call entry point.
 *
 * Looks up the socket behind @fd, copies the user-supplied address into
 * kernel memory, runs the security hook and finally dispatches to the
 * bind operation installed in the socket's ops table.
 *
 * @fd:       file descriptor that should refer to a socket
 * @umyaddr:  user-space pointer to the address to bind
 * @addrlen:  length of the address at @umyaddr
 *
 * Returns the error code left by the first step that fails, otherwise
 * whatever sock->ops->bind() returns.
 */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
	struct sockaddr_storage kaddr;
	struct socket *sock;
	int fput_needed;
	int err;

	/* Map the descriptor to its socket; err is filled in on failure. */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		return err;

	/* Bring the user address into kernel space. */
	err = move_addr_to_kernel(umyaddr, addrlen, &kaddr);
	if (err < 0)
		goto out_put;

	/* The security hook runs before the protocol sees the request. */
	err = security_socket_bind(sock, (struct sockaddr *)&kaddr, addrlen);
	if (err)
		goto out_put;

	/* Protocol-family specific bind, selected through the ops table. */
	err = sock->ops->bind(sock, (struct sockaddr *)&kaddr, addrlen);

out_put:
	/* Pairs with the successful sockfd_lookup_light() above. */
	fput_light(sock->file, fput_needed);
	return err;
}
(1)在创建TCP类型的socket时,sock->ops被初始化为下面的inet_stream_ops,因此这里的sock->ops->bind指向inet_bind()函数。
onst struct proto_ops inet_stream_ops = {
.family
= PF_INET,
.owner
= THIS_MODULE,
.release =
inet_release,
.bind = inet_bind,
.connect =
inet_stream_connect,
.socketpair
= sock_no_socketpair,
.accept
= inet_accept,
.getname
= inet_getname,
.poll
= tcp_poll,
.ioctl = inet_ioctl,
.listen = inet_listen,
.shutdown
= inet_shutdown,
.setsockopt =
sock_common_setsockopt,
.getsockopt =
sock_common_getsockopt,
.sendmsg
= inet_sendmsg,
.recvmsg =
inet_recvmsg,
.mmap
= sock_no_mmap,
.sendpage
= inet_sendpage,
.splice_read =
tcp_splice_read,
#ifdef CONFIG_COMPAT
.compat_setsockopt
= compat_sock_common_setsockopt,
.compat_getsockopt
= compat_sock_common_getsockopt,
.compat_ioctl =
inet_compat_ioctl,
#endif
2.1 sockfd_lookup_light函数
static
struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{
struct
fd f = fdget(fd);//通过fd获取到struct fd结构体,然后获取file
struct
socket *sock;
*err
= -EBADF;
if
(f.file) {
sock
= sock_from_file(f.file, err);//返回套接字所对应的指针,存储在file->private_data;在sock_alloc_file函数中对其进行赋值
if
(likely(sock)) {
*fput_needed
= f.flags;
return
sock;//返回socket结构体指针
}
fdput(f);
}
return
NULL;
}
2.2 inet_bind函数
bind系统调用先经过套接口层的inet_bind()函数,然后调用传输层的接口函数。TCP的传输层接口函数为inet_csk_get_port,bind的主要工作由该函数完成。如果用户在系统调用中指定的端口号为0,系统会自动选择一个可用的端口号,选择思路是:先在绑定表中选择可用的端口号,如果绑定表中没有可用的端口号,再选择空闲的端口号。
inet_bind函数定义在af_inet.c文件中:
/*
 * Socket-layer bind for AF_INET sockets.
 *
 * @sock:     socket being bound
 * @uaddr:    address to bind, interpreted as a struct sockaddr_in
 * @addr_len: length of @uaddr
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL         address too short, socket not in TCP_CLOSE, or a
 *                   local port number is already set
 *   -EADDRNOTAVAIL  address is not acceptable for binding
 *   -EACCES         port below PROT_SOCK without CAP_NET_BIND_SERVICE
 *   -EADDRINUSE     the protocol's get_port() refused the port
 * When the protocol supplies its own bind hook, that hook's return value
 * is passed through unchanged.
 */
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;	/* address to bind */
	struct sock *sk = sock->sk;
	struct inet_sock *inet = inet_sk(sk);
	unsigned short port;	/* requested port, host byte order */
	int addr_type;		/* result of inet_addr_type() */
	int err;

	/*
	 * If the socket has its own bind function then use it. (RAW)
	 * The original note says raw sockets take this path via raw_bind.
	 */
	if (sk->sk_prot->bind)
		return sk->sk_prot->bind(sk, uaddr, addr_len);

	/* The caller must hand over at least a full sockaddr_in. */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	/* Classify the address; the result is used by the checks below. */
	addr_type = inet_addr_type(sock_net(sk), sin->sin_addr.s_addr);

	/*
	 * Not specified by any standard per-se, however it breaks too
	 * many applications when removed. It is unfortunate since
	 * allowing applications to make a non-local bind solves
	 * several problems with systems using dynamic addressing.
	 * (ie. your servers still start up even if your ISDN link
	 * is temporarily down)
	 *
	 * The bind is rejected only when every escape hatch is closed:
	 *  - sysctl_ip_nonlocal_bind (per the original note, exposed as
	 *    /proc/sys/net/ipv4/ip_nonlocal_bind) is zero,
	 *  - neither inet->freebind (per the original note, set through
	 *    do_ip_setsockopt with IP_FREEBIND) nor inet->transparent is set,
	 *  - the address is not INADDR_ANY,
	 *  - the address is not local, multicast or broadcast.
	 * NOTE(review): the defaults of these switches are not visible in
	 * this function; ip(7) documents them as disabled by default -
	 * confirm for the kernel version in use.
	 */
	if (!sysctl_ip_nonlocal_bind &&
	    !inet->freebind && !inet->transparent &&
	    sin->sin_addr.s_addr != htonl(INADDR_ANY) &&
	    addr_type != RTN_LOCAL &&
	    addr_type != RTN_MULTICAST &&
	    addr_type != RTN_BROADCAST)
		return -EADDRNOTAVAIL;

	/* Requested port, converted to host byte order. */
	port = ntohs(sin->sin_port);

	/*
	 * A non-zero port below PROT_SOCK (1024 according to the original
	 * note) requires CAP_NET_BIND_SERVICE.
	 */
	if (port && port < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
		return -EACCES;

	/*
	 * We keep a pair of addresses. rcv_saddr is the one
	 * used by hash lookups, and saddr is used for transmit.
	 *
	 * In the BSD API these are the same except where it
	 * would be illegal to use them (multicast/broadcast) in
	 * which case the sending device address is used.
	 */
	lock_sock(sk);

	/* Check these errors (active socket, double bind). */
	if (sk->sk_state != TCP_CLOSE || inet->num) {
		err = -EINVAL;
		goto unlock;
	}

	/* Record the requested address for both receive and transmit. */
	inet->saddr = sin->sin_addr.s_addr;
	inet->rcv_saddr = sin->sin_addr.s_addr;
	if (addr_type == RTN_MULTICAST || addr_type == RTN_BROADCAST)
		inet->saddr = 0;	/* Use device */

	/*
	 * Make sure we are allowed to bind here. This calls the transport
	 * protocol's get_port hook; the TCP table in this file sets it to
	 * inet_csk_get_port.
	 */
	if (sk->sk_prot->get_port(sk, port)) {
		inet->rcv_saddr = 0;
		inet->saddr = 0;
		err = -EADDRINUSE;
		goto unlock;
	}

	/* Flag in sk_userlocks which parts of the binding were requested. */
	if (inet->rcv_saddr)
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
	if (port)
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;

	/* Source port mirrors inet->num; destination fields are cleared. */
	inet->sport = htons(inet->num);
	inet->daddr = 0;
	inet->dport = 0;
	sk_dst_reset(sk);
	err = 0;

unlock:
	release_sock(sk);
	return err;
}
2.2.1 在raw.c文件中,proto结构体raw_prot的定义如下:
/*
 * Protocol operations table named "RAW".
 *
 * Unlike the TCP table in this file, it sets .bind (raw_bind). inet_bind()
 * calls sk->sk_prot->bind when that member is non-NULL, so a socket whose
 * sk_prot is this table has its bind handled by raw_bind.
 */
struct proto raw_prot = {
	.name		   = "RAW",
	.owner		   = THIS_MODULE,
	.close		   = raw_close,
	.destroy	   = raw_destroy,
	.connect	   = ip4_datagram_connect,
	.disconnect	   = udp_disconnect,
	.ioctl		   = raw_ioctl,
	.init		   = raw_init,
	.setsockopt	   = raw_setsockopt,
	.getsockopt	   = raw_getsockopt,
	.sendmsg	   = raw_sendmsg,
	.recvmsg	   = raw_recvmsg,
	.bind		   = raw_bind,
	.backlog_rcv	   = raw_rcv_skb,
	.release_cb	   = ip4_datagram_release_cb,
	.hash		   = raw_hash_sk,
	.unhash		   = raw_unhash_sk,
	.obj_size	   = sizeof(struct raw_sock),
	.h.raw_hash	   = &raw_v4_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_raw_setsockopt,
	.compat_getsockopt = compat_raw_getsockopt,
	.compat_ioctl	   = compat_raw_ioctl,
#endif
};
而TCP类型的proto(即下面的tcp_prot)没有定义bind函数,因此inet_bind不会走sk->sk_prot->bind分支,而是继续执行并调用get_port(即inet_csk_get_port):
/*
 * Protocol operations table named "TCP".
 *
 * No .bind member is set here, so the sk->sk_prot->bind test in
 * inet_bind() is false for a socket using this table and the port is
 * obtained through .get_port, i.e. inet_csk_get_port.
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};