读Linux内核(4.9.9)之bind系统调用

在调用socket成功返回后,我们得到与socket关联的文件描述符。然后我们以该描述符和sockaddr地址结构对象为参数调用bind,就实现了socket对象地址的绑定。那这个绑定到底是什么意思?这个绑定操作是必须的吗?绑定操作之后,socket对象又发生了什么?也许还有更多的疑问,我们到协议栈的源码中寻找答案。先贴上bind系统调用的源码:

/*
 *	Bind a name to a socket. Nothing much to do here since it's
 *	the protocol's responsibility to handle the local address.
 *
 *	We move the socket address to kernel space before we call
 *	the protocol layer (having also checked the address is ok).
 *
 *	Returns the error code produced by whichever step failed first
 *	(socket lookup, address copy, security hook, protocol bind), or
 *	the result of the protocol's bind handler.
 */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err, fput_needed;

	/*
	 * Resolve the file descriptor to its socket object (per the article,
	 * via the file object's private_data member). If the lookup fails,
	 * sock is NULL and err, filled in through the pointer, is returned.
	 */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock) {
		/* Copy the address from user space into the kernel buffer. */
		err = move_addr_to_kernel(umyaddr, addrlen, &address);
		if (err >= 0) {
			/* Security hook; a non-zero result vetoes the bind. */
			err = security_socket_bind(sock,
						   (struct sockaddr *)&address,
						   addrlen);

			/*
			 * sock->ops is set up in inet_create() from the matching
			 * inetsw_array entry (socket type <-> protocol):
			 *   - stream sockets:   inet_stream_ops
			 *   - datagram sockets: inet_dgram_ops
			 * In both tables ->bind is inet_bind.
			 *
			 * For TCP the bind essentially ends with the sock being
			 * placed in the bhash table of tcp_hashinfo.
			 */
			if (!err)
				err = sock->ops->bind(sock,
						      (struct sockaddr *)
						      &address, addrlen);
		}
		/* Release the file reference taken by the lookup, if one was taken. */
		fput_light(sock->file, fput_needed);
	}
	return err;
}

通过传入的文件描述符,我们找到对应的file对象,然后通过file对象的private_data指针找到socket对象。接着将包含地址信息的sockaddr对象从用户空间拷贝到内核空间,接着是安全方面的东西, 接着就是我们关注的重点:

/* Only when the security hook returned 0: dispatch to the socket-type specific bind handler. */
if (!err)
	err = sock->ops->bind(sock,
				(struct sockaddr *)
				 &address, addrlen);

我们在执行socket系统调用时,会传入套接字的类型(流式套接字SOCK_STREAM,数据报套接字SOCK_DGRAM,原生套接字SOCK_RAW)。内核根据该类型,将相应的操作函数集指针赋值给socket对象,并取得对应的协议描述块指针。例如对于流式套接字,操作函数集为inet_stream_ops,协议描述块为tcp_prot:

/*
 * Excerpt of inet_create(); parts omitted by the article are marked "...".
 * The visible part selects the inet_protosw entry matching the socket type
 * and protocol, then takes the ops table, protocol block and flags from it.
 */
static int inet_create(struct net *net, struct socket *sock, int protocol,  
               int kern)  
{  
...

 
    /*
     * Walk the entries registered for this socket type (sock->type).
     * The type/protocol associations are defined in inetsw_array and are
     * inserted into the inetsw lists at startup (in inet_init, per the
     * original note). A per-type list may hold more than one entry:
     * inetsw_array has two SOCK_DGRAM entries (UDP and ICMP).
     */
    list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {  
  
        err = 0;  
        /* Check the non-wild match. */  
        if (protocol == answer->protocol) {  
            if (protocol != IPPROTO_IP)  
                break;  
        } else {  
            /* Check for the two wild cases. */  
            if (IPPROTO_IP == protocol) {  
                /* Caller passed the wildcard: adopt this entry's protocol. */
                protocol = answer->protocol;  
                break;  
            }  
            if (IPPROTO_IP == answer->protocol)  
                break;  
        }  
        /* No match on this entry; stays set if the list is exhausted. */
        err = -EPROTONOSUPPORT;  
    }  

...

    /* From here on, 'answer' is the entry chosen from inetsw_array. */
  
    /*
     * Protocol-specific operation table (the article says it is declared
     * in net.h). For TCP: inet_stream_ops.
     */
    sock->ops = answer->ops;  
    /* INET-layer protocol description block. For TCP: tcp_prot. */
    answer_prot = answer->prot;  
      
    answer_flags = answer->flags;  
...

}

参考http://blog.csdn.net/idwtwt/article/details/50964302我们知道inetsw源自于inetsw_array

/* Upon startup we insert all the elements in inetsw_array[] into
 * the linked list inetsw.
 *
 * Each entry ties a socket type and protocol number to the protocol
 * description block (.prot) and the socket-level operation table (.ops).
 */
static struct inet_protosw inetsw_array[] =
{
	/* Stream sockets: TCP, served by tcp_prot / inet_stream_ops. */
	{
		.type =       SOCK_STREAM,
		.protocol =   IPPROTO_TCP,
		.prot =       &tcp_prot,
		.ops =        &inet_stream_ops,
		.flags =      INET_PROTOSW_PERMANENT |
			      INET_PROTOSW_ICSK,
	},

	/* Datagram sockets: UDP, served by udp_prot / inet_dgram_ops. */
	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_UDP,
		.prot =       &udp_prot,
		.ops =        &inet_dgram_ops,
		.flags =      INET_PROTOSW_PERMANENT,
       },

       /* Datagram sockets with IPPROTO_ICMP: ping_prot, also inet_dgram_ops. */
       {
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_ICMP,
		.prot =       &ping_prot,
		.ops =        &inet_dgram_ops,
		.flags =      INET_PROTOSW_REUSE,
       },

       /* Raw sockets: wildcard protocol, raw_prot / inet_sockraw_ops. */
       {
	       .type =       SOCK_RAW,
	       .protocol =   IPPROTO_IP,	/* wild card */
	       .prot =       &raw_prot,
	       .ops =        &inet_sockraw_ops,
	       .flags =      INET_PROTOSW_REUSE,
       }
};

结合起来看:

1 对于流式套接字ops字段将被赋值inet_stream_ops指针

2 对于数据报套接字ops字段将被赋值inet_dgram_ops指针

3 对于原生套接字ops字段将被赋值inet_sockraw_ops指针

操作函数集详细定义为:

/*
 * Socket-level operation table for stream sockets; inetsw_array installs it
 * for the SOCK_STREAM / IPPROTO_TCP entry. Its .bind handler is inet_bind.
 */
const struct proto_ops inet_stream_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,
	.connect	   = inet_stream_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = inet_accept,
	.getname	   = inet_getname,
	.poll		   = tcp_poll,
	.ioctl		   = inet_ioctl,
	.listen		   = inet_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = inet_sendpage,
	.splice_read	   = tcp_splice_read,
	.read_sock	   = tcp_read_sock,
	.peek_len	   = tcp_peek_len,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_sock_common_setsockopt,
	.compat_getsockopt = compat_sock_common_getsockopt,
	.compat_ioctl	   = inet_compat_ioctl,
#endif
};
EXPORT_SYMBOL(inet_stream_ops);

/*
 * Socket-level operation table for datagram sockets; inetsw_array installs
 * it for both SOCK_DGRAM entries (UDP and ICMP). Its .bind handler is
 * inet_bind; accept and listen are the sock_no_* stubs.
 */
const struct proto_ops inet_dgram_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,
	.connect	   = inet_dgram_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = sock_no_accept,
	.getname	   = inet_getname,
	.poll		   = udp_poll,
	.ioctl		   = inet_ioctl,
	.listen		   = sock_no_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = inet_sendpage,
	.set_peek_off	   = sk_set_peek_off,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_sock_common_setsockopt,
	.compat_getsockopt = compat_sock_common_getsockopt,
	.compat_ioctl	   = inet_compat_ioctl,
#endif
};
EXPORT_SYMBOL(inet_dgram_ops);

/*
 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
 * udp_poll
 *
 * (.poll is datagram_poll here; .bind is inet_bind as in the other two
 * tables. inetsw_array installs this table for the SOCK_RAW entry.)
 */
static const struct proto_ops inet_sockraw_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,
	.connect	   = inet_dgram_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = sock_no_accept,
	.getname	   = inet_getname,
	.poll		   = datagram_poll,
	.ioctl		   = inet_ioctl,
	.listen		   = sock_no_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = inet_sendpage,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_sock_common_setsockopt,
	.compat_getsockopt = compat_sock_common_getsockopt,
	.compat_ioctl	   = inet_compat_ioctl,
#endif
};

可知三种类型的套接字,sock->ops->bind实际调用的都是inet_bind,我们分析下该函数:

/*
 * Common ->bind handler of the three PF_INET proto_ops tables.
 *
 * If the protocol block has its own bind callback, the whole request is
 * delegated to it. Otherwise the address is validated, the local
 * address/port are recorded in the inet_sock, and the port is claimed
 * through the protocol's get_port callback.
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL        addr_len too short, or socket not in TCP_CLOSE state /
 *                  already has a port
 *   -EAFNOSUPPORT  family is neither AF_INET nor AF_UNSPEC with INADDR_ANY
 *   -EADDRNOTAVAIL address is not local/multicast/broadcast and non-local
 *                  bind is not permitted
 *   -EACCES        port below PROT_SOCK without CAP_NET_BIND_SERVICE
 *   -EADDRINUSE    get_port reported failure
 */
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	unsigned short snum;
	int chk_addr_ret;
	u32 tb_id = RT_TABLE_LOCAL;
	int err;

	/* If the socket has its own bind function then use it. (RAW) */

	/*
	 * For TCP, sk->sk_prot is tcp_prot, which defines no bind callback,
	 * so this branch is not taken.
	 */
	if (sk->sk_prot->bind) {
		err = sk->sk_prot->bind(sk, uaddr, addr_len);
		goto out;
	}
	err = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	if (addr->sin_family != AF_INET) {
		/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
		 * only if s_addr is INADDR_ANY.
		 */
		err = -EAFNOSUPPORT;
		if (addr->sin_family != AF_UNSPEC ||
		    addr->sin_addr.s_addr != htonl(INADDR_ANY))
			goto out;
	}

	/* Classify the requested address; the result is checked below. */
	tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
	chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);

	/* Not specified by any standard per-se, however it breaks too
	 * many applications when removed.  It is unfortunate since
	 * allowing applications to make a non-local bind solves
	 * several problems with systems using dynamic addressing.
	 * (ie. your servers still start up even if your ISDN link
	 *  is temporarily down)
	 */
	err = -EADDRNOTAVAIL;
	if (!net->ipv4.sysctl_ip_nonlocal_bind &&
	    !(inet->freebind || inet->transparent) &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
	    chk_addr_ret != RTN_LOCAL &&
	    chk_addr_ret != RTN_MULTICAST &&
	    chk_addr_ret != RTN_BROADCAST)
		goto out;

	/* Non-zero ports below PROT_SOCK need CAP_NET_BIND_SERVICE. */
	snum = ntohs(addr->sin_port);
	err = -EACCES;
	if (snum && snum < PROT_SOCK &&
	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
		goto out;

	/*      We keep a pair of addresses. rcv_saddr is the one
	 *      used by hash lookups, and saddr is used for transmit.
	 *
	 *      In the BSD API these are the same except where it
	 *      would be illegal to use them (multicast/broadcast) in
	 *      which case the sending device address is used.
	 */
	lock_sock(sk);

	/* Check these errors (active socket, double bind). */

	/*
	 * Fail if the socket is not in its initial TCP_CLOSE state or already
	 * has a port (inet_num != 0). A socket can be bound to at most one
	 * port, whereas one port may be shared by several sockets.
	 */
	err = -EINVAL;
	if (sk->sk_state != TCP_CLOSE || inet->inet_num)
		goto out_release_sock;

	inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
		inet->inet_saddr = 0;  /* Use device */

	/* Make sure we are allowed to bind here. */

	/*
	 * For TCP (tcp_prot) get_port is inet_csk_get_port; it returns 0 when
	 * the port is usable. With snum == 0 it picks a port itself and tries
	 * odd ports first. On success the socket is added to the bind hash
	 * table: once the hash bucket (struct inet_bind_bucket) is found or
	 * created, inet_bind_hash() is called with the sock, bucket and port.
	 * Per the article, that call sets the sock's port number and links the
	 * sock into the bucket's owner list (inet_bind_hash is not shown here).
	 * The hash table used by TCP is tcp_hashinfo.
	 */
	if ((snum || !inet->bind_address_no_port) &&		
	    sk->sk_prot->get_port(sk, snum)) {
		/* Undo the address assignment made above. */
		inet->inet_saddr = inet->inet_rcv_saddr = 0;
		err = -EADDRINUSE;
		goto out_release_sock;
	}

	/* inet_rcv_saddr is the bound address, used to find the socket on receive. */
	if (inet->inet_rcv_saddr)
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
	if (snum)
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
	inet->inet_sport = htons(inet->inet_num);
	inet->inet_daddr = 0;
	inet->inet_dport = 0;
	sk_dst_reset(sk);
	err = 0;
out_release_sock:
	release_sock(sk);
out:
	return err;
}

分析该函数,我们发现如果套接字对应的实现协议本身有bind函数,会执行协议的bind函数,然后跳到函数结尾返回;如果对应的实现协议本身没有定义bind函数,则往下执行。查看tcp和udp的协议描述块,我们没有发现bind函数,所以对于tcp和udp来说,if内的代码不会执行,而是继续往下执行。

/*
 * Protocol description block for TCP (referenced by the SOCK_STREAM entry
 * of inetsw_array). It defines no .bind callback, so inet_bind() does not
 * take its sk_prot->bind branch for TCP; .get_port is inet_csk_get_port
 * and .h.hashinfo is tcp_hashinfo.
 *
 * Original note: the slab for this protocol is created when inet_init
 * calls proto_register (not shown here).
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};

/*
 * Protocol description block for UDP (referenced by the SOCK_DGRAM /
 * IPPROTO_UDP entry of inetsw_array). Like tcp_prot it defines no .bind
 * callback; .get_port is udp_v4_get_port and .h.udp_table is udp_table.
 *
 * Original note: the slab for this protocol is created when inet_init
 * calls proto_register (not shown here).
 */

struct proto udp_prot = {
	.name		   = "UDP",
	.owner		   = THIS_MODULE,
	.close		   = udp_lib_close,
	.connect	   = ip4_datagram_connect,
	.disconnect	   = udp_disconnect,
	.ioctl		   = udp_ioctl,
	.destroy	   = udp_destroy_sock,
	.setsockopt	   = udp_setsockopt,
	.getsockopt	   = udp_getsockopt,
	.sendmsg	   = udp_sendmsg,
	.recvmsg	   = udp_recvmsg,
	.sendpage	   = udp_sendpage,
	.backlog_rcv	   = __udp_queue_rcv_skb,
	.release_cb	   = ip4_datagram_release_cb,
	.hash		   = udp_lib_hash,
	.unhash		   = udp_lib_unhash,
	.rehash		   = udp_v4_rehash,
	.get_port	   = udp_v4_get_port,
	.memory_allocated  = &udp_memory_allocated,
	.sysctl_mem	   = sysctl_udp_mem,
	.sysctl_wmem	   = &sysctl_udp_wmem_min,
	.sysctl_rmem	   = &sysctl_udp_rmem_min,
	.obj_size	   = sizeof(struct udp_sock),
	.h.udp_table	   = &udp_table,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_udp_setsockopt,
	.compat_getsockopt = compat_udp_getsockopt,
#endif
	.diag_destroy	   = udp_abort,
};

对于tcp和udp之外的协议,例如原生类型的套接字(内核注释中标注的RAW),协议描述块才有自己的bind函数,此时会进入下面的if分支:

    /* If the socket has its own bind function then use it. (RAW) */

	/*
	 * For TCP, sk->sk_prot is tcp_prot, which defines no bind callback,
	 * so this branch is not taken; the same holds for udp_prot.
	 */
	if (sk->sk_prot->bind) {
		err = sk->sk_prot->bind(sk, uaddr, addr_len);
		goto out;
	}

往下的代码主要是地址信息合法性检查,还有inet_sock对象的设置(对于udp,实际上是udp_sock对象中的inet_sock对象——查看udp_sock定义,可知udp_sock中包含一个inet_sock对象),申请端口等。留意端口申请代码:

	/*
	 * Claim the port through the protocol's get_port callback; a non-zero
	 * return means the port could not be obtained. For UDP the callback is
	 * udp_v4_get_port (-> udp_lib_get_port, per the original note), which
	 * adds the sock to the udp_table hash table. The callback is skipped
	 * only when no port was requested (snum == 0) and
	 * bind_address_no_port is set.
	 */
	if ((snum || !inet->bind_address_no_port) &&
	    sk->sk_prot->get_port(sk, snum)) {
		/* Undo the address assignment made earlier in inet_bind(). */
		inet->inet_saddr = inet->inet_rcv_saddr = 0;
		err = -EADDRINUSE;
		goto out_release_sock;
	}

查看协议描述块可知,对于tcp,调用的是inet_csk_get_port,对于udp调用的是udp_v4_get_port,我们只分析inet_csk_get_port

/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 * We try to allocate an odd port (and leave even ports for connect())
 *
 * Returns 0 on success (the sock is then linked to a bind bucket through
 * inet_bind_hash()) and non-zero on failure.
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
	/* For TCP (tcp_prot) this is tcp_hashinfo. */
	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
	
	int ret = 1, attempts = 5, port = snum;
	int smallest_size = -1, smallest_port;
	struct inet_bind_hashbucket *head;
	struct net *net = sock_net(sk);
	int i, low, high, attempt_half;
	struct inet_bind_bucket *tb;
	kuid_t uid = sock_i_uid(sk);
	u32 remaining, offset;

	if (port) {
have_port:
		/*
		 * A specific port is wanted. hinfo is tcp_hashinfo for TCP
		 * (initialised in tcp_init, per the original note).
		 *
		 * inet_bhashfn() maps (net, port) to a slot of bhash; the
		 * original note describes it as the port masked by the table
		 * size (helper not shown here). head is the chain head of
		 * that slot. Each chain entry is an inet_bind_bucket for one
		 * port; per the original note its owners are the socks bound
		 * to that port (several when the port is reused).
		 */
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);

		/* Look for an existing bucket for this port in this netns. */
		inet_bind_bucket_for_each(tb, &head->chain)
			if (net_eq(ib_net(tb), net) && tb->port == port)
				goto tb_found;

		goto tb_not_found;
	}
again:
	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
	
	/*
	 * No port requested: pick an unused port at a random offset.
	 * Start by fetching the allowed local port range.
	 */
	inet_get_local_port_range(net, &low, &high);
	high++; /* [32768, 60999] -> [32768, 61000[ */
	if (high - low < 4)
		attempt_half = 0;
	if (attempt_half) {
		int half = low + (((high - low) >> 2) << 1);

		/* Pass 1 scans the lower half, pass 2 the upper half. */
		if (attempt_half == 1)
			high = half;
		else
			low = half;
	}
	remaining = high - low;
	if (likely(remaining > 1))
		remaining &= ~1U;

	offset = prandom_u32() % remaining;
	/* __inet_hash_connect() favors ports having @low parity
	 * We do the opposite to not pollute connect() users.
	 */
	offset |= 1U;
	smallest_size = -1;
	smallest_port = low; /* avoid compiler warning */

other_parity_scan:
	port = low + offset;
	for (i = 0; i < remaining; i += 2, port += 2) {
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
			continue;
		/* For TCP, hinfo is tcp_hashinfo. */
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);

		/* Walk the hash chain from its head. */
		inet_bind_bucket_for_each(tb, &head->chain)
			if (net_eq(ib_net(tb), net) && tb->port == port) {
				/* A bucket for this port already exists (port in use). */
				if (((tb->fastreuse > 0 && reuse) ||
				     (tb->fastreuseport > 0 &&
				      sk->sk_reuseport &&
				      !rcu_access_pointer(sk->sk_reuseport_cb) &&
				      uid_eq(tb->fastuid, uid))) &&
				    (tb->num_owners < smallest_size || smallest_size == -1)) {
					smallest_size = tb->num_owners;/* remember how many owners this port has */
					smallest_port = port;/* remember this port */
				}

				/*
				 * Original notes on when a port may be shared
				 * (conflict helper not shown here):
				 * 1. sockets bound to different network
				 *    interfaces may use the same port;
				 * 2. sockets that all set address reuse and
				 *    are not in the listen state may share a
				 *    port (they are all outgoing sockets);
				 * even when neither 1 nor 2 holds, server
				 * sockets with different source addresses may
				 * share a port.
				 * For ordinary TCP the conflict callback is
				 * inet_csk_bind_conflict.
				 */

				/* No conflict: use this bucket; otherwise try the next port. */
				if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false))
					goto tb_found;
				goto next_port;
			}
		/* No bucket for this port yet: it is free. */
		goto tb_not_found;
next_port:
		spin_unlock_bh(&head->lock);
		cond_resched();
	}

	/* Nothing free: fall back to the reusable port with the fewest owners. */
	if (smallest_size != -1) {
		port = smallest_port;
		goto have_port;
	}
	/* Retry once with the other parity (offset becomes even). */
	offset--;
	if (!(offset & 1))
		goto other_parity_scan;

	if (attempt_half == 1) {
		/* OK we now try the upper half of the range */
		attempt_half = 2;
		goto other_half_scan;
	}
	return ret;

tb_not_found:
	/*
	 * No bucket for this port in the chain: create one and insert it
	 * into the hash chain (for TCP, inside tcp_hashinfo).
	 */
	tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
				     net, head, port);
	if (!tb)
		goto fail_unlock;
tb_found:
	/* The bucket already has owners: the port is in use. */
	if (!hlist_empty(&tb->owners)) {
		if (sk->sk_reuse == SK_FORCE_REUSE)
			goto success;

		if (((tb->fastreuse > 0 && reuse) ||
		     (tb->fastreuseport > 0 &&
		      !rcu_access_pointer(sk->sk_reuseport_cb) &&
		      sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
		    smallest_size == -1)
			goto success;
		if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
			/*
			 * Conflict on a port chosen via the smallest_port
			 * fallback: restart the search, up to 'attempts' times.
			 */
			if ((reuse ||
			     (tb->fastreuseport > 0 &&
			      sk->sk_reuseport &&
			      !rcu_access_pointer(sk->sk_reuseport_cb) &&
			      uid_eq(tb->fastuid, uid))) &&
			    smallest_size != -1 && --attempts >= 0) {
				spin_unlock_bh(&head->lock);
				goto again;
			}
			goto fail_unlock;
		}
		/* A non-reusing owner joins: clear the bucket's fast-reuse hints. */
		if (!reuse)
			tb->fastreuse = 0;
		if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
			tb->fastreuseport = 0;
	} else {
		/* First owner: seed the fast-reuse hints from this sock. */
		tb->fastreuse = reuse;
		if (sk->sk_reuseport) {
			tb->fastreuseport = 1;
			tb->fastuid = uid;
		} else {
			tb->fastreuseport = 0;
		}
	}
success:
	/* Link the sock to the bucket unless it is already bound to one. */
	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, port);
	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
	ret = 0;

fail_unlock:
	spin_unlock_bh(&head->lock);
	return ret;
}

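该函数除了得到可用的端口外,主要作用是将sock对象加入&hinfo->bhash哈希表。我们知道哈希表可以加快搜索的速度,这在数据接收过程中很有用。在数据接收过程中,我们接收发往本机的数据报,依据的是目的IP。但是系统中有很多进程、很多socket连接,仅凭目的IP并不知道数据是要给哪个进程的。通过端口号和哈希表可以快速定位接收数据的sock,然后将数据放到sock的接收队列中,等待用户线程取数据,这样就完成了一次网络通信。(严格来说,bhash主要用于端口分配和绑定冲突检测;TCP接收路径上查找sock使用的是tcp_hashinfo中的ehash和listening_hash,而UDP则直接使用udp_table。)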

最后,总的来说bind系统调用,本质上是根据端口号,将socket调用得到的socket对象加入bind哈希表。




  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值