一、基本示例
#include <stdio.h>
#include <sys/socket.h> // socket()
#include <arpa/inet.h> // inet_addr()
#include <netinet/in.h> // sockaddr_in{} INADDR_ANY
#include <unistd.h> // close()
#include <errno.h> // errno
#include <string.h> // strerror()
#include <stdbool.h> // true
/*
 * Iterative TCP echo server.
 *
 * Binds a listening socket to 127.0.0.1:10240 and then serves one client at a
 * time: accept a connection, read a single message, echo exactly the bytes
 * that were received, close the connection.
 *
 * Returns -1 when the listening socket cannot be created, configured, bound
 * or switched to the listening state. Once listening, the accept loop never
 * terminates on its own.
 */
int main(void) {
    int bYes = 1;
    struct sockaddr_in inAddr;

    printf("\n *** cpl time [%s] *** \n",__TIME__);

    /* Zero the whole structure first so the padding bytes handed to bind()
     * are not indeterminate. */
    memset(&inAddr, 0, sizeof(inAddr));
    inAddr.sin_family = AF_INET;
    inAddr.sin_port = htons(10240);
    /* Alternatives: "0.0.0.0" (every local address) or the address of one
     * specific interface, e.g. "192.168.28.56". */
    inAddr.sin_addr.s_addr = inet_addr("127.0.0.1");

    int listenfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (listenfd < 0) {
        printf("[%d][%s]", errno, strerror(errno));
        return -1;
    }

    /* Both options must be set before bind() to have any effect. */
    if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEPORT, &bYes, sizeof(bYes)) < 0) {
        printf("[%d][%s]", errno, strerror(errno));
        goto fail;
    }
    if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &bYes, sizeof(bYes)) < 0) {
        printf("[%d][%s]", errno, strerror(errno));
        goto fail;
    }
    if (bind(listenfd, (struct sockaddr *)&inAddr, sizeof(inAddr)) < 0) {
        printf("bind [%d][%s]", errno, strerror(errno));
        goto fail;
    }
    if (listen(listenfd, 1) < 0) {
        printf("listen [%d][%s]", errno, strerror(errno));
        goto fail;
    }

    while (1) {
        struct sockaddr_in recvAddr;
        socklen_t stulen = sizeof(recvAddr);

        int fd = accept(listenfd, (struct sockaddr *)&recvAddr, &stulen);
        if (fd < 0) {
            /* Only negative values signal failure; 0 is a valid descriptor. */
            printf("err [%d][%s]", errno, strerror(errno));
            continue;
        }

        /* Linger enabled with a zero timeout: close() does not wait for
         * unsent data to drain (see socket(7), SO_LINGER). */
        struct linger so_linger = {
            .l_onoff = true,  /* enable linger control */
            .l_linger = 0     /* linger timeout in seconds */
        };
        if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &so_linger, sizeof(so_linger)) < 0) {
            /* Best effort: report the failure and keep serving the client. */
            printf("setsockopt SO_LINGER [%d][%s]", errno, strerror(errno));
        }

        /* Read at most sizeof(buf) - 1 bytes so the zero-filled buffer is
         * always NUL-terminated when printed with %s. */
        char buf[1024] = {0};
        ssize_t bt = recv(fd, buf, sizeof(buf) - 1, 0);
        if (bt < 0) {
            printf("recv [%d][%s]", errno, strerror(errno));
        } else {
            printf("recv [%d] from [%s] :\n%s", (int)bt, inet_ntoa(recvAddr.sin_addr), buf);

            /* Echo only what was received; send() may accept fewer bytes
             * than requested, so loop until everything is handed over. */
            ssize_t sent = 0;
            while (sent < bt) {
                ssize_t n = send(fd, buf + sent, (size_t)(bt - sent), 0);
                if (n < 0) {
                    if (errno == EINTR) {
                        continue;
                    }
                    printf("send [%d][%s]", errno, strerror(errno));
                    break;
                }
                sent += n;
            }
        }
        close(fd);
    }

    /* Not reached: the accept loop above has no exit. */
    close(listenfd);
    return 0;

fail:
    /* Release the listening socket on every setup failure. */
    close(listenfd);
    return -1;
}
上面的服务器 迭代 处理客户请求,被称为 “迭代服务器”(iterative server);
同时处理多个请求 的服务器,被称为 “并发服务器”(concurrent server),后续介绍;
二、参数解析
2.1 结构体
struct sockaddr
内核 tcp / ip 协议栈实现:
// include\uapi\linux\socket.h
#define _K_SS_MAXSIZE 128 /* Implementation specific max size (see RFC2553) */
typedef unsigned short __kernel_sa_family_t;
// 这里本来 使用 匿名联合体 保证 内存对齐,这里简化了
struct __kernel_sockaddr_storage {
struct {
__kernel_sa_family_t ss_family; /* address family */
char __data[_K_SS_MAXSIZE - sizeof(unsigned short)];
};
};
POSIX API 实现:
//include\linux\socket.h
typedef __kernel_sa_family_t sa_family_t;
//1003.1g requires sa_family_t and that sa_data is char.
struct sockaddr {
sa_family_t sa_family; /* address family, AF_xxx */
char sa_data_min[14]; /* Minimum 14 bytes of protocol address */
};
struct sockaddr_in
//include\uapi\linux\in.h
struct in_addr { __be32 s_addr; };
#define __SOCK_SIZE__ 16 /* sizeof (struct sockaddr) */
struct sockaddr_in {
__kernel_sa_family_t sin_family; /* Address family */
__be16 sin_port; /* Port number */
struct in_addr sin_addr; /* Internet address */
/* Pad to size of `struct sockaddr'. */
unsigned char __pad[__SOCK_SIZE__ - sizeof(short int) -
sizeof(unsigned short int) - sizeof(struct in_addr)];
};
//include\linux\byteorder\generic.h
#define ___htonl(x) __cpu_to_be32(x)
#define ___htons(x) __cpu_to_be16(x)
#define ___ntohl(x) __be32_to_cpu(x)
#define ___ntohs(x) __be16_to_cpu(x)
socket 可以绑定 AF_INET,AF_UNIX 等各种通信域 ( communication domain ) 协议簇;
sockaddr.sa_data 含义会随 通信域 变化,sockaddr_in 就是 sockaddr 在 AF_INET 域内特化的形式;
使用 sockaddr_in 时要注意 保持网络字节序,即 大端序(big-endian):数值的 高位字节 存放在 低地址;
int inet_aton (const char *name, struct in_addr *addr)
# MT-Safe locale | AS-Safe | AC-Safe
uint32_t inet_addr (const char *name)
# MT-Safe locale | AS-Safe | AC-Safe
字符串 到 整型 的转换可以使用 glibc 库函数;
listen() 将 sockfd 标记为被动套接字,作为 accept 的入参;
SYNOPSIS
int listen(int sockfd, int backlog);
DESCRIPTION
listen() marks the socket referred to by sockfd as a passive socket, that is, as a socket that will be used to accept
incoming connection requests using accept(2).
The sockfd argument is a file descriptor that refers to a socket of type SOCK_STREAM or SOCK_SEQPACKET.
The backlog argument defines the maximum length to which the queue of pending connections for sockfd may grow. If a
connection request arrives when the queue is full, the client may receive an error with an indication of ECONNREFUSED
or, if the underlying protocol supports retransmission, the request may be ignored so that a later reattempt at con‐
nection succeeds.
RETURN VALUE
On success, zero is returned. On error, -1 is returned, and errno is set to indicate the error.
几个常用 Inet 地址;
//include\uapi\linux\in.h
/* Address to accept any incoming messages. */
#define INADDR_ANY ((unsigned long int) 0x00000000)
/* Address to send to all hosts. */
#define INADDR_BROADCAST ((unsigned long int) 0xffffffff)
/* Network number for local host loopback. */
#define IN_LOOPBACKNET 127
/* Address to loopback in software to local host. */
#define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */
#define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000)
2.2 setsockopt
2.2.1 SO_REUSEADDR
man 7 socket
SO_REUSEADDR
Indicates that the rules used in validating addresses supplied in a bind(2) call should allow reuse of local addresses. For AF_INET
sockets this means that a socket may bind, except when there is an active listening socket bound to the address. When the listening
socket is bound to INADDR_ANY with a specific port then it is not possible to bind to this port for any local address. Argument is
an integer boolean flag.
一台设备,可以坐拥多个 IP,比如配有多个网卡;
对于 AF_INET 域,INADDR_ANY(0.0.0.0)作为 IP 通配符,指代当前设备所有 Src IP(包括 127.0.0.1);
对于 套接字 五元组 {SrcIP,SrcPort,DestIP,DestPort,Proto},UNIX 系统 包括 以下两个限制:
- SrcPort 一旦绑定到 活动的监听套接字 上,就无法再绑定其他套接字;
- SrcPort 绑定到 INADDR_ANY 上后就 无法 绑定 任何其他 SrcIP 上;
SO_REUSEADDR 选项能够 打破限制 并运用在以下四种场景中;
2.2.1.1 非活动的监听套接字
- SrcPort 没有 绑定到 活动的 监听套接字上时,(开启 SO_REUSEADDR)能够被再次绑定到 新监听套接字上;
- SrcPort 没有绑定到 活动的监听套接字[1] 的场景如下:
(1) 启动了一个监听服务器;
(2) 接收 并 派生一个子进程处理请求(代理、负载均衡);
(3) 监听服务器 因 崩溃 等原因 终止运行;
(4) 守护进程 或 脚本重启 监听服务器,此时 SrcPort 正绑定在 子进程 非监听 套接字上;
倘若没有 SO_REUSEADDR,(4)中 重启 监听服务器 会因为 SrcPort 被占用而导致 bind() 失败;
还是以开头 基本示例 为例:
//server.c
// ...
int listenfd = socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);
if (listenfd < 0) {
printf("[%d][%s]",errno, strerror(errno));
return -1;
}
//注意这里没有设置 SO_REUSEADDR = 1
//setsockopt(listenfd,SOL_SOCKET,SO_REUSEADDR,&bYes,sizeof(bYes));
if(0 > bind(listenfd,(struct sockaddr*)&inAddr,sizeof(struct sockaddr))) {
printf("bind [%d][%s]",errno, strerror(errno));
return -1;
}
// ...
while(1) {
struct sockaddr_in recvAddr;
socklen_t stulen = sizeof(struct sockaddr);
int fd = accept(listenfd,(struct sockaddr*)&recvAddr,&stulen);
if( fd <= 0 ) continue;
// child
if(fork()==0) {
printf("sub server [%d] connect from [%s]:[%u]\n",
getpid(),inet_ntoa(recvAddr.sin_addr),ntohs(recvAddr.sin_port));
close(listenfd); // fork 子进程后 主动关闭 listenfd
while( (n = recv(fd,szbuf,sizeof(szbuf),0)) > 0) {
printf("sub server [%d], recv [%d] from [%s]:[%u]:%s\n",
getpid(),n,inet_ntoa(recvAddr.sin_addr),ntohs(recvAddr.sin_port),szbuf);
send(fd,szbuf,n,0); // 回显
}
close(fd);
exit(0);
}
// parent
printf("\nthis is listener, continue ...\n");
// 原则上,父进程必须使用 wait() 回收子进程资源
}
close(listenfd);
return 0;
}
> gcc server.c -o server
> ./server &
> netstat -alp |grep 10240
tcp 0 0 0.0.0.0:10240 0.0.0.0:* LISTEN 17023/./server # 父进程开始监听
> netcat 127.0.0.1 10240
hello # netcat 建立的 client 发送 hello
hello # client 收到回显
先使用 netcat 命令工具 与 server 父进程 建立连接,输入 hello 获得 server 的回显;
打开一个新的终端,netstat 查看当前套接字状态,server 父进程(本文 pid = 17023)监听套接字 和 subserver 子进程(pid = 17028)服务套接字 均 “健在”;
杀死 server 父进程,剩下 fork 得到的 subserver 服务套接字(非监听套接字);
重启 server 父进程:
> netstat -alp |grep 10240
tcp 0 0 0.0.0.0:10240 0.0.0.0:* LISTEN 17023/./server # 监听中 server 父进程
tcp 0 0 localhost:10240 localhost:51742 ESTABLISHED 17028/./server # fork 的 subserver 子进程
> kill -9 17023 # 此时杀掉监听父进程,那么端口 10240 只在 子进程 中绑定到普通套接字上,提供回显服务
> ./server & # 此时重启 监听父进程
*** cpl time [17:30:27] ***
bind [98][Address already in use] # 套接字绑定失败:没有开启 SO_REUSEADDR,即便 监听父进程 已被杀死,端口 仍被 子进程的已连接套接字 占用
[1]+ Exit 255 ./server
10240 端口 无法被绑定到重启的 父进程 监听套接字 上,开启 SO_REUSEADDR 后,则能够绑定 !!
setsockopt(listenfd,SOL_SOCKET,SO_REUSEADDR,&bYes,sizeof(bYes));
> ./server &
> netstat -alp |grep 10240
tcp 0 0 0.0.0.0:10240 0.0.0.0:* LISTEN 17139/./server # 重启后 父进程 成功绑定监听
tcp 0 0 localhost:10240 localhost:51742 ESTABLISHED 17028/./server # 子进程 还在占用 10240 端口
2.2.1.2 同一个端口多个服务器实例
- 理论上只要每个服务器的 SrcIP 不同,如 INADDR_ANY 和 loopback(127.0.0.1),那么就应该允许重复 bind;
- 树莓 pi 4 + Ubuntu,即便 设置了 SO_REUSEADDR 同样 bind 失败…
// server1 先启动
int bYes = 1;
inAddr.sin_port = htons(10240);
inAddr.sin_addr.s_addr = INADDR_ANY; // = inet_addr("0.0.0.0");
if (setsockopt(listenfd,SOL_SOCKET,SO_REUSEADDR,&bYes,sizeof(bYes)) < 0) {
printf("[%d][%s]",errno, strerror(errno));
return -1;
}
...
// server2 后启动
int bYes = 1;
inAddr.sin_port = htons(10240);
inAddr.sin_addr.s_addr = inet_addr("127.0.0.1"); // 或者 192.168.28.56
if (setsockopt(listenfd,SOL_SOCKET,SO_REUSEADDR,&bYes,sizeof(bYes)) < 0) {
printf("[%d][%s]",errno, strerror(errno));
return -1;
}
...
> ./server1 &
> ./server2 &
bind [98][Address already in use] # bind 失败
目前 《UNIX网络编程 卷一》给出的解释是[2]:“很多操作系统已经不允许 对已经绑定了通配地址的端口 再绑定更为明确的地址”;
目的是 为了防止某些 恶意服务 “劫持” 正在提供服务的端口,即便设置了 SO_REUSEADDR,重复绑定 依旧会失败;
问题是,我先启动的 server1 绑定到 127.0.0.1,后启动 server2 绑定到 INADDR_ANY,应该没有问题才对?
2.2.1.3 同一个端口多个IP
// server1 先启动
int bNo = 0;
inAddr.sin_port = htons(10240);
inAddr.sin_addr.s_addr = inet_addr("192.168.28.56"); // 注意 IP
#if 0
if (setsockopt(listenfd,SOL_SOCKET,SO_REUSEADDR,&bYes,sizeof(bYes)) < 0) {
printf("[%d][%s]",errno, strerror(errno));
return -1;
}
#endif
...
// server2 后启动
int bYes = 1;
inAddr.sin_port = htons(10240);
inAddr.sin_addr.s_addr = inet_addr("127.0.0.1"); // 注意 IP
#if 0
if (setsockopt(listenfd,SOL_SOCKET,SO_REUSEADDR,&bYes,sizeof(bYes)) < 0) {
printf("[%d][%s]",errno, strerror(errno));
return -1;
}
#endif
...
> ./server1 &
> ./server2 &# bind 成功
根据实验的结果:即便没有设置 SO_REUSEADDR,同一个 SrcPort 也可以绑定到不同 SrcIP;
2.2.1.4 完全重复的捆绑(completely duplicate binding)
- 目前常见协议中,仅 UDP 协议支持;
- 这个特性主要用于多播(multicast)时,同一个主机上同时运行一个应用程序的多个副本;UDP 数据包是 多播(或者是 广播 broadcast)时,给每个套接字送一个副本;如果是单播,就只给单个套接字;详见 《UNIX网络编程 卷一》,这里不做讨论;
2.3 SO_REUSEPORT
man 7 socket
SO_REUSEPORT (since Linux 3.9)
Permits multiple AF_INET or AF_INET6 sockets to be bound to an identical socket address. This option must be set on each socket
(including the first socket) prior to calling bind(2) on the socket. To prevent port hijacking, all of the processes binding to the
same address must have the same effective UID. This option can be employed with both TCP and UDP sockets.
For TCP sockets, this option allows accept(2) load distribution in a multi-threaded server to be improved by using a distinct lis‐
tener socket for each thread. This provides improved load distribution as compared to traditional techniques such using a single
accept(2)ing thread that distributes connections, or having multiple threads that compete to accept(2) from the same socket.
For UDP sockets, the use of this option can provide better distribution of incoming datagrams to multiple processes (or threads) as
compared to the traditional technique of having multiple processes compete to receive datagrams on the same socket.
这个是重量级!允许 真·完全重复的捆绑;
只有一个要求,每个 套接字,都在 bind 前设置过 SO_REUSEPORT;
// server1: bind the wildcard address on port 10240 with SO_REUSEPORT enabled.
// The option value must be non-zero and must be set before bind(); the
// variable is the bYes that setsockopt() below actually reads.
int bYes = 1;
inAddr.sin_port = htons(10240);
inAddr.sin_addr.s_addr = htonl(INADDR_ANY);
if (setsockopt(listenfd,SOL_SOCKET,SO_REUSEPORT,&bYes,sizeof(bYes)) < 0) {
printf("[%d][%s]",errno, strerror(errno));
return -1;
}
if(0 > bind(listenfd,(struct sockaddr*)&inAddr,sizeof(struct sockaddr))) {
printf("bind [%d][%s]",errno, strerror(errno));
return -1;
}
...
// server2
int bYes = 1;
inAddr.sin_port = htons(10240);
inAddr.sin_addr.s_addr = inet_addr("192.168.28.56");
if (setsockopt(listenfd,SOL_SOCKET,SO_REUSEPORT,&bYes,sizeof(bYes)) < 0) {
printf("[%d][%s]",errno, strerror(errno));
return -1;
}
if(0 > bind(listenfd,(struct sockaddr*)&inAddr,sizeof(struct sockaddr))) {
printf("bind [%d][%s]",errno, strerror(errno));
return -1;
}
...
> ./server1 &
> ./server2 &
> ./server2 &
> netstat -lp | grep 10240
tcp 0 0 0.0.0.0:10240 0.0.0.0:* LISTEN 3722/./server1
tcp 0 0 hk-desktop:10240 0.0.0.0:* LISTEN 3670/./server2
tcp 0 0 hk-desktop:10240 0.0.0.0:* LISTEN 3669/./server2
2.4 struct linger
// Build the linger settings and apply them to socket fd via SO_LINGER.
struct linger so_linger = {
.l_onoff = true, // enable linger control
.l_linger = 0 // linger timeout: 0 seconds (NOTE(review): this is the SO_LINGER interval, not a CLOSE_WAIT timer)
};
setsockopt(fd,SOL_SOCKET,SO_LINGER,&so_linger,sizeof(so_linger));
四次挥手的最后,主动关闭的一方 会进入 TIME_WAIT 状态,等待 2 个 MSL 时间后 才真正释放 套接字;
设置 l_onoff = true 后,关闭套接字(或者进程崩溃)时,内核 只会等待 指定的 l_linger 时间,之后便抛弃 套接字 内核缓冲区中 残留的数据[3],既不等待 2 倍 MSL 时间,也不重传;