概念
IO复用是最常用的IO通知机制,它指的是,应用程序通过IO复用函数向内核注册一组事件,内核通过IO复用函数把其中就绪的事件通知给应用程序。因此,IO复用函数本身是阻塞的。Linux上常用的IO复用函数有:
- select
- poll
- epoll_wait
阻塞IO、IO复用、信号驱动的IO(SIGIO)都是同步IO模型。因为这三种IO模型中,IO的读写操作,都是在IO事件发生之后,由应用程序完成。
对于异步IO而言,用户可以直接对IO执行读写操作,这些操作告诉内核用户读写缓冲区的位置,这些操作总是立即返回的,不论IO是否阻塞,因为真正的读写操作已经由内核接管。
同步IO:由应用程序执行IO操作,读操作:将数据从内核缓冲区读入用户缓冲区;写操作:从用户缓冲区写入到内核缓冲区。同步IO向应用程序通知的是IO就绪事件。
异步IO:数据在内核缓冲区和用户缓冲区之间的移动搬运是由内核在后台完成的。异步IO向应用程序通知的是IO完成事件。
常用的三种IO复用方式的区别对比:
poll和epoll使用上的差别:
/* How to index the ready file descriptors returned by poll */
int ret = poll( fds, MAX_EVENT_NUMBER, -1 );
/* Every registered descriptor must be scanned to find the ready ones
   (ret can be used to stop early once all ready fds have been handled) */
for( int i = 0; i < MAX_EVENT_NUMBER; ++i){
if( fds[i].revents & POLLIN ) /* is the i-th descriptor readable? */
{
int sockfd = fds[i].fd;
/* handle sockfd */
}
}
/* How to index the ready file descriptors returned by epoll */
int ret = epoll_wait( epollfd, events, MAX_EVENT_NUMBER, -1 );
/* Only the first ret entries are visited - all of them are ready */
for( int i = 0; i < ret; i++ ){
int sockfd = events[i].data.fd;
/* sockfd is guaranteed to be ready; handle it directly */
}
select
可以维护3个设备描述符的集合(fd_set),分别是可读事件,可写事件和异常事件。用户可以向相应集合中注册需要监听的设备描述符,当有事件发生时,需要逐一校验(FD_ISSET)是哪个设备描述符上,产生了什么事件。由于内核会修改相应的集合,因此每次监听的时候,都需要复位监听集合。
支持并发的selectServer
#include <iostream>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <cassert>
#include <cerrno>
#include <vector>
#include <string.h>
#include <arpa/inet.h>
using namespace std;
int main()
{
    // select-based echo-ish server: accepts TCP connections on port 9999 and
    // multiplexes reads / OOB data over all connected clients with select().
    int listenfd = socket(AF_INET, SOCK_STREAM, 0);
    if(listenfd == -1)
    {
        cout << "create socket fail: " << strerror(errno) << endl;
        return 0;
    }
    // enable address reuse so a quick restart does not hit EADDRINUSE
    int on = 1;
    if(-1 == setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)))
    {
        cout << "setsockopt SO_REUSEADDR failed! error is " << strerror(errno);
        close(listenfd);
        return 0;
    }
    // bind address: listen to port 9999 on any address.
    // Zero the whole struct first so the sin_zero padding is not garbage.
    sockaddr_in server_addr;
    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_addr.s_addr = INADDR_ANY;
    server_addr.sin_port = htons(9999);
    auto ret = bind(listenfd, reinterpret_cast<sockaddr *>(&server_addr),
                    sizeof(server_addr));
    if(ret == -1)
    {
        cout << "bind socket fail: " << strerror(errno) << endl;
        close(listenfd);
        return 0;
    }
    // Start listening; hold at most 10 connections in the backlog queue.
    // listen() does not block - it only switches the socket to listening mode.
    ret = listen(listenfd, 10);
    if(ret == -1)
    {
        cout << "listen socket fail: " << strerror(errno) << endl;
        close(listenfd);
        return 0;
    }
    cout << "listen success " << endl;
    char buff[1024];               // shared read buffer
    std::vector<int> client_fd;    // connected client descriptors
    int max_fd = listenfd;         // highest descriptor seen (select's nfds hint)
    fd_set read_fds;
    fd_set write_fds;
    fd_set exception_fds;
    FD_ZERO(&read_fds);
    FD_ZERO(&write_fds);
    FD_ZERO(&exception_fds);
    // monitor listenfd for readability: readable listenfd == pending accept
    FD_SET(listenfd, &read_fds);
    while (1)
    {
        // select() mutates the fd_sets it is given, so hand it copies and
        // keep the master sets (read_fds / exception_fds) intact.
        auto r_fds = read_fds;
        auto e_fds = exception_fds;
        cout << "start select" << endl;
        // timeout == nullptr -> block until some descriptor becomes ready
        ret = select(max_fd + 1, &r_fds, nullptr, &e_fds, nullptr);
        if(ret < 0)
        {
            cout << "select fail:" << strerror(errno) << endl;
            break;
        }
        cout << "select success: " << ret << endl;
        // a readable listenfd means a new connection is waiting
        if(FD_ISSET(listenfd, &r_fds))
        {
            cout << "start accept" << endl;
            sockaddr_in client_addr;
            socklen_t client_addr_len = sizeof(client_addr);
            int connfd = accept(listenfd, reinterpret_cast<sockaddr *>(&client_addr),
                                &client_addr_len);
            if(connfd < 0)
            {
                // a transient accept failure must not bring the server down
                cout << "accept fail:" << strerror(errno) << endl;
                continue;
            }
            getpeername(connfd, reinterpret_cast<sockaddr *>(&client_addr), &client_addr_len);
            cout << "accept socket success, client addr is " << inet_ntoa(client_addr.sin_addr) \
                 << ":" << ntohs(client_addr.sin_port) << endl;
            client_fd.push_back(connfd);
            max_fd = std::max(max_fd, connfd);
            // register the new client with the master sets
            FD_SET(connfd, &read_fds);
            FD_SET(connfd, &write_fds);
            FD_SET(connfd, &exception_fds);
        }
        // Service the connected clients. Dead connections are removed via the
        // iterator returned by vector::erase - reusing the erased iterator
        // (the old erase(it); it--; pattern) is undefined behaviour.
        for(auto it = client_fd.begin(); it != client_fd.end(); )
        {
            bool drop = false;
            if(FD_ISSET(*it, &r_fds))
            {
                memset(buff, '\0', sizeof(buff));
                auto read_size = recv(*it, buff, sizeof(buff) - 1, MSG_DONTWAIT);
                if(read_size == 0)
                {
                    // orderly shutdown by the peer
                    cout << "client closed!" << endl;
                    drop = true;
                }
                else if (read_size < 0)
                {
                    // MSG_DONTWAIT: EAGAIN just means "no data right now",
                    // it is not a reason to drop the connection
                    if(errno == EAGAIN || errno == EWOULDBLOCK)
                    {
                        cout << "read later" << endl;
                    }
                    else
                    {
                        cout << "read error:" << strerror(errno) << endl;
                        drop = true;
                    }
                }
                else
                {
                    cout << "read " << read_size << " data:" << buff << endl;
                }
            }
            // out-of-band (urgent) data is reported as an exception event
            if(!drop && FD_ISSET(*it, &e_fds))
            {
                memset(buff, '\0', sizeof(buff));
                auto rd_size = recv(*it, buff, sizeof(buff) - 1, MSG_OOB);
                if(rd_size <= 0)
                {
                    cout << "read exception error" << endl;
                }
                else
                {
                    cout << "read exception " << rd_size << " data:" << buff << endl;
                }
            }
            if(drop)
            {
                close(*it);
                /* stop select() from watching this descriptor */
                FD_CLR(*it, &read_fds);
                FD_CLR(*it, &write_fds);
                FD_CLR(*it, &exception_fds);
                it = client_fd.erase(it);   // erase returns the next valid iterator
            }
            else
            {
                ++it;
            }
        }
    }
    for(const auto &index : client_fd)
    {
        close(index);
    }
    close(listenfd);
    return 0;
}
poll
poll和select类似,它将需要监听的设备描述符和事件都维护在pollfd结构数组中,由内核通过每个元素的revents字段返回实际发生的事件。
// poll的原型
#include <poll.h>
int poll ( struct pollfd* fds, nfds_t nfds, int timeout );
- fds: 是一个 pollfd 结构类型的数组,它指定所有我们感兴趣的文件描述符上发生的可读、可写和异常事件。定义如下:
struct pollfd { int fd; /* file descriptor */ short events; /* requested events */ short revents; /* returned events */ }; fd: 指定需要监听的设备描述符; events: 监听fd上的哪些事件; revents: 由内核修改,以通知用户程序fd上实际发生了哪些事件,内核会自己复位。 同样需要用户程序去逐一校验revents,来获取相应的事件通知。
- nfds: 指定被监听事件集合 fds 的大小
- timeout: 指定 poll 的超时值,单位是毫秒。当 timeout == -1 时,poll 调用将永远阻塞,直到某个时间发生;当 timeout == 0 时,poll 调用将立即返回
监听的事件
支持并发的poll
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <poll.h>
#include <errno.h>
#include <vector>
#include <iostream>
using namespace std;
#define BUFFER_SIZE 1024
int main(int argc, char *argv[])
{
    // poll-based echo server on port 8000: slot 0 of the pollfd array is the
    // listening socket, every later slot is one connected client.
    // 1. create the TCP listening socket
    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if(sockfd == -1)
    {
        cout << "create socket fail: " << strerror(errno) << endl;
        return 0;
    }
    int on = 1;
    if(-1 == setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)))
    {
        cout << "setsockopt SO_REUSEADDR failed! error is " << strerror(errno);
        close(sockfd);
        return 0;
    }
    // 2. bind sockfd to port 8000 on any local address
    struct sockaddr_in my_addr;
    bzero(&my_addr, sizeof(my_addr));
    my_addr.sin_family = AF_INET;
    my_addr.sin_port = htons(8000);
    my_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    if(-1 == bind(sockfd, (struct sockaddr *)&my_addr, sizeof(my_addr)))
    {
        cout << "bind socket fail: " << strerror(errno) << endl;
        close(sockfd);
        return 0;
    }
    // 3. listen: does not block, only switches the socket to listening mode;
    //    hold at most 10 pending connections in the backlog queue
    auto ret = listen(sockfd, 10);
    if(ret == -1)
    {
        cout << "listen socket fail: " << strerror(errno) << endl;
        close(sockfd);
        return 0;
    }
    cout << "listen successs " << endl;
    // 4. poll bookkeeping: clients[0] is always the listening socket
    std::vector<pollfd> clients;
    clients.emplace_back(pollfd{sockfd, POLLIN});
    // 5. event loop over all registered descriptors
    while(1)
    {
        char buff[BUFFER_SIZE];
        int ready = poll(clients.data(), static_cast<nfds_t>(clients.size()), -1);
        if(ready < 0)
        {
            cout << "poll fail: " << strerror(errno) << endl;
            break;
        }
        // 5.1 new connection pending on the listening socket?
        if((clients[0].revents & POLLIN) == POLLIN )
        {
            sockaddr_in client_addr;
            socklen_t client_addr_len = sizeof(client_addr);
            int connfd = accept(sockfd,
                                reinterpret_cast<sockaddr *>(&client_addr),
                                &client_addr_len);
            if(connfd < 0)
            {
                cout << "accept fail:" << strerror(errno) << endl;
                continue;
            }
            getpeername(connfd, reinterpret_cast<sockaddr *>(&client_addr), &client_addr_len);
            cout << "accept socket success, client addr is " << inet_ntoa(client_addr.sin_addr) \
                 << ":" << ntohs(client_addr.sin_port) << endl;
            // hand the new descriptor to poll for monitoring
            clients.emplace_back(pollfd{connfd, POLLIN});
            // if the listening socket was the only ready descriptor, re-poll
            if(--ready <= 0)
            {
                continue;
            }
        }
        // 5.2 service the ready clients; iterate by index because
        //     vector::erase invalidates iterators
        for(size_t i = 1; i < clients.size(); )
        {
            if(clients[i].revents & (POLLIN | POLLERR))
            {
                // 5.2.1 receive client data; reserve one byte so buff stays
                //       NUL-terminated for the cout below
                memset(buff, '\0', sizeof(buff));
                ssize_t len = recv(clients[i].fd, buff, sizeof(buff) - 1, 0);
                if(len < 0)
                {
                    if((errno == EAGAIN) || (errno == EWOULDBLOCK))
                    {
                        cout << "read later" << endl;
                    }
                    else
                    {
                        cout << "error happend: " << strerror(errno) << endl;
                        close(clients[i].fd);
                        clients.erase(clients.begin() + i);
                        continue;   // index i now names the next element
                    }
                }
                else if(len == 0)   // peer closed the connection
                {
                    cout << "close fd[" << clients[i].fd << "]" << endl;
                    close(clients[i].fd);
                    clients.erase(clients.begin() + i);
                    continue;
                }
                else                // normal data: echo it back
                {
                    cout << "read " << len << " data:[" << buff << "]" << endl;
                    send(clients[i].fd, buff, static_cast<size_t>(len), 0);
                }
            }
            ++i;
        }
    }
    // release every descriptor we still own (slot 0 is the listening socket)
    for(const auto &c : clients)
    {
        close(c.fd);
    }
    return 0;
}
epoll
epoll则会把用户关心的设备描述符在内核上维护一个事件表,通过epoll_create创建这个事件表,并由一个设备描述符指向这个事件表。可以通过epoll_ctl来操作该事件表,进行增删改。通过epoll_wait等待事件发生,并返回已就绪的事件表,而不是像poll和select那样,需要用户再去遍历校验哪个设备描述符上发生了事件。同时epoll还支持ET边沿触发模式。
支持并发的epollServer
#include <iostream>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <cassert>
#include <vector>
#include <string.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <sys/epoll.h>
#define MAX_EVENT_NUM 1024
#define BUFFER_SIZE 10
using namespace std;
// Switch fd to non-blocking mode.
// Returns the previous flag word on success, -1 if either fcntl call fails
// (the original version silently ignored both failures).
int setNoBlocking(int fd)
{
    int old_option = fcntl(fd, F_GETFL);
    if(old_option == -1)
    {
        return -1;
    }
    if(fcntl(fd, F_SETFL, old_option | O_NONBLOCK) == -1)
    {
        return -1;
    }
    return old_option;
}
// Register EPOLLIN for fd in the epoll kernel event table referred to by
// epollfd and switch fd to non-blocking mode.
// enable_et selects edge-triggered (EPOLLET) delivery.
// Errors from epoll_ctl / setNoBlocking are now reported instead of ignored.
void addfd(int epollfd, int fd, bool enable_et)
{
    epoll_event event;
    event.data.fd = fd;
    event.events = EPOLLIN;
    if(enable_et)
    {
        event.events |= EPOLLET;
    }
    if(epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event) == -1)
    {
        cout << "epoll_ctl ADD fail: " << strerror(errno) << endl;
        return;
    }
    // non-blocking IO is mandatory for ET and harmless for LT
    if(setNoBlocking(fd) == -1)
    {
        cout << "set nonblocking fail: " << strerror(errno) << endl;
    }
}
// LT (level-triggered) event handler: an EPOLLIN event keeps firing as long
// as unread data remains, so one recv() per event is enough.
void LT(epoll_event *events, int num, int epollfd, int listenfd)
{
    char buff[BUFFER_SIZE];
    for(int i = 0; i < num; i++)
    {
        int sockfd = events[i].data.fd;
        if(sockfd == listenfd)
        {
            // readable listenfd == a pending connection
            sockaddr_in client_addr;
            socklen_t client_addr_len = sizeof(client_addr);
            int connfd = accept(listenfd, reinterpret_cast<sockaddr *>(&client_addr),
                                &client_addr_len);
            if(connfd < 0)
            {
                cout << "accept fail:" << strerror(errno) << endl;
                continue;
            }
            getpeername(connfd, reinterpret_cast<sockaddr *>(&client_addr), &client_addr_len);
            cout << "accept socket success, client addr is " << inet_ntoa(client_addr.sin_addr) \
                 << ":" << ntohs(client_addr.sin_port) << endl;
            addfd(epollfd, connfd, false);   // register the client in LT mode
        }
        // read event ready
        else if(events[i].events & EPOLLIN)
        {
            // this branch re-fires while the socket buffer still holds data
            cout << "LT EPOLLIN events" << endl;
            memset(buff, '\0', sizeof(buff));
            auto read_size = recv(sockfd, buff, sizeof(buff) - 1, 0);
            if(read_size == 0)
            {
                cout << "client closed!" << endl;
                close(sockfd);
                continue;
            }
            else if (read_size < 0)
            {
                // addfd() made the socket non-blocking: EAGAIN only means
                // "no data right now" and must NOT drop the connection
                // (the previous version closed the socket here)
                if((errno == EAGAIN) || (errno == EWOULDBLOCK))
                {
                    cout << "read later" << endl;
                    continue;
                }
                cout << "read error:" << strerror(errno) << endl;
                close(sockfd);
                continue;
            }
            else
            {
                cout << "read " << read_size << " data:" << buff << endl;
            }
        }
        else
        {
            cout << "something else happend: " << events[i].events << endl;
        }
    }
}
// ET (edge-triggered) event handler: each event fires exactly once per edge,
// so both the accept queue and the socket read buffer must be fully drained.
void ET(epoll_event *events, int num, int epollfd, int listenfd)
{
    char buff[BUFFER_SIZE];
    for(int i = 0; i < num; i++)
    {
        int sockfd = events[i].data.fd;
        if(sockfd == listenfd)
        {
            // listenfd is registered with EPOLLET (see main): one event may
            // stand for several queued connections, so accept() in a loop
            // until the (non-blocking) queue is empty - a single accept()
            // would strand the remaining connections until the next edge
            while(1)
            {
                sockaddr_in client_addr;
                socklen_t client_addr_len = sizeof(client_addr);
                int connfd = accept(listenfd, reinterpret_cast<sockaddr *>(&client_addr),
                                    &client_addr_len);
                if(connfd < 0)
                {
                    // EAGAIN/EWOULDBLOCK: the accept queue is drained
                    if((errno != EAGAIN) && (errno != EWOULDBLOCK))
                    {
                        cout << "accept fail:" << strerror(errno) << endl;
                    }
                    break;
                }
                getpeername(connfd, reinterpret_cast<sockaddr *>(&client_addr), &client_addr_len);
                cout << "accept socket success, client addr is " << inet_ntoa(client_addr.sin_addr) \
                     << ":" << ntohs(client_addr.sin_port) << endl;
                addfd(epollfd, connfd, true);   // register the client in ET mode
            }
        }
        // read event ready
        else if(events[i].events & EPOLLIN)
        {
            // this code will not be re-triggered for the same data, so loop
            // until recv() reports EAGAIN to empty the socket read buffer
            cout << "ET EPOLLIN events" << endl;
            while(1)
            {
                memset(buff, '\0', sizeof(buff));
                auto read_size = recv(sockfd, buff, sizeof(buff) - 1, 0);
                if(read_size == 0)
                {
                    cout << "client closed!" << endl;
                    close(sockfd);
                    break;
                }
                else if (read_size < 0)
                {
                    // for non-blocking IO this means all data has been read;
                    // the next edge will re-arm EPOLLIN on sockfd
                    if((errno == EAGAIN) || (errno == EWOULDBLOCK))
                    {
                        cout << "read later" << endl;
                        break;
                    }
                    else
                    {
                        cout << "error happend: " << strerror(errno) << endl;
                        close(sockfd);
                        break;
                    }
                }
                else
                {
                    cout << "read " << read_size << " data:" << buff << endl;
                }
            }
            cout << "ET EPOLLIN events END" << endl;
        }
        else
        {
            cout << "something else happend: " << events[i].events << endl;
        }
    }
}
int main()
{
    // epoll-based server on port 9999; events are dispatched through the
    // edge-triggered handler ET() (swap in LT() to compare the two modes).
    // Create a socket (IPv4 TCP)
    int listenfd = socket(AF_INET, SOCK_STREAM, 0);
    if(listenfd == -1)
    {
        cout << "create socket fail: " << strerror(errno) << endl;
        return 0;
    }
    // enable address reuse so a quick restart does not hit EADDRINUSE
    int on = 1;
    if(-1 == setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)))
    {
        cout << "setsockopt SO_REUSEADDR failed! error is " << strerror(errno);
        close(listenfd);
        return 0;
    }
    // bind address: listen to port 9999 on any address.
    // Zero the whole struct first so the sin_zero padding is not garbage.
    sockaddr_in server_addr;
    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_addr.s_addr = INADDR_ANY;
    server_addr.sin_port = htons(9999);
    auto ret = bind(listenfd, reinterpret_cast<sockaddr *>(&server_addr),
                    sizeof(server_addr));
    if(ret == -1)
    {
        cout << "bind socket fail: " << strerror(errno) << endl;
        close(listenfd);
        return 0;
    }
    // Start listening; hold at most 10 connections in the backlog queue.
    // listen() does not block - it only switches the socket to listening mode.
    ret = listen(listenfd, 10);
    if(ret == -1)
    {
        cout << "listen socket fail: " << strerror(errno) << endl;
        close(listenfd);
        return 0;
    }
    cout << "listen successs " << endl;
    epoll_event events[MAX_EVENT_NUM];
    // the size argument is only a hint on modern kernels but must be > 0
    int epollfd = epoll_create(10);
    if(epollfd == -1)
    {
        cout << "epoll_create fail: " << strerror(errno) << endl;
        close(listenfd);
        return 0;
    }
    // listenfd in ET mode: ET() therefore drains the accept queue per event
    addfd(epollfd, listenfd, true);
    while(1)
    {
        cout << "in epoll_wait" << endl;
        int ret = epoll_wait(epollfd, events, MAX_EVENT_NUM, -1);
        if(ret < 0)
        {
            // an interrupting signal (EINTR) is not fatal: retry the wait
            if(errno == EINTR)
            {
                continue;
            }
            cout << "epoll_wait fail: " << strerror(errno) << endl;
            break;
        }
        cout << "end epoll_wait: " << ret << endl;
        ET(events, ret, epollfd, listenfd);
    }
    close(epollfd);
    close(listenfd);
    return 0;
}