UNP中总结了几种IO模型。阻塞式IO是指在数据准备好之前,进程会阻塞(睡眠)于相应的系统调用,默认情况下所有的套接字都是阻塞的;非阻塞式IO,即轮询:每次进行系统调用时,如果数据没有准备好,进程不阻塞而是立即返回一个错误,这样反复轮询会耗费CPU;而select 和 poll 不过是站在更高的抽象层次上(代理),不是阻塞在每个系统调用上,而是同时监控很多描述符,当有一个或多个注册的IO事件发生时,我们仍需要轮询一遍所有的描述符(流),逐个查看是否ready,同样不适合高并发的server。epoll采用事件驱动模型,获取就绪事件的时间复杂度是O(1),描述符处于非阻塞模式下,当有事件发生时,直接进入相应的处理逻辑。(和 libevent、libev、Netty中的机制相比哪种性能更优呢?)下面是简单的入门实例。
启动server后,通过 nc 命令可以发送TCP消息,运行结果如下:
vonzhou@de15:~/Coding/unp/tcpepool$ ./server 1234
Accepted connection on descriptor 5 (host=127.0.0.1, port=60598)
vonzhou hello
Closed connection on descriptor 5
Accepted connection on descriptor 5 (host=127.0.0.1, port=60599)
i love linux
Closed connection on descriptor 5
^C
Accepted connection on descriptor 5 (host=127.0.0.1, port=60598)
vonzhou hello
Closed connection on descriptor 5
Accepted connection on descriptor 5 (host=127.0.0.1, port=60599)
i love linux
Closed connection on descriptor 5
^C
代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/epoll.h>
#include <errno.h>
#define MAXEVENTS 64
/*
 * Put the descriptor `sfd` into non-blocking mode.
 *
 * The current file status flags are read with F_GETFL, O_NONBLOCK is OR-ed
 * in, and the combined value is written back with F_SETFL.
 *
 * Returns 0 on success. If either fcntl() call fails, the reason is printed
 * with perror("fcntl") and -1 is returned.
 */
static int make_socket_non_blocking (int sfd){
    int current = fcntl (sfd, F_GETFL, 0);

    if (current != -1) {
        /* Only attempt the update when the existing flags were readable. */
        if (fcntl (sfd, F_SETFL, current | O_NONBLOCK) != -1)
            return 0;
    }

    /* Reached when either fcntl() call above failed; errno is still fresh. */
    perror ("fcntl");
    return -1;
}
/*
 * Create a TCP socket and bind it to the wildcard address on `port`.
 *
 * `port` is handed to getaddrinfo() as the service argument, so it may be a
 * decimal port number or a service name. Candidate addresses are requested
 * with AF_UNSPEC (IPv4 or IPv6) and AI_PASSIVE (suitable for bind()); the
 * candidates are tried in order and the first one that binds wins.
 *
 * Returns the bound (not yet listening) socket descriptor, or -1 on failure
 * after printing a diagnostic to stderr. The address list obtained from
 * getaddrinfo() is released on every path once it has been obtained.
 */
static int create_and_bind (char *port){
    struct addrinfo hints;
    struct addrinfo *result = NULL;
    struct addrinfo *rp;
    int rc;
    int sfd = -1;

    memset (&hints, 0, sizeof hints);
    hints.ai_family = AF_UNSPEC;     /* accept both IPv4 and IPv6 candidates */
    hints.ai_socktype = SOCK_STREAM; /* we want a TCP socket */
    hints.ai_flags = AI_PASSIVE;     /* address is intended for a listening socket */

    rc = getaddrinfo (NULL, port, &hints, &result);
    if (rc != 0) {
        fprintf (stderr, "getaddrinfo: %s\n", gai_strerror (rc));
        return -1;
    }

    /* Walk the candidates until one can be both created and bound. */
    for (rp = result; rp != NULL; rp = rp->ai_next) {
        sfd = socket (rp->ai_family, rp->ai_socktype, rp->ai_protocol);
        if (sfd == -1)
            continue;
        if (bind (sfd, rp->ai_addr, rp->ai_addrlen) == 0)
            break; /* bind succeeded: keep this descriptor */
        close (sfd); /* bind failed: discard and try the next candidate */
        sfd = -1;
    }

    /* Free the list before deciding the outcome so the failure path cannot leak it. */
    freeaddrinfo (result);

    if (sfd == -1) {
        fprintf (stderr, "Could not bind\n");
        return -1;
    }
    return sfd;
}
/*
 * Accept every connection currently queued on the listening socket `sfd`
 * and register each new descriptor with the epoll instance `efd` for
 * edge-triggered read notifications.
 *
 * Because the listening socket is itself registered edge-triggered, the
 * backlog must be drained until accept() reports EAGAIN/EWOULDBLOCK.
 * Failure to make a client non-blocking or to register it aborts the
 * process, as in the rest of this program.
 */
static void accept_pending_connections (int efd, int sfd){
    for (;;) {
        /* sockaddr_storage is large enough for both IPv4 and IPv6 peers. */
        struct sockaddr_storage peer;
        socklen_t peer_len = sizeof peer;
        char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
        struct epoll_event event;
        int infd;

        infd = accept (sfd, (struct sockaddr *) &peer, &peer_len);
        if (infd == -1) {
            /* Transient conditions: keep draining the backlog. */
            if (errno == EINTR || errno == ECONNABORTED)
                continue;
            /* EAGAIN/EWOULDBLOCK means all pending connections were handled. */
            if (errno != EAGAIN && errno != EWOULDBLOCK)
                perror ("accept");
            return;
        }

        /* Report the peer in numeric form; a lookup failure is not fatal. */
        if (getnameinfo ((struct sockaddr *) &peer, peer_len,
                         hbuf, sizeof hbuf, sbuf, sizeof sbuf,
                         NI_NUMERICHOST | NI_NUMERICSERV) == 0) {
            printf("Accepted connection on descriptor %d (host=%s, port=%s)\n", infd, hbuf, sbuf);
        }

        /* Edge-triggered epoll requires non-blocking descriptors. */
        if (make_socket_non_blocking (infd) == -1)
            abort ();

        memset (&event, 0, sizeof event);
        event.data.fd = infd;
        event.events = EPOLLIN | EPOLLET; /* readable, edge-triggered */
        if (epoll_ctl (efd, EPOLL_CTL_ADD, infd, &event) == -1) {
            perror ("epoll_ctl");
            abort ();
        }
    }
}

/*
 * Read everything currently available on the connected socket `fd` and copy
 * it to standard output.
 *
 * In edge-triggered mode all pending data must be consumed, so reading
 * continues until read() reports EAGAIN/EWOULDBLOCK.
 *
 * Returns 1 when the connection should be closed (peer sent EOF or a read
 * error occurred) and 0 when it should stay open. A failing write to
 * standard output aborts the process.
 */
static int drain_client (int fd){
    char buf[512];

    for (;;) {
        ssize_t count = read (fd, buf, sizeof buf);
        ssize_t written = 0;

        if (count == -1) {
            if (errno == EINTR)
                continue;
            /* On a non-blocking socket this is not an error: no more data for now. */
            if (errno == EAGAIN || errno == EWOULDBLOCK)
                return 0;
            perror ("read");
            return 1;
        }
        if (count == 0)
            return 1; /* EOF: the client closed the connection */

        /* write() may accept fewer bytes than requested; loop until all are out. */
        while (written < count) {
            ssize_t w = write (STDOUT_FILENO, buf + written, (size_t) (count - written));
            if (w == -1) {
                if (errno == EINTR)
                    continue;
                perror ("write");
                abort ();
            }
            written += w;
        }
    }
}

/*
 * Entry point: `server <port>`.
 *
 * Binds a non-blocking listening socket on the given port, registers it with
 * an epoll instance in edge-triggered mode and then loops forever: new
 * connections are accepted and registered, and data arriving on connected
 * sockets is echoed to standard output. A connection is closed when the peer
 * sends EOF or a read error occurs.
 *
 * Set-up failures abort the process. The loop only ends if epoll_wait()
 * fails with something other than EINTR, in which case resources are
 * released and EXIT_FAILURE is returned.
 */
int main (int argc, char *argv[]){
    int sfd, efd;
    int status = EXIT_SUCCESS;
    struct epoll_event event;
    struct epoll_event *events;

    if (argc != 2) {
        fprintf (stderr, "Usage: %s [port]\n", argv[0]);
        exit (EXIT_FAILURE);
    }

    sfd = create_and_bind (argv[1]);
    if (sfd == -1)
        abort ();

    if (make_socket_non_blocking (sfd) == -1)
        abort ();

    /* Start listening for incoming connections. */
    if (listen (sfd, SOMAXCONN) == -1) {
        perror ("listen");
        abort ();
    }

    /* Create the epoll instance. */
    efd = epoll_create1 (0);
    if (efd == -1) {
        perror ("epoll_create");
        abort ();
    }

    /* Register the listening socket. EPOLLET selects edge-triggered
       notification (as opposed to the default level-triggered mode). */
    memset (&event, 0, sizeof event);
    event.data.fd = sfd;
    event.events = EPOLLIN | EPOLLET;
    if (epoll_ctl (efd, EPOLL_CTL_ADD, sfd, &event) == -1) {
        perror ("epoll_ctl");
        abort ();
    }

    /* Buffer where epoll_wait() returns ready events. */
    events = calloc (MAXEVENTS, sizeof *events);
    if (events == NULL) {
        perror ("calloc");
        abort ();
    }

    /* The event loop. */
    for (;;) {
        int i;
        int n = epoll_wait (efd, events, MAXEVENTS, -1);

        if (n == -1) {
            if (errno == EINTR)
                continue; /* interrupted by a signal: just wait again */
            perror ("epoll_wait");
            status = EXIT_FAILURE;
            break;
        }

        for (i = 0; i < n; i++) {
            const struct epoll_event *ev = &events[i];
            int fd = ev->data.fd;

            if ((ev->events & (EPOLLERR | EPOLLHUP)) || !(ev->events & EPOLLIN)) {
                /* An error occurred on this descriptor, or it was reported
                   without being readable; drop it. */
                fprintf (stderr, "epoll error\n");
                close (fd);
                continue;
            }

            if (fd == sfd) {
                /* Listening socket is readable: new connection(s) are pending. */
                accept_pending_connections (efd, sfd);
                continue;
            }

            /* Data (or EOF) on a connected socket. */
            if (drain_client (fd)) {
                printf ("Closed connection on descriptor %d\n", fd);
                /* Closing the descriptor makes epoll remove it from the
                   set of monitored descriptors. */
                close (fd);
            }
        }
    }

    free (events);
    close (efd);
    close (sfd);
    return status;
}
结构体epoll_event被用于注册希望监听的事件和反馈发生的事件,定义如下:
/* Caller-chosen payload stored in an epoll_event; as a union, its members
   share storage, so only the member that was last written is meaningful. */
typedef union epoll_data {
void *ptr;
int fd;
__uint32_t u32;
__uint64_t u64;
} epoll_data_t;// holds data associated with the file descriptor that triggered the event
/* Used both to register the events of interest for a descriptor and to
   report the events that actually occurred. */
struct epoll_event {
__uint32_t events; /* epoll event mask, e.g. EPOLLIN | EPOLLET */
epoll_data_t data; /* User data variable */
};
-------------------
#include <sys/socket.h>
#include <netdb.h>
int getnameinfo(const struct sockaddr *sa, socklen_t salen,
char *host, size_t hostlen,
char *serv, size_t servlen, int flags);
getnameinfo函数以协议无关的方式,把套接字地址结构转换为主机名和服务名的字符串表示。
NI_NUMERICHOST、NI_NUMERICSERV 两个标志分别表示以数字形式返回主机地址和服务端口,而不做名称解析。
-------
#include<netdb.h>
int getaddrinfo( const char *hostname, const char *service, const struct addrinfo *hints, struct addrinfo **result );
参数说明
hostname:一个主机名或者地址串(IPv4的点分十进制串或者IPv6的16进制串)
service:服务名可以是十进制的端口号,也可以是已定义的服务名称,如ftp、http等
hints:可以是一个空指针,也可以是一个指向某个addrinfo结构体的指针,调用者在这个结构中填入关于期望返回的信息类型的暗示
result:本函数通过result指针参数返回一个指向addrinfo结构体链表的指针。