Linux epoll详解


epoll机制相比select/poll机制能更有效地实现描述符的多路复用(支持更多的描述符,处理效率更高[具体机制这边不展开了]),本文从编程的角度做一个介绍。

epoll接口函数

头文件: #include<sys/epoll.h>
可以通过man epoll查看对应的帮助信息
最大描述符限制:/proc/sys/fs/epoll/max_user_watches

创建epoll实例

    int epoll_create(int size); 
    int epoll_create1(int flag); 

epoll_create中,size只是给内核的一个大小提示,并不是队列中的最大数;自Linux 2.6.8起该参数已被忽略,但仍必须大于0
epoll_create1中flag取值如下:

  • 0:epoll_create1 == epoll_create (size argument is dropped)
  • EPOLL_CLOEXEC:含义同open函数的O_CLOEXEC选项;当调用execve执行新程序时,打开的描述符自动关闭
    p.s: 当使用完毕时,需要调用close关闭epoll实例句柄

管理epoll事件

    int epoll_ctl (int epfd, int op, int fd, struct epoll_event *event);  

参数说明:

  • epfd: epoll_create返回的epoll实例
  • op: 对应的操作
  • fd: 监听的fd
  • event: 监听的事件
    其中op取值如下:
  • EPOLL_CTL_ADD:添加监听的事件
  • EPOLL_CTL_DEL:删除监听的事件
  • EPOLL_CTL_MOD:修改监听的事件
    struct epoll_event定义如下:
/* User data attached to a monitored descriptor.  This is a union, so
   the members share the same storage: use only one of them for a given
   registration.  The example code in this article uses the `fd` member. */
typedef union epoll_data
{
  void *ptr;
  int fd;
  uint32_t u32;
  uint64_t u64;
} epoll_data_t;

/* One event record: passed to epoll_ctl to describe what to monitor, and
   filled in by epoll_wait to report what happened.  */
struct epoll_event
{
  uint32_t events;	/* Bit mask of EPOLL* event flags (EPOLLIN, EPOLLOUT, ...) */
  epoll_data_t data;	/* User data variable, stored alongside the event mask */
};

其中events可以包含以下事件类型:

  • EPOLLIN: 描述符可读
  • EPOLLOUT: 描述符可写
  • EPOLLRDHUP(since Linux 2.6.17): 流套接字对端关闭连接或者关闭写端
  • EPOLLPRI: 紧急数据可读
  • EPOLLERR: 描述符发生错误,该事件由内核一直监听(比如connect套接字失败会返回EPOLLERR)
  • EPOLLHUP: 文件描述符被挂断,该事件由内核一直监听
  • EPOLLET: 开启边缘触发,默认是水平触发
  • EPOLLONESHOT: 一个事件发生并读取之后,fd自动不再监控;若要重新监控需要使用EPOLL_CTL_MOD重新设置
    返回值: 成功返回0,失败返回-1并设置errno

等待epoll事件

int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout);

参数说明:

  • epfd: epoll_create返回的epoll实例
  • events: 存储epoll_event的数组地址
  • maxevents: 最大事件的数量,需>0
  • timeout: 等待的最长时间(单位为毫秒);-1表示一直阻塞直到有事件发生,0表示立即返回
    返回值:
    成功时返回就绪的监听文件描述符数;当超出timeout指定的时间后如果无就绪的文件描述符,返回0;发生错误时返回-1并设置errno
    另外,Linux kernel 2.6.19 引入了epoll_pwait,可以在等待时设置信号掩码,其使用方式类似pselect

some problems:

  1. epoll 怎么判断是connect请求还是有数据可读?
    ans: 判断events[i].data.fd == listen_fd
  2. read 返回值说明:
  • return -1 and errno == EAGAIN: 数据已经读完,没有可读数据
  • return 0: end of file,对端关闭连接

关于水平触发(Level-Triggered)和边缘触发(Edge-Triggered)
当缓冲区有数据可读时,ET会触发一次事件,之后就不会再触发;而LT只要我们没有读完缓冲区的数据,事件就会一直触发。
推荐使用的epoll ET方式如下:

  1. 设置fd为非阻塞
  2. 当调用read或write读写时,在其返回-1,且errno == EAGAIN 后再调用epoll_wait等待
    tips:
    ET模式只能用于设置了O_NONBLOCK的fd,而LT则同时支持阻塞及非阻塞的fd。如果将ET模式应用于阻塞的fd,将出现如下问题:
    当对端send 2 byte数据,而服务端只读取了1 byte后再去调用epoll_wait,这时将不产生读事件。直到对端又有数据发送过来,epoll_wait才会再次返回
    补充:
    Q:当又有事件产生时会怎么样,原来的数据还在吗?
    A:原来的数据还在socket缓冲区

epoll实例

epoll使用参考:

/* Create a stream (TCP) socket and bind it to PORT on all local
   interfaces.

   PORT is the service name or decimal port number handed to
   getaddrinfo.  Every address returned is tried in turn until a socket
   can be both created and bound.

   Returns the bound (not yet listening) socket descriptor, or -1 on
   failure after printing a diagnostic to stderr.  */
static int
create_and_bind (char *port)
{
  struct addrinfo hints;
  struct addrinfo *result, *rp;
  int s;
  int sfd = -1;

  memset (&hints, 0, sizeof hints);
  hints.ai_family = AF_UNSPEC;     /* Return IPv4 and IPv6 choices */
  hints.ai_socktype = SOCK_STREAM; /* We want a TCP socket */
  hints.ai_flags = AI_PASSIVE;     /* All interfaces */

  s = getaddrinfo (NULL, port, &hints, &result);
  if (s != 0)
    {
      fprintf (stderr, "getaddrinfo: %s\n", gai_strerror (s));
      return -1;
    }

  for (rp = result; rp != NULL; rp = rp->ai_next)
    {
      sfd = socket (rp->ai_family, rp->ai_socktype, rp->ai_protocol);
      if (sfd == -1)
        continue;

      if (bind (sfd, rp->ai_addr, rp->ai_addrlen) == 0)
        {
          /* We managed to bind successfully! */
          break;
        }

      /* This candidate failed; drop it and mark that we hold no socket
         so the check after the loop does not depend on RP.  */
      close (sfd);
      sfd = -1;
    }

  /* Release the address list on every path, including failure.  */
  freeaddrinfo (result);

  if (sfd == -1)
    {
      fprintf (stderr, "Could not bind\n");
      return -1;
    }

  return sfd;
}

/* Switch descriptor SFD to non-blocking mode by adding O_NONBLOCK to
   its current file status flags.

   Returns 0 on success.  If either fcntl call fails, reports it with
   perror ("fcntl") and returns -1.  */
static int
make_socket_non_blocking (int sfd)
{
  int current = fcntl (sfd, F_GETFL, 0);

  if (current != -1)
    {
      /* Keep whatever flags were already set and add O_NONBLOCK.  */
      if (fcntl (sfd, F_SETFL, current | O_NONBLOCK) != -1)
        return 0;
    }

  /* Reached when reading or writing the flags failed.  */
  perror ("fcntl");
  return -1;
}

#define MAXEVENTS 64

/* Edge-triggered epoll server.

   Usage: PROGRAM PORT

   Binds a non-blocking listening socket to PORT, registers it with an
   epoll instance using EPOLLIN | EPOLLET, and then loops forever:
   new connections are accepted until accept reports EAGAIN, and each
   readable connection is drained until read reports EAGAIN, with the
   data copied to standard output.

   Setup failures abort the process.  A non-recoverable epoll_wait
   error ends the loop, releases resources and returns EXIT_FAILURE.  */
int
main (int argc, char *argv[])
{
  int sfd, s;
  int efd;
  int status = EXIT_SUCCESS;
  struct epoll_event event;
  struct epoll_event *events;

  if (argc != 2)
    {
      fprintf (stderr, "Usage: %s [port]\n", argv[0]);
      exit (EXIT_FAILURE);
    }

  sfd = create_and_bind (argv[1]);
  if (sfd == -1)
    abort ();

  s = make_socket_non_blocking (sfd);
  if (s == -1)
    abort ();

  s = listen (sfd, SOMAXCONN);
  if (s == -1)
    {
      perror ("listen");
      abort ();
    }

  efd = epoll_create1 (0);
  if (efd == -1)
    {
      perror ("epoll_create");
      abort ();
    }

  event.data.fd = sfd;
  event.events = EPOLLIN | EPOLLET;
  s = epoll_ctl (efd, EPOLL_CTL_ADD, sfd, &event);
  if (s == -1)
    {
      perror ("epoll_ctl");
      abort ();
    }

  /* Buffer where events are returned */
  events = calloc (MAXEVENTS, sizeof *events);
  if (events == NULL)
    {
      perror ("calloc");
      abort ();
    }

  /* The event loop */
  while (1)
    {
      int n, i;

      n = epoll_wait (efd, events, MAXEVENTS, -1);
      if (n == -1)
        {
          /* A signal interrupting the wait is not an error; any other
             failure would repeat forever, so leave the loop.  */
          if (errno == EINTR)
            continue;

          perror ("epoll_wait");
          status = EXIT_FAILURE;
          break;
        }

      for (i = 0; i < n; i++)
        {
          if ((events[i].events & EPOLLERR) ||
              (events[i].events & EPOLLHUP) ||
              (!(events[i].events & EPOLLIN)))
            {
              /* An error has occurred on this fd, or the socket is not
                 ready for reading (why were we notified then?) */
              fprintf (stderr, "epoll error\n");
              close (events[i].data.fd);
              continue;
            }

          else if (sfd == events[i].data.fd)
            {
              /* We have a notification on the listening socket, which
                 means one or more incoming connections. */
              while (1)
                {
                  struct sockaddr in_addr;
                  socklen_t in_len;
                  int infd;
                  char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];

                  in_len = sizeof in_addr;
                  infd = accept (sfd, &in_addr, &in_len);
                  if (infd == -1)
                    {
                      if ((errno == EAGAIN) ||
                          (errno == EWOULDBLOCK))
                        {
                          /* We have processed all incoming
                             connections. */
                          break;
                        }
                      else
                        {
                          perror ("accept");
                          break;
                        }
                    }

                  s = getnameinfo (&in_addr, in_len,
                                   hbuf, sizeof hbuf,
                                   sbuf, sizeof sbuf,
                                   NI_NUMERICHOST | NI_NUMERICSERV);
                  if (s == 0)
                    {
                      printf("Accepted connection on descriptor %d "
                             "(host=%s, port=%s)\n", infd, hbuf, sbuf);
                    }

                  /* Make the incoming socket non-blocking and add it to the
                     list of fds to monitor. */
                  s = make_socket_non_blocking (infd);
                  if (s == -1)
                    abort ();

                  event.data.fd = infd;
                  event.events = EPOLLIN | EPOLLET;
                  s = epoll_ctl (efd, EPOLL_CTL_ADD, infd, &event);
                  if (s == -1)
                    {
                      perror ("epoll_ctl");
                      abort ();
                    }
                }
              continue;
            }
          else
            {
              /* We have data on the fd waiting to be read. Read and
                 display it. We must read whatever data is available
                 completely, as we are running in edge-triggered mode
                 and won't get a notification again for the same
                 data. */
              int done = 0;

              while (1)
                {
                  ssize_t count;
                  ssize_t written;
                  char buf[512];

                  count = read (events[i].data.fd, buf, sizeof buf);
                  if (count == -1)
                    {
                      /* Interrupted before any data was transferred:
                         simply retry the read. */
                      if (errno == EINTR)
                        continue;

                      /* If errno == EAGAIN, that means we have read all
                         data. So go back to the main loop. */
                      if ((errno != EAGAIN) &&
                          (errno != EWOULDBLOCK))
                        {
                          perror ("read");
                          done = 1;
                        }
                      break;
                    }
                  else if (count == 0)
                    {
                      /* End of file. The remote has closed the
                         connection. */
                      done = 1;
                      break;
                    }

                  /* Write the buffer to standard output.  write may
                     accept fewer bytes than requested, so keep going
                     until the whole chunk has been written. */
                  written = 0;
                  while (written < count)
                    {
                      ssize_t w = write (1, buf + written,
                                         (size_t) (count - written));
                      if (w == -1)
                        {
                          if (errno == EINTR)
                            continue;

                          perror ("write");
                          abort ();
                        }
                      written += w;
                    }
                }

              if (done)
                {
                  printf ("Closed connection on descriptor %d\n",
                          events[i].data.fd);

                  /* Closing the descriptor will make epoll remove it
                     from the set of descriptors which are monitored. */
                  close (events[i].data.fd);
                }
            }
        }
    }

  free (events);

  close (efd);
  close (sfd);

  return status;
}
  • 客户端代码:
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Open a TCP connection to the given port on the local host.
 *
 * port: service name or decimal port number passed to getaddrinfo.
 *
 * Each address returned by getaddrinfo is tried in turn until a socket
 * can be created and connected.
 *
 * Returns the connected socket descriptor, or -1 if port is NULL, the
 * lookup fails, or no address could be connected (a message is printed
 * to stderr for the last two cases).
 */
int create_and_connect(char * port)
{
    if(NULL == port)
    {
        return -1;
    }

    struct addrinfo hints;
    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    //AI_PASSIVE is deliberately not set: it asks for a wildcard address
    //meant for bind(); without it a NULL node resolves to the loopback
    //address, which is what a connecting client needs
    hints.ai_flags = 0;

    struct addrinfo *result = NULL;
    int ret = getaddrinfo(NULL, port, &hints, &result);
    if(ret != 0)
    {
        fprintf(stderr, "getaddrinfo error: %s\n", gai_strerror(ret));
        return -1;
    }

    struct addrinfo *rp;
    int cfd = -1;
    for(rp = result; rp != NULL; rp = rp->ai_next)
    {
        cfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
        if(-1 == cfd)
        {
            continue;
        }

        //client connect
        ret = connect(cfd, rp->ai_addr, rp->ai_addrlen);
        if(0 == ret)
        {
            break;
        }

        //this candidate failed; remember that no socket is held
        close(cfd);
        cfd = -1;
    }

    //release the address list on every path, including failure
    freeaddrinfo(result);

    if(-1 == cfd)
    {
        fprintf(stderr, "connect to port failed!\n");
        return -1;
    }

    return cfd;
}

/*
 * Test client: connects to the port given as the only argument and
 * sends the string "Client hello!" twice, one second apart.
 *
 * Returns 0 when both sends succeed, -1 when the connection or a send
 * fails; exits with status -1 on a usage error.
 */
int main(int argc, char *argv[])
{
    if(argc != 2)
    {
        fprintf(stderr, "Usage: %s [port]\n", argv[0]);
        exit(-1);
    }

    //client sends something
    int cfd = create_and_connect(argv[1]);
    if(-1 == cfd)
    {
        fprintf(stderr, "create_and_connect failed\n");
        return -1;
    }

    const char *pData = "Client hello!";
    size_t dataLen = strlen(pData);
    int rc = 0;

    if(send(cfd, pData, dataLen, 0) == -1)
    {
        perror("send");
        rc = -1;
    }
    else
    {
        //pause between the two messages (presumably so the server sees
        //them as two separate read events)
        sleep(1);

        if(send(cfd, pData, dataLen, 0) == -1)
        {
            perror("send");
            rc = -1;
        }
    }

    //always release the socket before exiting
    close(cfd);
    return rc;
}

mac下的epoll

mac os不支持epoll,其使用kqueue实现(类似epoll),头文件 sys/event.h
link: https://zhuanlan.zhihu.com/p/21375144

epoll源码实现

epoll源码实现分析
epoll源码实现分析(整理)

参考博客:Linux epoll 详解

linux下的socket程序示例

可以从linux下的socket编程Client/Server示例下载

  • 1
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值