最近在看linux内核驱动,想实现一个支持epoll的机制,看到了eventfd机制,所以就自己记录下eventfd的机制和心得。
机制
eventfd是一个将内存作为虚拟文件来操作的文件描述符。在创建eventfd的时候会通过
fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
创建一个名为"[eventfd]"的匿名inode虚拟文件,并返回与之关联的文件描述符。
通过这个文件描述符我们就可以进行读写这个文件所映射的事件体。
结构体
/* Per-eventfd state; do_eventfd() allocates one and passes it to anon_inode_getfd(). */
struct eventfd_ctx {
/* Reference count, set up with kref_init() in do_eventfd(). */
struct kref kref;
/* Wait queue head that readers, writers and pollers of this eventfd wait on. */
wait_queue_head_t wqh;
/*
* Every write(2) performed on the eventfd adds the written __u64 value
* to "count" and performs a wakeup on "wqh".
* read(2) returns the "count" value to user space and resets "count" to zero.
* The kernel-side eventfd_signal() also adds to "count" and issues a wakeup.
*/
__u64 count;
/* EFD_* flags supplied when the eventfd was created. */
unsigned int flags;
};
创建eventfd
linux 内核提供了int eventfd(unsigned int count)
和int eventfd2(unsigned int count, int flags)
两个系统调用(用户态glibc统一封装为int eventfd(unsigned int initval, int flags))
/*
* Common implementation behind the eventfd and eventfd2 system calls.
*
* count: initial value stored in the context's counter.
* flags: EFD_* flags; any bit outside EFD_FLAGS_SET is rejected.
*
* Returns the new file descriptor on success, -EINVAL for unsupported
* flags, -ENOMEM if the context cannot be allocated, or the negative
* value returned by anon_inode_getfd().
*/
static int do_eventfd(unsigned int count, int flags)
{
struct eventfd_ctx *ctx;
int fd;
/* Check the EFD_* constants for consistency. */
BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
/* Reject any flag bit that is not part of EFD_FLAGS_SET. */
if (flags & ~EFD_FLAGS_SET)
return -EINVAL;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
/* Initialize every field of the context before it is attached to a file. */
kref_init(&ctx->kref);
init_waitqueue_head(&ctx->wqh);
ctx->count = count;
ctx->flags = flags;
/* Only the flags in EFD_SHARED_FCNTL_FLAGS are forwarded as open flags. */
fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
/* Creating the descriptor failed: free the context allocated above. */
if (fd < 0)
eventfd_free_ctx(ctx);
return fd;
}
/* eventfd2 system call: forwards both the initial count and the flags to do_eventfd(). */
SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
return do_eventfd(count, flags);
}
/* eventfd system call: takes only an initial count and calls do_eventfd() with flags set to 0. */
SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
return do_eventfd(count, 0);
}
eventfd的操作集
/*
* File operations for eventfd files; do_eventfd() passes this table to
* anon_inode_getfd(). The show_fdinfo hook is only set when
* CONFIG_PROC_FS is defined.
*/
static const struct file_operations eventfd_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = eventfd_show_fdinfo,
#endif
.release = eventfd_release,
.poll = eventfd_poll,
.read = eventfd_read,
.write = eventfd_write,
.llseek = noop_llseek,
};
eventfd 支持以上操作
show_fdinfo
打印出count的值
seq_printf(m, "eventfd-count: %16llx\n",
(unsigned long long)ctx->count);
eventfd_release
释放对eventfd上下文的引用
wake_up_poll(&ctx->wqh, EPOLLHUP); //表示关闭
eventfd_ctx_put(ctx); //释放引用
eventfd_poll
轮询查看等待队列
poll_wait(file, &ctx->wqh, wait);
count = READ_ONCE(ctx->count);
if (count > 0)
events |= EPOLLIN;
if (count == ULLONG_MAX)
events |= EPOLLERR;
if (ULLONG_MAX - 1 > count)
events |= EPOLLOUT;
如果count>0
,则设置events |= EPOLLIN
表示可以读取
如果count == ULLONG_MAX
,则设置events |= EPOLLERR
表示出错
如果ULLONG_MAX - 1 > count
,则设置events |= EPOLLOUT
表示可以写入
read
eventfd_read
读取count,并将count设置为0;然后唤醒等待队列
如果count等于0,并且没有设置O_NONBLOCK
,则会等待事件。
如果flags设置了EFD_SEMAPHORE
则会将count减一,读取到的是1; 然后唤醒等待队列
write
eventfd_write
写入一个uint64 的ucnt,并将ucnt的值加入count;
即count += ucnt
,然后唤醒等待读的队列;
如果ULLONG_MAX - ctx->count <= ucnt
(即本次写入会使count溢出),并且没有设置O_NONBLOCK
,则写操作会阻塞等待,直到有读操作把count读走、腾出空间后再写入;如果设置了O_NONBLOCK,则直接返回-EAGAIN。
noop_llseek
noop_llseek是一个空操作的llseek实现:调用会成功返回,但不会改变文件位置,也就是说eventfd实际上不支持定位。
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <sys/time.h>
/*
 * eventfd descriptor shared by main() (which creates it and writes to it)
 * and read_thread() (which reads from it); -1 while no descriptor is open.
 */
int efd = -1;
/*
 * Reader thread: registers the global eventfd `efd` with a private epoll
 * instance and prints every counter value it reads from it.
 *
 * The loop ends when epoll_wait() sees no event for 5 seconds, when the
 * eventfd reports EPOLLHUP or EPOLLERR, or when any call fails. The epoll
 * descriptor is closed on every exit path.
 *
 * dummy: unused thread argument.
 * Returns NULL in every case.
 */
void *read_thread(void *dummy)
{
    int ep_fd = -1;
    struct epoll_event events[10];
    const int max_events = (int)(sizeof(events) / sizeof(events[0]));

    (void)dummy;

    if (efd < 0)
    {
        printf("efd not inited.\n");
        goto fail;
    }

    ep_fd = epoll_create(1024);
    if (ep_fd < 0)
    {
        perror("epoll_create fail: ");
        goto fail;
    }

    {
        /* Designated initializer zeroes the members that are not set here. */
        struct epoll_event read_event = {
            .events = EPOLLHUP | EPOLLERR | EPOLLIN,
            .data.fd = efd,
        };

        if (epoll_ctl(ep_fd, EPOLL_CTL_ADD, efd, &read_event) < 0)
        {
            perror("epoll ctl failed:");
            goto fail;
        }
    }

    while (1)
    {
        /*
         * The number of ready events lives in its own variable: it is the
         * bound of the loop below and must not be overwritten by the
         * result of read().
         */
        int nready = epoll_wait(ep_fd, events, max_events, 5000);

        if (nready < 0)
        {
            perror("epoll wait error:");
            goto fail;
        }
        if (nready == 0)
        {
            /* time out */
            printf("epoll wait timed out.\n");
            break;
        }

        for (int i = 0; i < nready; i++)
        {
            if (events[i].events & EPOLLHUP)
            {
                printf("epoll eventfd has epoll hup.\n");
                goto fail;
            }
            else if (events[i].events & EPOLLERR)
            {
                printf("epoll eventfd has epoll error.\n");
                goto fail;
            }
            else if (events[i].events & EPOLLIN)
            {
                int event_fd = events[i].data.fd;
                uint64_t count = 0;
                ssize_t nread = read(event_fd, &count, sizeof(count));

                if (nread < 0)
                {
                    perror("read fail:");
                    goto fail;
                }
                if (nread != (ssize_t)sizeof(count))
                {
                    /* An eventfd read transfers exactly 8 bytes. */
                    fprintf(stderr, "read fail: short read of %zd bytes\n", nread);
                    goto fail;
                }

                struct timeval tv;
                gettimeofday(&tv, NULL);
                /* Casts keep the arguments in line with the format specifiers. */
                printf("success read from efd, read %zd bytes(%llu) at %lds %ldus\n",
                       nread, (unsigned long long)count,
                       (long)tv.tv_sec, (long)tv.tv_usec);
            }
        }
    }

fail:
    if (ep_fd >= 0)
    {
        close(ep_fd);
        ep_fd = -1;
    }
    return NULL;
}
/*
 * Demo entry point: creates the eventfd, starts read_thread(), then writes
 * the value 4 to the eventfd five times, one second apart.
 *
 * Before returning it joins the reader thread (if one was started) and
 * closes the eventfd.
 *
 * Returns 0 when every step succeeded, 1 otherwise.
 */
int main(int argc, char *argv[])
{
    pthread_t tid;
    int thread_started = 0; /* pthread_t has no portable "invalid" value */
    int status = 1;
    int err = 0;
    int i = 0;

    (void)argc;
    (void)argv;

    efd = eventfd(0, 0);
    if (efd < 0)
    {
        perror("eventfd failed.");
        goto fail;
    }

    /*
     * pthread_create() reports failure through a positive error number as
     * its return value and does not set errno, so perror() cannot be used.
     */
    err = pthread_create(&tid, NULL, read_thread, NULL);
    if (err != 0)
    {
        fprintf(stderr, "pthread create: %s\n", strerror(err));
        goto fail;
    }
    thread_started = 1;

    for (i = 0; i < 5; i++)
    {
        uint64_t count = 4;
        ssize_t nwritten = write(efd, &count, sizeof(count));

        if (nwritten < 0)
        {
            perror("write event fd fail:");
            goto fail;
        }

        struct timeval tv;
        gettimeofday(&tv, NULL);
        /* Casts keep the arguments in line with the format specifiers. */
        printf("success write to efd, write %zd bytes(%llu) at %lds %ldus\n",
               nwritten, (unsigned long long)count,
               (long)tv.tv_sec, (long)tv.tv_usec);
        sleep(1);
    }

    /* Only reached when all writes succeeded. */
    status = 0;

fail:
    if (thread_started)
    {
        pthread_join(tid, NULL);
        thread_started = 0;
    }
    if (efd >= 0)
    {
        close(efd);
        efd = -1;
    }
    return status;
}
输出
[root@node35 test]# ./a.out
success write to efd, write 8 bytes(4) at 1602485041s 860397us
success read from efd, read 8 bytes(4) at 1602485041s 860426us
success write to efd, write 8 bytes(4) at 1602485042s 860472us
success read from efd, read 8 bytes(4) at 1602485042s 860475us
success write to efd, write 8 bytes(4) at 1602485043s 860542us
success read from efd, read 8 bytes(4) at 1602485043s 860545us
success write to efd, write 8 bytes(4) at 1602485044s 860615us
success read from efd, read 8 bytes(4) at 1602485044s 860618us
success write to efd, write 8 bytes(4) at 1602485045s 860686us
success read from efd, read 8 bytes(4) at 1602485045s 860690us
epoll wait timed out.