Structure
It supports both epoll and kqueue; the code below shows the epoll path.
Initialization
brpc's I/O event dispatcher uses the multi-threaded Reactor pattern. The global dispatchers are initialized by InitializeGlobalDispatchers and are organized into task_group_ntags groups, each containing event_dispatcher_num dispatchers:
void InitializeGlobalDispatchers() {
g_edisp = new EventDispatcher[FLAGS_task_group_ntags * FLAGS_event_dispatcher_num];
for (int i = 0; i < FLAGS_task_group_ntags; ++i) {
for (int j = 0; j < FLAGS_event_dispatcher_num; ++j) {
bthread_attr_t attr =
FLAGS_usercode_in_pthread ? BTHREAD_ATTR_PTHREAD : BTHREAD_ATTR_NORMAL;
attr.tag = (BTHREAD_TAG_DEFAULT + i) % FLAGS_task_group_ntags;
CHECK_EQ(0, g_edisp[i * FLAGS_event_dispatcher_num + j].Start(&attr));
}
}
// This atexit will be run before g_task_control.stop() because the above
// Start() initializes g_task_control by creating bthreads (to run epoll/kqueue).
CHECK_EQ(0, atexit(StopAndJoinGlobalDispatchers));
}
Dispatch strategy
EventDispatcher& GetGlobalEventDispatcher(int fd, bthread_tag_t tag) {
pthread_once(&g_edisp_once, InitializeGlobalDispatchers);
if (FLAGS_task_group_ntags == 1 && FLAGS_event_dispatcher_num == 1) {
return g_edisp[0];
}
int index = butil::fmix32(fd) % FLAGS_event_dispatcher_num;
return g_edisp[tag * FLAGS_event_dispatcher_num + index];
}
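The hash spreads fds evenly across the dispatchers of a tag group. butil::fmix32 appears to be MurmurHash3's 32-bit finalizer; a minimal standalone sketch for reference (the real implementation lives in butil):

#include <cstdint>

// MurmurHash3's 32-bit finalizer: avalanches the bits of the fd so that
// consecutively allocated fds map to different dispatchers after the modulo.
inline uint32_t fmix32(uint32_t h) {
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
}

Without the mixing step, sequentially allocated fds would land on dispatchers in lockstep with the modulo; the finalizer's avalanche behavior avoids that clustering.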
Read events
For the acceptor, the read-event handler is OnNewConnections:
options.on_edge_triggered_events = OnNewConnections;
For the new socket created after a connection is accepted, the read-event handler is either OnNewDataFromTcp (when RDMA is in use) or OnNewMessages:
#if BRPC_WITH_RDMA
if (am->_use_rdma) {
options.on_edge_triggered_events = rdma::RdmaEndpoint::OnNewDataFromTcp;
} else {
#else
{
#endif
options.on_edge_triggered_events = InputMessenger::OnNewMessages;
}
Write events
For a non-blocking connect, the dispatcher's RegisterEvent is called to register interest in EPOLLOUT:
int Socket::Connect(const timespec* abstime,
int (*on_connect)(int, int, void*), void* data) {
if (_ssl_ctx) {
_ssl_state = SSL_CONNECTING;
} else {
_ssl_state = SSL_OFF;
}
struct sockaddr_storage serv_addr;
socklen_t addr_size = 0;
if (butil::endpoint2sockaddr(remote_side(), &serv_addr, &addr_size) != 0) {
PLOG(ERROR) << "Fail to get sockaddr";
return -1;
}
butil::fd_guard sockfd(socket(serv_addr.ss_family, SOCK_STREAM, 0));
if (sockfd < 0) {
PLOG(ERROR) << "Fail to create socket";
return -1;
}
CHECK_EQ(0, butil::make_close_on_exec(sockfd));
// We need to do async connect (to manage the timeout by ourselves).
CHECK_EQ(0, butil::make_non_blocking(sockfd));
const int rc = ::connect(
sockfd, (struct sockaddr*)&serv_addr, addr_size);
if (rc != 0 && errno != EINPROGRESS) {
PLOG(WARNING) << "Fail to connect to " << remote_side();
return -1;
}
if (on_connect) {
EpollOutRequest* req = new(std::nothrow) EpollOutRequest;
if (req == NULL) {
LOG(FATAL) << "Fail to new EpollOutRequest";
return -1;
}
req->fd = sockfd;
req->timer_id = 0;
req->on_epollout_event = on_connect;
req->data = data;
// A temporary Socket to hold `EpollOutRequest', which will
// be added into epoll device soon
SocketId connect_id;
SocketOptions options;
options.bthread_tag = _bthread_tag;
options.user = req;
if (Socket::Create(options, &connect_id) != 0) {
LOG(FATAL) << "Fail to create Socket";
delete req;
return -1;
}
// From now on, ownership of `req' has been transferred to
// `connect_id'. We hold an additional reference here to
// ensure `req' to be valid in this scope
SocketUniquePtr s;
CHECK_EQ(0, Socket::Address(connect_id, &s));
// Add `sockfd' into epoll so that `HandleEpollOutRequest' will
// be called with `req' when the epoll event arrives
if (GetGlobalEventDispatcher(sockfd, _bthread_tag).RegisterEvent(connect_id, sockfd, false) !=
0) {
const int saved_errno = errno;
PLOG(WARNING) << "Fail to add fd=" << sockfd << " into epoll";
s->SetFailed(saved_errno, "Fail to add fd=%d into epoll: %s",
(int)sockfd, berror(saved_errno));
return -1;
}
// Register a timer for EpollOutRequest. Note that the timeout
// callback has no race with the one above as both of them try
// to `SetFailed' `connect_id' while only one of them can succeed
// It also works when `HandleEpollOutRequest' has already been
// called before adding the timer since it will be removed
// inside destructor of `EpollOutRequest' after leaving this scope
if (abstime) {
int rc = bthread_timer_add(&req->timer_id, *abstime,
HandleEpollOutTimeout,
(void*)connect_id);
if (rc) {
LOG(ERROR) << "Fail to add timer: " << berror(rc);
s->SetFailed(rc, "Fail to add timer: %s", berror(rc));
return -1;
}
}
} else {
if (WaitEpollOut(sockfd, false, abstime) != 0) {
PLOG(WARNING) << "Fail to wait EPOLLOUT of fd=" << sockfd;
return -1;
}
if (CheckConnected(sockfd) != 0) {
return -1;
}
}
return sockfd.release();
}
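When EPOLLOUT fires on a connecting fd, writability alone does not prove the connect succeeded; the deferred error must be fetched with getsockopt(SO_ERROR). A minimal sketch of the standard POSIX check, presumably what CheckConnected above performs at its core:

#include <cerrno>
#include <sys/socket.h>

// Sketch: test whether a non-blocking connect() succeeded once the fd
// reports writability (EPOLLOUT).
int CheckConnectResult(int sockfd) {
    int err = 0;
    socklen_t len = sizeof(err);
    if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, &err, &len) != 0) {
        return -1;  // getsockopt itself failed
    }
    if (err != 0) {
        errno = err;  // the deferred connect() error
        return -1;
    }
    return 0;  // connection established
}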
EventDispatcher
Initialization
For epoll, the constructor creates the epoll fd and a pipe that is used to wake up the epoll event loop:
EventDispatcher::EventDispatcher()
: _epfd(-1)
, _stop(false)
, _tid(0)
, _consumer_thread_attr(BTHREAD_ATTR_NORMAL)
{
_epfd = epoll_create(1024 * 1024);
if (_epfd < 0) {
PLOG(FATAL) << "Fail to create epoll";
return;
}
CHECK_EQ(0, butil::make_close_on_exec(_epfd));
_wakeup_fds[0] = -1;
_wakeup_fds[1] = -1;
if (pipe(_wakeup_fds) != 0) {
PLOG(FATAL) << "Fail to create pipe";
return;
}
}
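The pipe exists so the event loop can be woken when no socket has activity, e.g. when stopping the dispatcher. A plausible sketch of the trick (brpc's Stop() registers the pipe's write end with EPOLLOUT; an empty pipe is always writable, so epoll_wait returns immediately and Run() observes _stop):

#include <sys/epoll.h>
#include <atomic>

// Sketch: waking a blocked epoll_wait without any socket traffic. Adding
// the pipe's write end with EPOLLOUT makes epoll_wait return at once,
// because an empty pipe is always writable; Run() then re-checks _stop.
void WakeupLoop(int epfd, int wakeup_write_fd, std::atomic<bool>& stop) {
    stop.store(true);
    epoll_event evt;
    evt.events = EPOLLOUT;
    evt.data.ptr = nullptr;
    epoll_ctl(epfd, EPOLL_CTL_ADD, wakeup_write_fd, &evt);
}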
Starting the event loop
bthread_start_background is used to start a bthread that runs RunThis:
int EventDispatcher::Start(const bthread_attr_t* consumer_thread_attr) {
if (_epfd < 0) {
LOG(FATAL) << "epoll was not created";
return -1;
}
if (_tid != 0) {
LOG(FATAL) << "Already started this dispatcher(" << this
<< ") in bthread=" << _tid;
return -1;
}
// Set _consumer_thread_attr before creating epoll thread to make sure
everything seems sane to the thread.
_consumer_thread_attr = (consumer_thread_attr ?
*consumer_thread_attr : BTHREAD_ATTR_NORMAL);
// _consumer_thread_attr is used in StartInputEvent(); assigning the NEVER_QUIT
// flag to it would cause the bthreads created for epoll events to never quit.
bthread_attr_t epoll_thread_attr = _consumer_thread_attr | BTHREAD_NEVER_QUIT;
// The polling thread uses the same attr as consumer threads (NORMAL right
// now). Previously we used a small stack (32KB), which could overflow when
// the older comlog (e.g. 3.1.85) calls com_openlog_r(). Since this is also
// a potential issue for consumer threads, using the same attr is a
// reasonable solution.
int rc = bthread_start_background(
&_tid, &epoll_thread_attr, RunThis, this);
if (rc) {
LOG(FATAL) << "Fail to create epoll thread: " << berror(rc);
return -1;
}
return 0;
}
void* EventDispatcher::RunThis(void* arg) {
((EventDispatcher*)arg)->Run();
return NULL;
}
void EventDispatcher::Run() {
while (!_stop) {
epoll_event e[32];
#ifdef BRPC_ADDITIONAL_EPOLL
// Performance downgrades in examples.
int n = epoll_wait(_epfd, e, ARRAY_SIZE(e), 0);
if (n == 0) {
n = epoll_wait(_epfd, e, ARRAY_SIZE(e), -1);
}
#else
const int n = epoll_wait(_epfd, e, ARRAY_SIZE(e), -1);
#endif
if (_stop) {
// epoll_ctl/epoll_wait should have some sort of memory fencing
// guaranteeing that we (after epoll_wait) see _stop set before
// epoll_ctl.
break;
}
if (n < 0) {
if (EINTR == errno) {
// We've checked _stop, no wake-up will be missed.
continue;
}
PLOG(FATAL) << "Fail to epoll_wait epfd=" << _epfd;
break;
}
for (int i = 0; i < n; ++i) {
if (e[i].events & (EPOLLIN | EPOLLERR | EPOLLHUP)
#ifdef BRPC_SOCKET_HAS_EOF
|| (e[i].events & has_epollrdhup)
#endif
) {
// We don't care about the return value.
Socket::StartInputEvent(e[i].data.u64, e[i].events,
_consumer_thread_attr);
}
}
for (int i = 0; i < n; ++i) {
if (e[i].events & (EPOLLOUT | EPOLLERR | EPOLLHUP)) {
// We don't care about the return value.
Socket::HandleEpollOut(e[i].data.u64);
}
}
}
}
When a read event fires, StartInputEvent runs: it converts the SocketId back into a Socket, increments the _nevent event counter, and runs the registered edge-triggered callback _on_edge_triggered_events:
int Socket::StartInputEvent(SocketId id, uint32_t events,
const bthread_attr_t& thread_attr) {
SocketUniquePtr s;
if (Address(id, &s) < 0) {
return -1;
}
if (NULL == s->_on_edge_triggered_events) {
// Callback can be NULL when receiving error epoll events
// (Added into epoll by `WaitConnected')
return 0;
}
if (s->fd() < 0) {
#if defined(OS_LINUX)
CHECK(!(events & EPOLLIN)) << "epoll_events=" << events;
#elif defined(OS_MACOSX)
CHECK((short)events != EVFILT_READ) << "kqueue filter=" << events;
#endif
return -1;
}
// if (events & has_epollrdhup) {
// s->_eof = 1;
// }
// Passing e[i].events would cause complex visibility issues and
// require stronger memory fences; since reading the fd returns the
// error as well, we don't pass the events.
if (s->_nevent.fetch_add(1, butil::memory_order_acq_rel) == 0) {
// According to the stats, above fetch_add is very effective. In a
// server processing 1 million requests per second, this counter
// is just 1500~1700/s
g_vars->neventthread << 1;
bthread_t tid;
// transfer ownership as well, don't use s anymore!
Socket* const p = s.release();
bthread_attr_t attr = thread_attr;
attr.keytable_pool = p->_keytable_pool;
attr.tag = bthread_self_tag();
if (FLAGS_usercode_in_coroutine) {
ProcessEvent(p);
} else if (bthread_start_urgent(&tid, &attr, ProcessEvent, p) != 0) {
LOG(FATAL) << "Fail to start ProcessEvent";
ProcessEvent(p);
}
}
return 0;
}
void* Socket::ProcessEvent(void* arg) {
// the enclosed Socket is valid and free to access inside this function.
SocketUniquePtr s(static_cast<Socket*>(arg));
s->_on_edge_triggered_events(s.get());
return NULL;
}
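The fetch_add on _nevent implements an "only the first event starts a consumer" pattern: while a consumer bthread is running, further events just bump the counter, and the consumer keeps working until the count drains to zero. A generic sketch of the pattern (StartConsumer and ProcessAllPendingData are placeholders, not brpc names; brpc spreads the equivalent logic across StartInputEvent and the message-parsing path):

#include <atomic>

void StartConsumer();          // placeholder: e.g. spawn a bthread to run Consume()
void ProcessAllPendingData();  // placeholder: drain the fd and parse messages

std::atomic<int> nevent(0);

// Producer side (the epoll thread): only the 0 -> 1 transition spawns a
// consumer; events arriving meanwhile just accumulate on the counter.
void OnEvent() {
    if (nevent.fetch_add(1, std::memory_order_acq_rel) == 0) {
        StartConsumer();
    }
}

// Consumer side: process, then drop the counter; if more events arrived
// in the meantime, loop instead of spawning another consumer.
void Consume() {
    do {
        ProcessAllPendingData();
    } while (nevent.fetch_sub(1, std::memory_order_release) != 1);
}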
Registering events
RegisterEvent stores the SocketId into evt.data.u64 and primarily registers the write event. The pollin parameter decides whether to modify an existing registration (keeping the read event alongside the write event) or to add a brand-new one; edge triggering is used in both cases. When handling an event, the SocketId carried in data.u64 is used to look up the corresponding Socket:
int EventDispatcher::RegisterEvent(SocketId socket_id, int fd, bool pollin) {
if (_epfd < 0) {
errno = EINVAL;
return -1;
}
epoll_event evt;
evt.data.u64 = socket_id;
evt.events = EPOLLOUT | EPOLLET;
#ifdef BRPC_SOCKET_HAS_EOF
evt.events |= has_epollrdhup;
#endif
if (pollin) {
evt.events |= EPOLLIN;
if (epoll_ctl(_epfd, EPOLL_CTL_MOD, fd, &evt) < 0) {
// This fd has been removed from epoll via `RemoveConsumer',
// in which case errno will be ENOENT
return -1;
}
} else {
if (epoll_ctl(_epfd, EPOLL_CTL_ADD, fd, &evt) < 0) {
return -1;
}
}
return 0;
}
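The two branches map to two situations. A hedged sketch of the call pattern, assuming brpc's header exposes GetGlobalEventDispatcher as used in Socket::Connect above (the already_polling_in parameter mirrors pollin; the call sites are simplified, not brpc's exact code):

#include "brpc/event_dispatcher.h"

// Sketch of the two RegisterEvent uses:
// - pollin=false: the fd is not in epoll yet (non-blocking connect), so
//   RegisterEvent issues EPOLL_CTL_ADD with EPOLLOUT | EPOLLET only.
// - pollin=true: the fd already polls EPOLLIN via AddConsumer, so
//   RegisterEvent issues EPOLL_CTL_MOD, keeping EPOLLIN while adding EPOLLOUT.
int WaitForWritable(brpc::SocketId socket_id, int fd, bthread_tag_t tag,
                    bool already_polling_in) {
    return brpc::GetGlobalEventDispatcher(fd, tag)
            .RegisterEvent(socket_id, fd, already_polling_in);
}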
AddConsumer registers the read event (EPOLLIN | EPOLLET):
int EventDispatcher::AddConsumer(SocketId socket_id, int fd) {
if (_epfd < 0) {
errno = EINVAL;
return -1;
}
epoll_event evt;
evt.events = EPOLLIN | EPOLLET;
evt.data.u64 = socket_id;
#ifdef BRPC_SOCKET_HAS_EOF
evt.events |= has_epollrdhup;
#endif
return epoll_ctl(_epfd, EPOLL_CTL_ADD, fd, &evt);
}
Unregistering events
UnregisterEvent decides based on pollin: if pollin is false, the fd is removed from epoll entirely; otherwise the registration is reset so that only the read event remains:
int EventDispatcher::UnregisterEvent(SocketId socket_id,
int fd, bool pollin) {
if (pollin) {
epoll_event evt;
evt.data.u64 = socket_id;
evt.events = EPOLLIN | EPOLLET;
#ifdef BRPC_SOCKET_HAS_EOF
evt.events |= has_epollrdhup;
#endif
return epoll_ctl(_epfd, EPOLL_CTL_MOD, fd, &evt);
} else {
return epoll_ctl(_epfd, EPOLL_CTL_DEL, fd, NULL);
}
}
RemoveConsumer removes the fd from epoll:
int EventDispatcher::RemoveConsumer(int fd) {
if (fd < 0) {
return -1;
}
// Remove the consumer from the dispatcher before closing the fd: if the
// process was forked and the fd is not marked close-on-exec, closing it
// does not drop the fd's reference count to 0, and thus does not remove
// the fd from epoll. Worse, the fd will not be removable from epoll
// again! If the fd was level-triggered and there's data left, epoll_wait
// will keep returning events of the fd continuously, making the program
// behave abnormally.
if (epoll_ctl(_epfd, EPOLL_CTL_DEL, fd, NULL) < 0) {
PLOG(WARNING) << "Fail to remove fd=" << fd << " from epfd=" << _epfd;
return -1;
}
return 0;
}
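The ordering constraint in the comment is worth restating concretely: detach the fd from epoll first, then close it. A minimal sketch of the safe teardown order, following the comment's reasoning:

#include <sys/epoll.h>
#include <unistd.h>

// Sketch: EPOLL_CTL_DEL must precede close(). If the process forked and
// the fd leaked into a child without close-on-exec, close() here does not
// drop the fd's last reference, so the fd would otherwise stay inside the
// epoll set with no remaining way to remove it.
void SafeDetachAndClose(int epfd, int fd) {
    epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL);  // detach from epoll first
    close(fd);                                 // then close our reference
}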