static grpc_error *pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *ps,
grpc_pollset_worker **worker_hdl,
grpc_millis deadline) {
grpc_pollset_worker worker;
...
if (begin_worker(exec_ctx, ps, &worker, worker_hdl, deadline)) {
...
/* This is the designated polling thread at this point and should ideally do
polling. However, if there are unprocessed events left from a previous
call to do_epoll_wait(), skip calling epoll_wait() in this iteration and
process the pending epoll events.
The reason for decoupling do_epoll_wait and process_epoll_events is to
better distribute the work (i.e. handling epoll events) across multiple
threads
process_epoll_events() returns very quickly: It just queues the work on
exec_ctx but does not execute it (the actual execution or, more
accurately, grpc_exec_ctx_flush() happens in end_worker() AFTER selecting
a designated poller). So we are not waiting long periods without a
designated poller */
if (gpr_atm_acq_load(&g_epoll_set.cursor) ==
gpr_atm_acq_load(&g_epoll_set.num_events)) {
append_error(&error, do_epoll_wait(exec_ctx, ps, deadline), err_desc);
}
append_error(&error, process_epoll_events(exec_ctx, ps), err_desc);
}
end_worker(exec_ctx, ps, &worker, worker_hdl);
...
return error;
}
The flow is: begin_worker -> do_epoll_wait -> process_epoll_events -> end_worker.
begin_worker initializes the freshly created worker and adds it to the pollset.
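All of these steps operate on a single global epoll_set. Reconstructed from the fields used in the code above (the exact definition in ev_epoll1_linux.c may differ slightly), it looks roughly like this:

typedef struct epoll_set {
  int epfd;                                    /* the one epoll instance shared by all pollsets */
  struct epoll_event events[MAX_EPOLL_EVENTS]; /* results of the most recent epoll_wait()       */
  gpr_atm num_events;                          /* number of valid entries in events[]           */
  gpr_atm cursor;                              /* index of the next entry to process            */
} epoll_set;

static epoll_set g_epoll_set;

do_epoll_wait fills events and num_events; process_epoll_events then advances cursor through them.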
static grpc_error *do_epoll_wait(grpc_exec_ctx *exec_ctx, grpc_pollset *ps,
grpc_millis deadline) {
GPR_TIMER_BEGIN("do_epoll_wait", 0);
int r;
int timeout = poll_deadline_to_millis_timeout(exec_ctx, deadline);
if (timeout != 0) {
GRPC_SCHEDULING_START_BLOCKING_REGION;
}
do {
GRPC_STATS_INC_SYSCALL_POLL(exec_ctx);
r = epoll_wait(g_epoll_set.epfd, g_epoll_set.events, MAX_EPOLL_EVENTS,
timeout);
} while (r < 0 && errno == EINTR);
if (timeout != 0) {
GRPC_SCHEDULING_END_BLOCKING_REGION_WITH_EXEC_CTX(exec_ctx);
}
if (r < 0) return GRPC_OS_ERROR(errno, "epoll_wait");
GRPC_STATS_INC_POLL_EVENTS_RETURNED(exec_ctx, r);
if (GRPC_TRACER_ON(grpc_polling_trace)) {
gpr_log(GPR_DEBUG, "ps: %p poll got %d events", ps, r);
}
gpr_atm_rel_store(&g_epoll_set.num_events, r);
gpr_atm_rel_store(&g_epoll_set.cursor, 0);
GPR_TIMER_END("do_epoll_wait", 0);
return GRPC_ERROR_NONE;
}
do_epoll_wait: waits for epoll events.
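The core of do_epoll_wait is the standard "retry epoll_wait on EINTR" pattern. A minimal, self-contained illustration of that pattern (plain C, not gRPC code; the name wait_for_events is mine):

#include <errno.h>
#include <sys/epoll.h>

#define MAX_EVENTS 100

/* Wait up to timeout_ms for events on epfd, retrying if the call is
   interrupted by a signal. Returns the number of ready events, or -1
   on a real error. */
static int wait_for_events(int epfd, struct epoll_event *events,
                           int timeout_ms) {
  int r;
  do {
    r = epoll_wait(epfd, events, MAX_EVENTS, timeout_ms);
  } while (r < 0 && errno == EINTR);
  return r;
}

A timeout of 0 (deadline already expired) makes epoll_wait return immediately, which is why the blocking-region bookkeeping above is only done when timeout != 0.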
static grpc_error *process_epoll_events(grpc_exec_ctx *exec_ctx,
grpc_pollset *pollset) {
static const char *err_desc = "process_events";
grpc_error *error = GRPC_ERROR_NONE;
GPR_TIMER_BEGIN("process_epoll_events", 0);
long num_events = gpr_atm_acq_load(&g_epoll_set.num_events);
long cursor = gpr_atm_acq_load(&g_epoll_set.cursor);
for (int idx = 0;
(idx < MAX_EPOLL_EVENTS_HANDLED_PER_ITERATION) && cursor != num_events;
idx++) {
long c = cursor++;
struct epoll_event *ev = &g_epoll_set.events[c];
void *data_ptr = ev->data.ptr;
if (data_ptr == &global_wakeup_fd) {
append_error(&error, grpc_wakeup_fd_consume_wakeup(&global_wakeup_fd),
err_desc);
} else {
grpc_fd *fd = (grpc_fd *)(data_ptr);
bool cancel = (ev->events & (EPOLLERR | EPOLLHUP)) != 0;
bool read_ev = (ev->events & (EPOLLIN | EPOLLPRI)) != 0;
bool write_ev = (ev->events & EPOLLOUT) != 0;
if (read_ev || cancel) {
fd_become_readable(exec_ctx, fd, pollset);
}
if (write_ev || cancel) {
fd_become_writable(exec_ctx, fd);
}
}
}
gpr_atm_rel_store(&g_epoll_set.cursor, cursor);
GPR_TIMER_END("process_epoll_events", 0);
return error;
}
Once do_epoll_wait returns, the events are processed, but the actual read/write work is still not performed here. The way the for loop is written looks a little odd; why not simply:
for (int idx = cursor; cursor != num_events; idx++) {
...
}
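Presumably the cap at MAX_EPOLL_EVENTS_HANDLED_PER_ITERATION exists so that a single worker does not burn through the whole batch: the cursor is stored back with a release store, so leftover events are picked up by the next pollset_work iteration, possibly on a different worker thread, which matches the work-distribution rationale in the comment inside pollset_work above. A stripped-down sketch of that bookkeeping (hypothetical names; the real code uses acquire/release gpr_atm operations):

#define BATCH 16  /* stand-in for MAX_EPOLL_EVENTS_HANDLED_PER_ITERATION */

static long g_cursor;      /* next event to process                    */
static long g_num_events;  /* events returned by the last epoll_wait() */

/* Process at most BATCH pending events; return how many remain. */
static long process_some_events(void) {
  long cursor = g_cursor;
  for (int idx = 0; idx < BATCH && cursor != g_num_events; idx++) {
    long c = cursor++;
    /* handle event c: mark fds readable/writable, queue closures... */
    (void)c;
  }
  g_cursor = cursor;                /* persist progress               */
  return g_num_events - g_cursor;   /* leftover work for the next call */
}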
Next, look at fd_become_readable:
static void fd_become_readable(grpc_exec_ctx *exec_ctx, grpc_fd *fd,
grpc_pollset *notifier) {
grpc_lfev_set_ready(exec_ctx, &fd->read_closure, "read");
/* Use release store to match with acquire load in fd_get_read_notifier */
gpr_atm_rel_store(&fd->read_notifier_pollset, (gpr_atm)notifier);
}
Here the fd's read_closure is set to the CLOSURE_READY state, and read_notifier_pollset is set to the ps from earlier.
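grpc_lfev_set_ready is one half of a lock-free handoff between the poller (which reports "ready") and the reader (which registers a closure via notify_on). A stripped-down model of that handoff using C11 atomics (my own types and names, not gRPC's API; the real code schedules the closure instead of running it inline):

#include <stdatomic.h>
#include <stdint.h>

#define NOT_READY ((uintptr_t)0)
#define READY ((uintptr_t)1)
/* any other value is a pointer to the waiting closure */

typedef struct { void (*cb)(void *arg); void *arg; } closure;

static _Atomic uintptr_t state = NOT_READY;

/* Poller side: the event fired. */
static void set_ready(void) {
  uintptr_t curr = atomic_load(&state);
  for (;;) {
    if (curr == READY) return;                    /* already ready */
    if (curr == NOT_READY) {
      if (atomic_compare_exchange_weak(&state, &curr, READY)) return;
    } else {
      /* a closure is parked: claim it, reset the state, run it */
      if (atomic_compare_exchange_weak(&state, &curr, NOT_READY)) {
        closure *c = (closure *)curr;
        c->cb(c->arg);
        return;
      }
    }
  }
}

/* Reader side: register interest in the next event. */
static void notify_on(closure *c) {
  uintptr_t curr = atomic_load(&state);
  for (;;) {
    if (curr == READY) {
      /* event already happened: consume it and run immediately */
      if (atomic_compare_exchange_weak(&state, &curr, NOT_READY)) {
        c->cb(c->arg);
        return;
      }
    } else if (curr == NOT_READY) {
      /* park the closure until set_ready() fires */
      if (atomic_compare_exchange_weak(&state, &curr, (uintptr_t)c)) return;
    } else {
      return;  /* another closure already parked: misuse in this model */
    }
  }
}

The real implementation, grpc_lfev_set_ready, packs all of this into a single gpr_atm, with an extra FD_SHUTDOWN_BIT to mark a shut-down fd: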
void grpc_lfev_set_ready(grpc_exec_ctx *exec_ctx, gpr_atm *state,
const char *variable) {
while (true) {
gpr_atm curr = gpr_atm_no_barrier_load(state);
if (GRPC_TRACER_ON(grpc_polling_trace)) {
gpr_log(GPR_ERROR, "lfev_set_ready[%s]: %p curr=%p", variable, state,
(void *)curr);
}
switch (curr) {
case CLOSURE_READY: {
/* Already ready. We are done here */
return;
}
case CLOSURE_NOT_READY: {
/* No barrier required as we're transitioning to a state that does not
involve a closure */
if (gpr_atm_no_barrier_cas(state, CLOSURE_NOT_READY, CLOSURE_READY)) {
return; /* early out */
}
break; /* retry */
}
default: {
/* 'curr' is either a closure or the fd is shutdown */
if ((curr & FD_SHUTDOWN_BIT) > 0) {
/* The fd is shutdown. Do nothing */
return;
}
/* Full cas: acquire pairs with this cas' release in the event of a
spurious set_ready; release pairs with this or the acquire in
notify_on (or set_shutdown) */
else if (gpr_atm_full_cas(state, curr, CLOSURE_NOT_READY)) {
GRPC_CLOSURE_SCHED(exec_ctx, (grpc_closure *)curr, GRPC_ERROR_NONE);
return;
}
/* else the state changed again (only possible by either a racing
set_ready or set_shutdown functions. In both these cases, the closure
would have been scheduled for execution. So we are done here */
return;
}
}
}
}
Note the GRPC_CLOSURE_SCHED macro here: it appends curr to exec_ctx->closure_list.
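Schematically (the real macro goes through the closure's scheduler vtable), scheduling on the exec_ctx amounts to an O(1) append to the intrusive singly-linked list that grpc_exec_ctx_flush drains below. The field names follow the flush code; everything else is an illustrative sketch:

static void sched_on_exec_ctx(grpc_exec_ctx *exec_ctx, grpc_closure *c,
                              grpc_error *error) {
  c->error_data.error = error;
  c->next_data.next = NULL;
  if (exec_ctx->closure_list.head == NULL) {
    exec_ctx->closure_list.head = c;   /* first closure in the list */
  } else {
    exec_ctx->closure_list.tail->next_data.next = c;
  }
  exec_ctx->closure_list.tail = c;
}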
The main work in end_worker is done by grpc_exec_ctx_flush:
bool grpc_exec_ctx_flush(grpc_exec_ctx *exec_ctx) {
bool did_something = 0;
GPR_TIMER_BEGIN("grpc_exec_ctx_flush", 0);
for (;;) {
if (!grpc_closure_list_empty(exec_ctx->closure_list)) {
grpc_closure *c = exec_ctx->closure_list.head;
exec_ctx->closure_list.head = exec_ctx->closure_list.tail = NULL;
while (c != NULL) {
grpc_closure *next = c->next_data.next;
grpc_error *error = c->error_data.error;
did_something = true;
exec_ctx_run(exec_ctx, c, error);
c = next;
}
} else if (!grpc_combiner_continue_exec_ctx(exec_ctx)) {
break;
}
}
GPR_ASSERT(exec_ctx->active_combiner == NULL);
GPR_TIMER_END("grpc_exec_ctx_flush", 0);
return did_something;
}
This function first runs every closure on exec_ctx->closure_list, and then grpc_combiner_continue_exec_ctx processes the combiners produced while those closures ran.
In gRPC, all network events are wrapped as grpc_closures and placed on exec_ctx->closure_list.
Asynchronous operations produced inside closure callbacks are instead handled by a grpc_combiner and linked onto the exec_ctx via exec_ctx->last_combiner.
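To make the two levels concrete, here is a toy model of that drain order (my own types, not gRPC's): flush keeps going as long as either plain closures or combiner work remain, and plain closures always run first.

#include <stdbool.h>
#include <stddef.h>

typedef struct task { void (*run)(void *arg); void *arg; struct task *next; } task;

typedef struct {
  task *closures;   /* ordinary closures, drained first                */
  task *combiners;  /* serialized work produced while closures ran    */
} toy_exec_ctx;

static task *pop(task **list) {
  task *t = *list;
  if (t != NULL) *list = t->next;
  return t;
}

/* Mirrors the shape of grpc_exec_ctx_flush: keep making progress on
   closures, then on combiners, until both queues are empty. */
static bool toy_flush(toy_exec_ctx *ctx) {
  bool did_something = false;
  for (;;) {
    task *t;
    if ((t = pop(&ctx->closures)) != NULL) {
      did_something = true;
      t->run(t->arg);            /* may push new work onto ctx->combiners */
    } else if ((t = pop(&ctx->combiners)) != NULL) {
      did_something = true;
      t->run(t->arg);
    } else {
      break;
    }
  }
  return did_something;
}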