brpc's I/O Event Dispatcher

Structure

EventDispatcher
- int _epfd
- volatile bool _stop
- bthread_t _tid
- bthread_attr_t _consumer_thread_attr
- int _wakeup_fds[2]
+virtual int Start(const bthread_attr_t* consumer_thread_attr)
+int AddConsumer(SocketId socket_id, int fd)
+int RegisterEvent(SocketId socket_id, int fd, bool pollin)
+int UnregisterEvent(SocketId socket_id, int fd, bool pollin)
+bool Running()
+void Stop()
+void Join()
-static void* RunThis(void* arg)
-void Run()
-int RemoveConsumer(int fd)

It supports both epoll (Linux) and kqueue (macOS/BSD).
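All of the code walked through below follows the Linux/epoll path. For orientation only, here is a minimal sketch of what the equivalent read-event registration could look like on a kqueue backend; this is an illustrative helper, not brpc's exact code, with EV_CLEAR standing in for the edge-triggered behavior of EPOLLET:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <cstdint>

// Hypothetical helper mirroring AddConsumer() on a kqueue backend:
// register a read event on `fd' and carry the SocketId in udata.
int AddReadEventKqueue(int kq, uint64_t socket_id, int fd) {
    struct kevent evt;
    // EV_CLEAR: only report readiness changes, similar to EPOLLET.
    EV_SET(&evt, fd, EVFILT_READ, EV_ADD | EV_ENABLE | EV_CLEAR,
           0, 0, (void*)(uintptr_t)socket_id);
    return kevent(kq, &evt, 1, NULL, 0, NULL);
}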

Initialization

brpc's I/O event dispatcher follows the multi-threaded Reactor pattern.
The global dispatchers are initialized by InitializeGlobalDispatchers:
they are divided into task_group_ntags groups, each holding event_dispatcher_num dispatchers.

void InitializeGlobalDispatchers() {
    g_edisp = new EventDispatcher[FLAGS_task_group_ntags * FLAGS_event_dispatcher_num];
    for (int i = 0; i < FLAGS_task_group_ntags; ++i) {
        for (int j = 0; j < FLAGS_event_dispatcher_num; ++j) {
            bthread_attr_t attr =
                FLAGS_usercode_in_pthread ? BTHREAD_ATTR_PTHREAD : BTHREAD_ATTR_NORMAL;
            attr.tag = (BTHREAD_TAG_DEFAULT + i) % FLAGS_task_group_ntags;
            CHECK_EQ(0, g_edisp[i * FLAGS_event_dispatcher_num + j].Start(&attr));
        }
    }
    // This atexit will be run before g_task_control.stop() because the above
    // Start() initializes g_task_control by creating bthreads (to run epoll/kqueue).
    CHECK_EQ(0, atexit(StopAndJoinGlobalDispatchers));
}
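As a quick, hypothetical usage note (the binary name is invented and flag defaults may differ across brpc versions), the layout above can be tuned with gflags at startup, e.g. two tag groups with four dispatchers each, giving 8 EventDispatcher objects in total:

./my_brpc_server --task_group_ntags=2 --event_dispatcher_num=4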

Dispatch Strategy

EventDispatcher& GetGlobalEventDispatcher(int fd, bthread_tag_t tag) {
    pthread_once(&g_edisp_once, InitializeGlobalDispatchers);
    if (FLAGS_task_group_ntags == 1 && FLAGS_event_dispatcher_num == 1) {
        return g_edisp[0];
    }
    int index = butil::fmix32(fd) % FLAGS_event_dispatcher_num;
    return g_edisp[tag * FLAGS_event_dispatcher_num + index];
}
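A self-contained sketch of this selection rule follows. The fmix32 below is an illustrative copy of the MurmurHash3 32-bit finalizer, which is what butil::fmix32 amounts to; the flag value and tag are assumptions for the demo, not brpc defaults:

#include <cstdint>
#include <cstdio>

static uint32_t fmix32(uint32_t h) {
    // MurmurHash3 finalizer: spreads nearby fd values across dispatchers.
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
}

int main() {
    const int event_dispatcher_num = 4;  // assumed flag value
    const int tag = 1;                   // assumed bthread tag
    for (int fd = 10; fd < 14; ++fd) {
        const int index = fmix32(fd) % event_dispatcher_num;
        printf("fd=%d -> g_edisp[%d]\n", fd, tag * event_dispatcher_num + index);
    }
    return 0;
}

Within one tag group every fd is pinned to a single dispatcher, so all epoll operations for that fd stay on the same epoll instance.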

Read Events

For the acceptor, the read-event handler is OnNewConnections:

options.on_edge_triggered_events = OnNewConnections;

For a new socket created after a connection is accepted, the read-event handler is OnNewDataFromTcp (RDMA) or OnNewMessages:

#if BRPC_WITH_RDMA
        if (am->_use_rdma) {
            options.on_edge_triggered_events = rdma::RdmaEndpoint::OnNewDataFromTcp;
        } else {
#else
        {
#endif
            options.on_edge_triggered_events = InputMessenger::OnNewMessages;
        }

Write Events

For a non-blocking connect, the dispatcher's RegisterEvent is called to register EPOLLOUT:

int Socket::Connect(const timespec* abstime,
                    int (*on_connect)(int, int, void*), void* data) {
    if (_ssl_ctx) {
        _ssl_state = SSL_CONNECTING;
    } else {
        _ssl_state = SSL_OFF;
    }
    struct sockaddr_storage serv_addr;
    socklen_t addr_size = 0;
    if (butil::endpoint2sockaddr(remote_side(), &serv_addr, &addr_size) != 0) {
        PLOG(ERROR) << "Fail to get sockaddr";
        return -1;
    }
    butil::fd_guard sockfd(socket(serv_addr.ss_family, SOCK_STREAM, 0));
    if (sockfd < 0) {
        PLOG(ERROR) << "Fail to create socket";
        return -1;
    }
    CHECK_EQ(0, butil::make_close_on_exec(sockfd));
    // We need to do async connect (to manage the timeout by ourselves).
    CHECK_EQ(0, butil::make_non_blocking(sockfd));
    
    const int rc = ::connect(
        sockfd, (struct sockaddr*)&serv_addr, addr_size);
    if (rc != 0 && errno != EINPROGRESS) {
        PLOG(WARNING) << "Fail to connect to " << remote_side();
        return -1;
    }
    if (on_connect) {
        EpollOutRequest* req = new(std::nothrow) EpollOutRequest;
        if (req == NULL) {
            LOG(FATAL) << "Fail to new EpollOutRequest";
            return -1;
        }
        req->fd = sockfd;
        req->timer_id = 0;
        req->on_epollout_event = on_connect;
        req->data = data;
        // A temporary Socket to hold `EpollOutRequest', which will
        // be added into epoll device soon
        SocketId connect_id;
        SocketOptions options;
        options.bthread_tag = _bthread_tag;
        options.user = req;
        if (Socket::Create(options, &connect_id) != 0) {
            LOG(FATAL) << "Fail to create Socket";
            delete req;
            return -1;
        }
        // From now on, ownership of `req' has been transferred to
        // `connect_id'. We hold an additional reference here to
        // ensure `req' to be valid in this scope
        SocketUniquePtr s;
        CHECK_EQ(0, Socket::Address(connect_id, &s));

        // Add `sockfd' into epoll so that `HandleEpollOutRequest' will
        // be called with `req' when epoll event reaches
        if (GetGlobalEventDispatcher(sockfd, _bthread_tag).RegisterEvent(connect_id, sockfd, false) !=
            0) {
            const int saved_errno = errno;
            PLOG(WARNING) << "Fail to add fd=" << sockfd << " into epoll";
            s->SetFailed(saved_errno, "Fail to add fd=%d into epoll: %s",
                         (int)sockfd, berror(saved_errno));
            return -1;
        }
        
        // Register a timer for EpollOutRequest. Note that the timeout
        // callback has no race with the one above as both of them try
        // to `SetFailed' `connect_id' while only one of them can succeed
        // It also works when `HandleEpollOutRequest' has already been
        // called before adding the timer since it will be removed
        // inside destructor of `EpollOutRequest' after leaving this scope
        if (abstime) {
            int rc = bthread_timer_add(&req->timer_id, *abstime,
                                       HandleEpollOutTimeout,
                                       (void*)connect_id);
            if (rc) {
                LOG(ERROR) << "Fail to add timer: " << berror(rc);
                s->SetFailed(rc, "Fail to add timer: %s", berror(rc));
                return -1;
            }
        }
        
    } else {
        if (WaitEpollOut(sockfd, false, abstime) != 0) {
            PLOG(WARNING) << "Fail to wait EPOLLOUT of fd=" << sockfd;
            return -1;
        }
        if (CheckConnected(sockfd) != 0) {
            return -1;
        }
    }
    return sockfd.release();
}
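When EPOLLOUT later fires, on_connect (or CheckConnected in the synchronous branch) still has to find out whether the connect actually succeeded. A generic sketch of that check, not brpc's exact CheckConnected, uses getsockopt(SO_ERROR):

#include <sys/socket.h>
#include <cerrno>

// Returns 0 if the non-blocking connect completed successfully,
// -1 (with errno set) otherwise.
int CheckConnectResult(int sockfd) {
    int err = 0;
    socklen_t len = sizeof(err);
    if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, &err, &len) != 0) {
        return -1;   // getsockopt itself failed
    }
    if (err != 0) {
        errno = err; // e.g. ECONNREFUSED, ETIMEDOUT
        return -1;
    }
    return 0;
}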

EventDispatcher

Initialization

For epoll, the constructor creates the epoll instance as well as a pipe used to wake up the epoll event loop:

EventDispatcher::EventDispatcher()
    : _epfd(-1)
    , _stop(false)
    , _tid(0)
    , _consumer_thread_attr(BTHREAD_ATTR_NORMAL)
{
    _epfd = epoll_create(1024 * 1024);
    if (_epfd < 0) {
        PLOG(FATAL) << "Fail to create epoll";
        return;
    }
    CHECK_EQ(0, butil::make_close_on_exec(_epfd));

    _wakeup_fds[0] = -1;
    _wakeup_fds[1] = -1;
    if (pipe(_wakeup_fds) != 0) {
        PLOG(FATAL) << "Fail to create pipe";
        return;
    }
}
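The pipe gives Stop() a file descriptor it can use to interrupt a blocked epoll_wait(). A minimal sketch of the generic self-pipe wakeup pattern (assumed for illustration, with the read end registered in the epoll set; brpc's actual Stop() may wake the loop differently):

#include <unistd.h>

// Assumed helper: the pipe's read end has been added to the epoll set,
// so writing one byte makes epoll_wait() return and the loop can see _stop.
void WakeupEventLoop(int wakeup_write_fd) {
    const char byte = 0;
    // Best-effort write; if the pipe is full, the loop is already awake.
    (void)write(wakeup_write_fd, &byte, 1);
}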

Starting the Event Loop

bthread_start_background is used to start a bthread that runs RunThis:

int EventDispatcher::Start(const bthread_attr_t* consumer_thread_attr) {
    if (_epfd < 0) {
        LOG(FATAL) << "epoll was not created";
        return -1;
    }
    
    if (_tid != 0) {
        LOG(FATAL) << "Already started this dispatcher(" << this 
                   << ") in bthread=" << _tid;
        return -1;
    }

    // Set _consumer_thread_attr before creating epoll thread to make sure
    // everything seems sane to the thread.
    _consumer_thread_attr = (consumer_thread_attr  ?
                             *consumer_thread_attr : BTHREAD_ATTR_NORMAL);

    // _consumer_thread_attr is used in StartInputEvent(); assigning the
    // NEVER_QUIT flag to it would make the bthreads created for epoll
    // events never quit.
    bthread_attr_t epoll_thread_attr = _consumer_thread_attr | BTHREAD_NEVER_QUIT;

    // Polling thread uses the same attr for consumer threads (NORMAL right
    // now). Previously, we used small stack (32KB) which may be overflowed
    // when the older comlog (e.g. 3.1.85) calls com_openlog_r(). Since this
    // is also a potential issue for consumer threads, using the same attr
    // should be a reasonable solution.
    int rc = bthread_start_background(
        &_tid, &epoll_thread_attr, RunThis, this);
    if (rc) {
        LOG(FATAL) << "Fail to create epoll thread: " << berror(rc);
        return -1;
    }
    return 0;
}

void* EventDispatcher::RunThis(void* arg) {
    ((EventDispatcher*)arg)->Run();
    return NULL;
}

void EventDispatcher::Run() {
    while (!_stop) {
        epoll_event e[32];
#ifdef BRPC_ADDITIONAL_EPOLL
        // Performance downgrades in examples.
        int n = epoll_wait(_epfd, e, ARRAY_SIZE(e), 0);
        if (n == 0) {
            n = epoll_wait(_epfd, e, ARRAY_SIZE(e), -1);
        }
#else
        const int n = epoll_wait(_epfd, e, ARRAY_SIZE(e), -1);
#endif
        if (_stop) {
            // epoll_ctl/epoll_wait should have some sort of memory fencing
            // guaranteeing that we(after epoll_wait) see _stop set before
            // epoll_ctl.
            break;
        }
        if (n < 0) {
            if (EINTR == errno) {
                // We've checked _stop, no wake-up will be missed.
                continue;
            }
            PLOG(FATAL) << "Fail to epoll_wait epfd=" << _epfd;
            break;
        }
        for (int i = 0; i < n; ++i) {
            if (e[i].events & (EPOLLIN | EPOLLERR | EPOLLHUP)
#ifdef BRPC_SOCKET_HAS_EOF
                || (e[i].events & has_epollrdhup)
#endif
                ) {
                // We don't care about the return value.
                Socket::StartInputEvent(e[i].data.u64, e[i].events,
                                        _consumer_thread_attr);
            }
        }
        for (int i = 0; i < n; ++i) {
            if (e[i].events & (EPOLLOUT | EPOLLERR | EPOLLHUP)) {
                // We don't care about the return value.
                Socket::HandleEpollOut(e[i].data.u64);
            }
        }
    }
}

When a read event fires, StartInputEvent is executed: it resolves the SocketId to a Socket, increments the _nevent counter, and (for the first pending event) starts a bthread that runs the registered edge-triggered callback _on_edge_triggered_events:

int Socket::StartInputEvent(SocketId id, uint32_t events,
                            const bthread_attr_t& thread_attr) {
    SocketUniquePtr s;
    if (Address(id, &s) < 0) {
        return -1;
    }
    if (NULL == s->_on_edge_triggered_events) {
        // Callback can be NULL when receiving error epoll events
        // (Added into epoll by `WaitConnected')
        return 0;
    }
    if (s->fd() < 0) {
#if defined(OS_LINUX)
        CHECK(!(events & EPOLLIN)) << "epoll_events=" << events;
#elif defined(OS_MACOSX)
        CHECK((short)events != EVFILT_READ) << "kqueue filter=" << events;
#endif
        return -1;
    }

    // if (events & has_epollrdhup) {
    //     s->_eof = 1;
    // }
    // Passing e[i].events causes complex visibility issues and
    // requires stronger memory fences, since reading the fd returns
    // error as well, we don't pass the events.
    if (s->_nevent.fetch_add(1, butil::memory_order_acq_rel) == 0) {
        // According to the stats, above fetch_add is very effective. In a
        // server processing 1 million requests per second, this counter
        // is just 1500~1700/s
        g_vars->neventthread << 1;

        bthread_t tid;
        // transfer ownership as well, don't use s anymore!
        Socket* const p = s.release();

        bthread_attr_t attr = thread_attr;
        attr.keytable_pool = p->_keytable_pool;
        attr.tag = bthread_self_tag();
        if (FLAGS_usercode_in_coroutine) {
            ProcessEvent(p);
        } else if (bthread_start_urgent(&tid, &attr, ProcessEvent, p) != 0) {
            LOG(FATAL) << "Fail to start ProcessEvent";
            ProcessEvent(p);
        }
    }
    return 0;
}

void* Socket::ProcessEvent(void* arg) {
    // the enclosed Socket is valid and free to access inside this function.
    SocketUniquePtr s(static_cast<Socket*>(arg));
    s->_on_edge_triggered_events(s.get());
    return NULL;
}

Registering Events

RegisterEvent stores the SocketId in evt.data.u64 and is mainly used to register the write event. The pollin parameter decides the operation: if pollin is true, the existing registration is modified (EPOLL_CTL_MOD) to watch both read and write; otherwise the fd is newly added (EPOLL_CTL_ADD) with only the write event. Edge triggering (EPOLLET) is used in both cases. When the event is handled later, the corresponding Socket can be recovered from the SocketId.

int EventDispatcher::RegisterEvent(SocketId socket_id, int fd, bool pollin) {
    if (_epfd < 0) {
        errno = EINVAL;
        return -1;
    }

    epoll_event evt;
    evt.data.u64 = socket_id;
    evt.events = EPOLLOUT | EPOLLET;
#ifdef BRPC_SOCKET_HAS_EOF
    evt.events |= has_epollrdhup;
#endif
    if (pollin) {
        evt.events |= EPOLLIN;
        if (epoll_ctl(_epfd, EPOLL_CTL_MOD, fd, &evt) < 0) {
            // This fd has been removed from epoll via `RemoveConsumer',
            // in which case errno will be ENOENT
            return -1;
        }
    } else {
        if (epoll_ctl(_epfd, EPOLL_CTL_ADD, fd, &evt) < 0) {
            return -1;
        }
    }
    return 0;
}

AddConsumer registers the read event:

int EventDispatcher::AddConsumer(SocketId socket_id, int fd) {
    if (_epfd < 0) {
        errno = EINVAL;
        return -1;
    }
    epoll_event evt;
    evt.events = EPOLLIN | EPOLLET;
    evt.data.u64 = socket_id;
#ifdef BRPC_SOCKET_HAS_EOF
    evt.events |= has_epollrdhup;
#endif
    return epoll_ctl(_epfd, EPOLL_CTL_ADD, fd, &evt);
}
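Because both RegisterEvent and AddConsumer use edge triggering (EPOLLET), an event is only reported on a readiness change, so the consumer callback must drain the fd until EAGAIN or it may never be told about the remaining bytes. An illustrative drain loop (not brpc's InputMessenger, which parses protocol messages while reading):

#include <unistd.h>
#include <cerrno>
#include <cstddef>

// Reads everything currently available on an edge-triggered fd.
// Returns bytes read, 0 on peer close with no data, or -1 on error.
ssize_t DrainEdgeTriggeredFd(int fd, char* buf, size_t cap) {
    ssize_t total = 0;
    while (true) {
        const ssize_t n = read(fd, buf, cap);
        if (n > 0) {
            total += n;          // a real consumer would parse here
            continue;
        }
        if (n == 0) {
            return total;        // peer closed the connection
        }
        if (errno == EINTR) {
            continue;            // interrupted, retry
        }
        if (errno == EAGAIN || errno == EWOULDBLOCK) {
            return total;        // fully drained; wait for the next edge
        }
        return -1;               // real error
    }
}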

Unregistering Events

UnregisterEvent decides based on pollin: if pollin is false, the fd is removed from epoll; otherwise the registration is modified so that only read events are watched.

int EventDispatcher::UnregisterEvent(SocketId socket_id, 
                                    int fd, bool pollin) {
    if (pollin) {
        epoll_event evt;
        evt.data.u64 = socket_id;
        evt.events = EPOLLIN | EPOLLET;
#ifdef BRPC_SOCKET_HAS_EOF
        evt.events |= has_epollrdhup;
#endif
        return epoll_ctl(_epfd, EPOLL_CTL_MOD, fd, &evt);
    } else {
        return epoll_ctl(_epfd, EPOLL_CTL_DEL, fd, NULL);
    }
}

RemoveConsumer removes the fd from epoll:

int EventDispatcher::RemoveConsumer(int fd) {
    if (fd < 0) {
        return -1;
    }
    // Removing the consumer from dispatcher before closing the fd because
    // if process was forked and the fd is not marked as close-on-exec,
    // closing does not set reference count of the fd to 0, thus does not
    // remove the fd from epoll. More badly, the fd will not be removable
    // from epoll again! If the fd was level-triggered and there's data left,
    // epoll_wait will keep returning events of the fd continuously, making
    // program abnormal.
    if (epoll_ctl(_epfd, EPOLL_CTL_DEL, fd, NULL) < 0) {
        PLOG(WARNING) << "Fail to remove fd=" << fd << " from epfd=" << _epfd;
        return -1;
    }
    return 0;
}