Linux网络IO基础组件实现
1 理论基础组件
1.1 Epoll模型概述
1.1.1 Epoll模型对比Select/poll模型的优势
(1)Epoll可监听的描述符个数不受select的FD_SETSIZE限制,其上限基本只取决于系统内存(以及系统/进程允许打开的最大文件数)。
(2)IO效率不随描述符数目增加而线性下降,主要得益于回调处理机制。
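(3)内核与用户空间之间的数据传递开销小:fd集合通过epoll_ctl一次性注册并保存在内核中,epoll_wait每次只把已就绪的事件拷贝到用户空间,不必像select/poll那样每次调用都拷贝整个fd集合。(注:流传较广的“epoll使用mmap让内核和用户空间共用一块内存”的说法与Linux内核的实际实现不符。)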
1.1.2 Epoll基本用法
Epoll用法比较简单分为三个部分:第一是创建Epoll;第二是控制消息;最后是IO处理
创建接口:
epoll_create(1024);//创建一个epoll实例;参数只是容量提示(自Linux 2.6.8起被忽略,但必须大于0),并不是可支持的最大文件描述符数量
控制接口:用于维护fd集合
//其中注册:EPOLL_CTL_ADD 修改:EPOLL_CTL_MOD 删除EPOLL_CTL_DEL
epoll_ctl(m_epfd, EPOLL_CTL_ADD, fd, &ev);//用于注册fd,添加相关事件
IO事件触发接口:
epoll_wait(m_epfd, events, 1024, wait_timeout);//等待已注册fd集合上的IO事件,一次最多返回1024个就绪事件
1.1.3 Epoll触发方式
(1)水平触发Epoll-LT:具体工作方式,文件描述符就绪后,内核会一直触发直到用户处理。支持阻塞和非阻塞模式。
(2)边缘触发Epoll-ET:具体工作方式,文件描述符就绪后,内核会通知用户处理,用户不处理,将不会继续通知。仅支持非阻塞模式。
2 基本实现
有了前面的理论基础后,可以开始进行网络基础组件的设计。本次设计主要实现两个任务:一是socket的基本操作实现,二是事件的IO模型实现。组件内部只关注网络操作和事件分发,与具体业务相分离。
2.1 Socket管理组件
2.1.1 socket基本操作
这部分主要是进行socket常规操作,包括服务器监听启动、客户端连接请求、接收数据、发送数据、及关闭socket连接等操作。
/**
 * Create a TCP socket, bind it to server_ip:port, start listening, and
 * register the socket with the socket manager and the event dispatcher.
 *
 * @param this          base_socket_t instance, passed as an opaque pointer.
 * @param server_ip     local address to bind to.
 * @param port          local port to bind to.
 * @param callback      callback stored in the socket's private data.
 * @param callback_data opaque pointer stored alongside callback.
 * @return 0 on success; -1 if the object or its private data is NULL, or if
 *         socket(), bind() or listen() fails.
 */
int base_socket_listen(void* this, const char* server_ip, uint16_t port, callback_t callback, void* callback_data)
{
    log_debug("base_socket_listen, server_ip=%s, port=%d", server_ip, port);

    base_socket_t *p = (base_socket_t *)this;
    if (!p)
        return -1;

    base_socket_priv_t *priv = (base_socket_priv_t *)p->priv;
    if (!priv)
        return -1;

    /* NOTE(review): local_ip is assigned straight from server_ip; if the field
     * is a plain char pointer, the caller's string must outlive the socket --
     * confirm against the struct definition. */
    priv->local_ip = server_ip;
    priv->local_port = port;
    priv->callback = callback;
    priv->callback_data = callback_data;

    priv->fd = socket(AF_INET, SOCK_STREAM, 0);
    if (priv->fd == INVALID_SOCKET)
    {
        log_error("socket failed, err_code=%d, server_ip=%s, port=%u", _get_error_code(), server_ip, port);
        return -1;
    }

    _set_reuse_addr(priv->fd);
    _set_non_block(priv->fd);

    struct sockaddr_in serv_addr;
    _set_addr(server_ip, port, &serv_addr);

    int ret = bind(priv->fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr));
    if (ret == SOCKET_ERROR)
    {
        log_error("bind failed, err_code=%d, server_ip=%s, port=%u", _get_error_code(), server_ip, port);
        close(priv->fd);
        priv->fd = INVALID_SOCKET; /* do not leave a closed descriptor behind */
        return -1;
    }

    ret = listen(priv->fd, 64);
    if (ret == SOCKET_ERROR)
    {
        log_error("listen failed, err_code=%d, server_ip=%s, port=%u", _get_error_code(), server_ip, port);
        close(priv->fd);
        priv->fd = INVALID_SOCKET; /* do not leave a closed descriptor behind */
        return -1;
    }

    priv->state = SOCKET_STATE_LISTENING;
    log_debug("Listen on %s:%d", server_ip, port);

    manager_add_base_socket(this);
    event_dispatch_add_event(priv->fd, SOCKET_READ | SOCKET_EXCEP);
    return 0;
}
/**
 * Create a TCP socket and start connecting it to server_ip:port, then
 * register the socket with the socket manager and the event dispatcher.
 *
 * A connect() failure whose error code _is_block() accepts is not treated as
 * an error: the socket is left in SOCKET_STATE_CONNECTING.
 *
 * @param this          base_socket_t instance, passed as an opaque pointer.
 * @param server_ip     remote address to connect to.
 * @param port          remote port to connect to.
 * @param callback      callback stored in the socket's private data.
 * @param callback_data opaque pointer stored alongside callback.
 * @return the socket descriptor on success (note: base_socket_listen returns
 *         0 instead); -1 if the object or its private data is NULL, or if
 *         socket() or connect() fails.
 */
int base_socket_connect(void* this, const char* server_ip, uint16_t port, callback_t callback, void* callback_data)
{
    log_debug("base_socket_connect, server_ip=%s, port=%d", server_ip, port);

    base_socket_t *p = (base_socket_t *)this;
    if (!p)
        return -1;

    base_socket_priv_t *priv = (base_socket_priv_t *)p->priv;
    if (!priv)
        return -1;

    /* NOTE(review): remote_ip is assigned straight from server_ip; if the
     * field is a plain char pointer, the caller's string must outlive the
     * socket -- confirm against the struct definition. */
    priv->remote_ip = server_ip;
    priv->remote_port = port;
    priv->callback = callback;
    priv->callback_data = callback_data;

    priv->fd = socket(AF_INET, SOCK_STREAM, 0);
    if (priv->fd == INVALID_SOCKET)
    {
        log_error("socket failed, err_code=%d, server_ip=%s, port=%u", _get_error_code(), server_ip, port);
        return -1;
    }

    _set_non_block(priv->fd);
    _set_no_delay(priv->fd);

    struct sockaddr_in serv_addr;
    _set_addr(server_ip, port, &serv_addr);

    int ret = connect(priv->fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr));
    if (ret == SOCKET_ERROR)
    {
        /* Fetch the error code once so the value tested is the value logged. */
        int err_code = _get_error_code();
        if (!_is_block(err_code))
        {
            log_error("connect failed, err_code=%d, server_ip=%s, port=%u", err_code, server_ip, port);
            close(priv->fd);
            priv->fd = INVALID_SOCKET; /* do not leave a closed descriptor behind */
            return -1;
        }
    }

    priv->state = SOCKET_STATE_CONNECTING;
    manager_add_base_socket(this);
    event_dispatch_add_event(priv->fd, SOCKET_ALL);
    return priv->fd;
}
int base_socket_send(void* this, void* buf, int len)
{
base_socket_t *p = (void *base_socket_t)this;
if(!p) return -1;
base_socket_priv_t *priv = (base_socket_priv_t *)p->priv;
if(!priv) return -1;
if (priv->state != SOCKET_STATE_CONNECTED)
return -1;
int ret = send(priv->fd, (char*)buf, len, 0);
if (ret == SOCKET_ERROR)
{
int err_code = _get_error_code();
if (_is_block(err_code))
{
ret = 0;
//log("socket send block fd=%d", priv->fd);
}
else
{
log_error("send failed, err_code=%d, len=%d", err_code, len);
}
}
return ret;
}
int base_socket_recv(void* this, void* buf, int len)
{
base_socket_t *p = (void *base_socket_t)this;
if(!p) return -1;
base_socket_priv_t *priv = (base_socket_priv_t *)p->priv;
if(!priv) return -1;
return recv(priv->fd, (char*)buf, len, 0);
}
int base_socket_colse(void* this)
{
base_socket_t *p = (void *base_socket_t)this;
if(!p) return -1;
base_socket_priv_t *priv = (base_socket_priv_t *)p->priv;
if(!priv) return -1;
event_dispatch_remove_event(priv->fd, SOCKET_ALL);
manager_remove_base_socket(this);
close(priv->fd);
base_socket_release_ref();
return 0;
}
2.1.2 socket事件响应
这部分主要是用于event_dispatch事件触发后调用,当IO事件上报后,进行相关回调操作。
void base_socket_onread(void* this)
{
base_socket_t *p = (void *base_socket_t)this;
if(!p) return ;
base_socket_priv_t *priv = (base_socket_priv_t *)p->priv;
if(!priv) return ;
if (priv->state == SOCKET_STATE_LISTENING)
{
_accept_new_socket();
}
else
{
u_long avail = 0;
int ret = ioctlsocket(priv->fd, FIONREAD, &avail);
if ( (SOCKET_ERROR == ret) || (avail == 0) )
{
priv->call_back(priv->callback_data, SOCKET_MSG_CLOSE, (int)priv->fd, NULL);
}
else
{
priv->call_back(priv->callback_data, SOCKET_MSG_READ, (int)priv->fd, NULL);
}
}
}
void base_socket_onwrite(void* this)
{
base_socket_t *p = (void *base_socket_t)this;
if(!p) return ;
base_socket_priv_t *priv = (base_socket_priv_t *)p->priv;
if(!priv) return ;
if (priv->state == SOCKET_STATE_CONNECTING)
{
int error = 0;
socklen_t len = sizeof(error);
getsockopt(priv->fd, SOL_SOCKET, SO_ERROR, (void*)&error, &len);
if (error) {
priv->call_back(priv->callback_data, SOCKET_MSG_CLOSE, (int)priv->fd, NULL);
} else {
priv->state = SOCKET_STATE_CONNECTED;
priv->call_back(priv->callback_data, SOCKET_MSG_CONFIRM, (int)priv->fd, NULL);
}
}
else
{
priv->call_back(priv->callback_data, SOCKET_MSG_WRITE, (int)priv->fd, NULL);
}
}
void base_socket_onclose(void* this)
{
base_socket_t *p = (void *base_socket_t)this;
if(!p) return ;
base_socket_priv_t *priv = (base_socket_priv_t *)p->priv;
if(!priv) return ;
priv->state = SOCKET_STATE_CLOSING;
priv->call_back(priv->callback_data, SOCKET_MSG_CLOSE, (int)priv->fd, NULL);
}
2.2 Epoll事件处理组件
2.2.1 Epoll事件管理
这部分用于向epoll添加和移除socket的监听事件。
/**
 * Register fd with the epoll instance cxt.epfd.
 *
 * The same fixed event mask (edge-triggered read/write/priority/error/hangup)
 * is always used; socket_event is accepted but not consulted.
 * A failure of epoll_ctl() is only logged.
 *
 * @param fd           descriptor to register.
 * @param socket_event unused.
 */
void event_dispatch_add_event(int fd, uint8_t socket_event)
{
    struct epoll_event registration;

    registration.data.fd = fd;
    registration.events = EPOLLIN | EPOLLOUT | EPOLLET | EPOLLPRI | EPOLLERR | EPOLLHUP;

    int rc = epoll_ctl(cxt.epfd, EPOLL_CTL_ADD, fd, &registration);
    if (rc == 0)
        return;

    log_error("epoll_ctl() failed, errno=%d", errno);
}
/**
 * Remove fd from the epoll instance cxt.epfd.
 *
 * socket_event is accepted but not consulted. A failure of epoll_ctl() is
 * only logged.
 *
 * @param fd           descriptor to unregister.
 * @param socket_event unused.
 */
void event_dispatch_remove_event(int fd, uint8_t socket_event)
{
    int rc = epoll_ctl(cxt.epfd, EPOLL_CTL_DEL, fd, NULL);
    if (rc == 0)
        return;

    log_error("epoll_ctl failed, errno=%d", errno);
}
2.2.2 Epoll事件分发
这部分包含两块内容:第一块是定时器事件和循环处理事件的添加,以及对已添加事件的检查与触发;第二块是启动整个epoll事件管理,socket事件响应、外部添加的定时器以及循环处理事件都在主循环内完成。
/**
 * Add a periodic timer, or re-arm an existing one.
 *
 * A timer is identified by its (callback, user_data) pair. If such a timer is
 * already in m_timer_list, only its interval and next deadline are updated;
 * otherwise a new item is appended.
 *
 * @param callback  function invoked when the timer fires.
 * @param user_data opaque pointer passed back to callback.
 * @param interval  period, in the units returned by get_tick_count().
 */
void event_dispatch_add_timer(callback_t callback, void* user_data, uint64_t interval)
{
    list<timer_item_t*>::iterator it;
    for (it = m_timer_list.begin(); it != m_timer_list.end(); it++)
    {
        timer_item_t* pitem = *it;
        if (pitem->callback == callback && pitem->user_data == user_data)
        {
            pitem->interval = interval;
            pitem->next_tick = get_tick_count() + interval;
            return;
        }
    }

    // Allocate with new: event_dispatch_remove_timer releases items with
    // delete, so allocating with malloc would be a mismatched deallocation.
    timer_item_t* pitem = new timer_item_t;
    pitem->callback = callback;
    pitem->user_data = user_data;
    pitem->interval = interval;
    pitem->next_tick = get_tick_count() + interval;
    m_timer_list.push_back(pitem);
}
/**
 * Remove the first timer in m_timer_list whose (callback, user_data) pair
 * matches the arguments, and release its item. Does nothing when no timer
 * matches.
 *
 * @param callback  callback the timer was registered with.
 * @param user_data user data the timer was registered with.
 */
void event_dispatch_remove_timer(callback_t callback, void* user_data)
{
    for (list<timer_item_t*>::iterator pos = m_timer_list.begin(); pos != m_timer_list.end(); pos++)
    {
        timer_item_t* entry = *pos;
        const bool same_timer = (entry->callback == callback) && (entry->user_data == user_data);
        if (!same_timer)
            continue;

        // Unlink first, then free; the iterator is not used again.
        m_timer_list.erase(pos);
        delete entry;
        return;
    }
}
void _check_timer()
{
uint64_t curr_tick = get_tick_count();
list<timer_item_t*>::iterator it;
for (it = m_timer_list.begin(); it != m_timer_list.end(); )
{
timer_item_t* pitem = *it;
it++; // iterator maybe deleted in the callback, so we should increment it before callback
if (curr_tick >= pitem->next_tick)
{
pitem->next_tick += pitem->interval;
pitem->callback(pitem->user_data, NETLIB_MSG_TIMER, 0, NULL);
}
}
}
/**
 * Append a per-iteration hook to m_loop_list.
 *
 * Only the callback and user_data fields of the new item are set; the
 * remaining timer fields are left untouched.
 *
 * @param callback  function invoked on every pass of _check_loop().
 * @param user_data opaque pointer passed back to callback.
 */
void event_dispatch_add_loop(callback_t callback, void* user_data)
{
    timer_item_t* hook = new timer_item_t;

    hook->user_data = user_data;
    hook->callback = callback;

    m_loop_list.push_back(hook);
}
void _check_loop()
{
for (list<timer_item_t*>::iterator it = m_loop_list.begin(); it != m_loop_list.end(); it++) {
timer_item_t* pitem = *it;
pitem->callback(pitem->user_data, NETLIB_MSG_LOOP, 0, NULL);
}
}
// Run the event loop: wait for epoll events, dispatch them to the matching
// sockets, then run timers and loop hooks. Repeats until cxt.running is
// cleared. Returns immediately if the loop is already running.
//
// wait_timeout is passed unchanged to epoll_wait() as its timeout argument.
void event_dispatch_start_dispatch(uint32_t wait_timeout)
{
struct epoll_event events[1024];
int nfds = 0;
// Guard against starting the loop twice.
if(cxt.running)
return;
cxt.running = true;
while (cxt.running)
{
// A negative return value from epoll_wait() is not handled separately: the
// dispatch loop below simply does not execute, and timers/hooks still run.
nfds = epoll_wait(cxt.epfd, events, 1024, wait_timeout);
for (int i = 0; i < nfds; i++)
{
int ev_fd = events[i].data.fd;
// Map the ready descriptor back to its socket object; events for unknown
// descriptors are skipped.
base_socket_t* psocket = manager_find_base_socket(ev_fd);
if (!psocket)
continue;
// The checks below are independent: several handlers can run for one event,
// and onclose() can run twice if both this bit and an error/hangup bit are set.
// NOTE(review): event_dispatch_add_event does not include EPOLLRDHUP in the
// mask it registers, so this branch looks unreachable -- confirm.
if (events[i].events & EPOLLRDHUP)
{
psocket->onclose();
}
if (events[i].events & EPOLLIN)
{
psocket->onread();
}
if (events[i].events & EPOLLOUT)
{
// NOTE(review): spelled "onwirte" here while the handler in this file is
// base_socket_onwrite -- confirm the member name.
psocket->onwirte();
}
if (events[i].events & (EPOLLPRI | EPOLLERR | EPOLLHUP))
{
psocket->onclose();
}
// NOTE(review): presumably balances a reference taken by
// manager_find_base_socket -- confirm.
psocket->release_ref();
}
// Timers and loop hooks run once per pass, after socket events.
_check_timer();
_check_loop();
}
}
/**
 * Ask the dispatch loop to stop by clearing cxt.running. The loop in
 * event_dispatch_start_dispatch tests the flag at the top of each pass, so it
 * exits after the pass in progress completes.
 */
void event_dispatch_stop_dispatch()
{
    cxt.running = false;
}