众所周知,TCP是流传输协议,存在粘包问题;发送端每次发送一个单元的数据,接收端收到的却不一定是一个单元的数据,可能是0.5个单元,1.5个单元或者其它任意大小的数据长度。所以接收端需要有一个可动态存储的接收缓存区和数据完整性验证机制;完整性验证需要依赖于具体的应用层协议;下面就通过mongoose http服务器的源码来学习如何实现动态缓存和完整性验证。
MG_INTERNAL void mg_recv_common(struct mg_connection *nc, void *buf, int len,
int own) {
DBG(("%p %d %u", nc, len, (unsigned int) nc->recv_mbuf.len));
if (nc->flags & MG_F_CLOSE_IMMEDIATELY) {
DBG(("%p discarded %d bytes", nc, len));
/*
* This connection will not survive next poll. Do not deliver events,
* send data to /dev/null without acking.
*/
if (own) {
MG_FREE(buf);
}
return;
}
nc->last_io_time = (time_t) mg_time();
if (!own) {
mbuf_append(&nc->recv_mbuf, buf, len);
} else if (nc->recv_mbuf.len == 0) {
/* Adopt buf as recv_mbuf's backing store. */
mbuf_free(&nc->recv_mbuf);
nc->recv_mbuf.buf = (char *) buf;
nc->recv_mbuf.size = nc->recv_mbuf.len = len;
} else {
mbuf_append(&nc->recv_mbuf, buf, len);
MG_FREE(buf);
}
mg_call(nc, NULL, MG_EV_RECV, &len);
}
每次读操作时,先申请MG_TCP_RECV_BUFFER_SIZE大小的空间来存储网络数据,然后调用mg_recv_common函数进行处理,函数代码如上。
这段代码中有两个重点:mbuf_append函数和mg_call函数。mbuf_append函数将刚接收到的数据追加到接收缓冲区的末尾,mg_call函数则调用应用层逻辑进行处理;
下面先介绍mbuf_append函数
/*
 * Appends `len` bytes from `buf` after the data currently held in `a`, by
 * inserting at offset a->len. Returns whatever mbuf_insert() returns.
 */
size_t mbuf_append(struct mbuf *a, const void *buf, size_t len) {
  const size_t tail = a->len;
  return mbuf_insert(a, tail, buf, len);
}
/*
 * Inserts `len` bytes at offset `off` of the data stored in `a`, moving the
 * bytes from `off` onward towards the end to make room. If `buf` is NULL the
 * gap is opened but its contents are not written. Storage is grown with
 * MBUF_REALLOC when the current allocation is too small.
 *
 * Returns `len` on success, or 0 when nothing was inserted (zero length,
 * size overflow, or allocation failure). On failure `a` is left unchanged.
 */
size_t mbuf_insert(struct mbuf *a, size_t off, const void *buf, size_t len) {
  size_t needed, new_size;
  char *p = NULL;

  assert(a != NULL);
  assert(a->len <= a->size);
  assert(off <= a->len);

  /*
   * Nothing to insert. Returning early also keeps memmove() from being
   * handed the NULL buffer of an mbuf that has never been allocated.
   */
  if (len == 0) return 0;

  /* check overflow: a->buf + len must not wrap the address space... */
  if (~(size_t) 0 - (size_t) a->buf < len) return 0;
  /*
   * ...and a->len + len must not wrap either, otherwise the capacity test
   * below would pass wrongly and the copy would run past the allocation.
   */
  if (~(size_t) 0 - a->len < len) return 0;

  needed = a->len + len;

  if (needed > a->size) {
    /* Over-allocate by MBUF_SIZE_MULTIPLIER to amortize future growth. */
    new_size = (size_t)(needed * MBUF_SIZE_MULTIPLIER);
    /* Never allocate less than what is required (e.g. wrapped product). */
    if (new_size < needed) new_size = needed;

    p = (char *) MBUF_REALLOC(a->buf, new_size);
    if (p == NULL && new_size != needed) {
      /* The headroom could not be satisfied; retry with the exact size. */
      new_size = needed;
      p = (char *) MBUF_REALLOC(a->buf, new_size);
    }
    if (p == NULL) return 0;

    a->buf = p;
    a->size = new_size;
  }

  /* Open the gap at `off`; moves zero bytes when appending (off == len). */
  memmove(a->buf + off + len, a->buf + off, a->len - off);
  if (buf != NULL) {
    memcpy(a->buf + off, buf, len);
  }
  a->len += len;

  return len;
}
mbuf_append调用mbuf_insert将数据插入到缓冲区中已有数据的末尾。mbuf_insert支持将数据插入到缓冲区中的任意off位置,这个很有意思:=) ,但在http协议中用不到(其它协议可能用到);http协议只需不断地把数据追加到末尾就行(此时off == a->len,memmove移动的字节数为0)。
扩展过程中如果发现缓冲区不够大(a->len + len > a->size),则会调用MBUF_REALLOC重新分配一块大小为(a->len + len) * MBUF_SIZE_MULTIPLIER的缓冲区来存储(老缓冲区中的数据会被保留到新缓冲区中)。
这样动态缓冲机制就实现了(其实很简单,哈哈),接下来就把数据提交应用层处理啦,调用mg_call函数。
/*
 * Delivers event `ev` with payload `ev_data` to a handler of connection `nc`.
 *
 * If `ev_handler` is NULL, the connection's protocol handler is used when one
 * is set, otherwise the user handler. After the handler returns:
 *   - when the handler was the user handler, only the flag bits in
 *     _MG_CALLBACK_MODIFIABLE_FLAGS_MASK keep the handler's changes;
 *   - on non-UDP connections, if the receive buffer length changed, the
 *     difference is reported to the interface via vtable->recved().
 * Debug traces are emitted for every event except MG_EV_POLL.
 */
MG_INTERNAL void mg_call(struct mg_connection *nc,
                         mg_event_handler_t ev_handler, int ev, void *ev_data) {
  const int trace = (ev != MG_EV_POLL);

  if (ev_handler == NULL) {
    /* Protocol handler takes precedence over the user's handler. */
    ev_handler = (nc->proto_handler != NULL) ? nc->proto_handler : nc->handler;
  }

  if (trace) {
    DBG(("%p %s ev=%d ev_data=%p flags=%lu rmbl=%d smbl=%d", nc,
         ev_handler == nc->handler ? "user" : "proto", ev, ev_data, nc->flags,
         (int) nc->recv_mbuf.len, (int) nc->send_mbuf.len));
  }

  if (ev_handler != NULL) {
    /* Snapshot state the handler may change, then run it. */
    const unsigned long saved_flags = nc->flags;
    const size_t rx_len_before = nc->recv_mbuf.len;
    size_t consumed;

    ev_handler(nc, ev, ev_data);
    consumed = (rx_len_before - nc->recv_mbuf.len);

    /* Keep system flags as they were; accept only the modifiable bits. */
    if (ev_handler == nc->handler && nc->flags != saved_flags) {
      const unsigned long kept =
          saved_flags & ~_MG_CALLBACK_MODIFIABLE_FLAGS_MASK;
      const unsigned long changed =
          nc->flags & _MG_CALLBACK_MODIFIABLE_FLAGS_MASK;
      nc->flags = kept | changed;
    }

    if (consumed > 0 && !(nc->flags & MG_F_UDP)) {
      nc->iface->vtable->recved(nc, consumed);
    }
  }

  if (trace) {
    DBG(("%p after %s flags=%lu rmbl=%d smbl=%d", nc,
         ev_handler == nc->handler ? "user" : "proto", nc->flags,
         (int) nc->recv_mbuf.len, (int) nc->send_mbuf.len));
  }
}
mg_call函数中只需注意一点:当调用者没有显式传入ev_handler时,它会优先调用protocol handler(即nc->proto_handler,应用层协议的处理逻辑),只有在没有设置protocol handler时才直接调用用户的handler。当我们使用mongoose创建一个HTTP server时,会调用mg_set_protocol_http_websocket函数指定HTTP协议为应用层协议。
/*
 * Selects HTTP as the protocol of connection `nc` by installing
 * mg_http_handler as the connection's protocol handler.
 */
void mg_set_protocol_http_websocket(struct mg_connection *nc) {
nc->proto_handler = mg_http_handler;
}
接下来的重点就是mg_http_handler函数啦,请看代码:
void mg_http_handler(struct mg_connection *nc, int ev, void *ev_data) {
struct http_message hm;
mg_http_handler2(nc, ev, ev_data, &hm);
}
/*
 * HTTP protocol handler body.
 *
 * nc      - connection the event belongs to; its recv_mbuf holds the bytes
 *           received so far.
 * ev      - event being processed (MG_EV_CLOSE and MG_EV_RECV get special
 *           treatment below).
 * ev_data - event payload, forwarded unchanged to the user handler.
 * hm      - caller-provided storage that mg_parse_http() fills in.
 *
 * Every event is forwarded to the user handler via mg_call(). In addition,
 * buffered data is parsed on MG_EV_RECV (and once more on MG_EV_CLOSE) and a
 * MG_EV_HTTP_REQUEST / MG_EV_HTTP_REPLY event is raised through
 * mg_http_call_endpoint_handler() when a message can be delivered.
 */
static void mg_http_handler2(struct mg_connection *nc, int ev, void *ev_data,
struct http_message *hm) {
struct mg_http_proto_data *pd = mg_http_get_proto_data(nc);
struct mbuf *io = &nc->recv_mbuf;
int req_len;
/* Non-zero when the connection has a listener; passed on as is_req. */
const int is_req = (nc->listener != NULL);
#if MG_ENABLE_HTTP_WEBSOCKET
struct mg_str *vec;
#endif
if (ev == MG_EV_CLOSE) {
/* Last chance to deliver data that is still sitting in the buffer. */
if (io->len > 0 && mg_parse_http(io->buf, io->len, hm, is_req) > 0) {
/*
* For HTTP messages without Content-Length, always send HTTP message
* before MG_EV_CLOSE message.
*/
int ev2 = is_req ? MG_EV_HTTP_REQUEST : MG_EV_HTTP_REPLY;
/* Treat everything buffered as the message; body runs to buffer end. */
hm->message.len = io->len;
hm->body.len = io->buf + io->len - hm->body.p;
mg_http_call_endpoint_handler(nc, ev2, hm);
}
}
#if MG_ENABLE_FILESYSTEM
/* A file is attached to this connection: continue transferring it. */
if (pd->file.fp != NULL) {
mg_http_transfer_file_data(nc);
}
#endif
/* Forward the raw event to the user handler (explicit handler argument). */
mg_call(nc, nc->handler, ev, ev_data);
if (ev == MG_EV_RECV) {
struct mg_str *s;
/* >0: headers parsed, 0: headers incomplete, <0: malformed (see below). */
req_len = mg_parse_http(io->buf, io->len, hm, is_req);
/* Chunked body: hand the bytes that follow the headers to the decoder. */
if (req_len > 0 &&
(s = mg_get_http_header(hm, "Transfer-Encoding")) != NULL &&
mg_vcasecmp(s, "chunked") == 0) {
mg_handle_chunked(nc, hm, io->buf + req_len, io->len - req_len);
}
/* TODO(alashkin): refactor this ifelseifelseifelseifelse */
/* Malformed, or still no complete headers after the size limit: close. */
if ((req_len < 0 ||
(req_len == 0 && io->len >= MG_MAX_HTTP_REQUEST_SIZE))) {
DBG(("invalid request"));
nc->flags |= MG_F_CLOSE_IMMEDIATELY;
} else if (req_len == 0) {
/* Do nothing, request is not yet fully buffered */
}else if (hm->message.len <= io->len) {
int trigger_ev = nc->listener ? MG_EV_HTTP_REQUEST : MG_EV_HTTP_REPLY;
/* Whole HTTP message is fully buffered, call event handler */
mg_http_call_endpoint_handler(nc, trigger_ev, hm);
/* Drop the delivered message; any bytes after it stay buffered. */
mbuf_remove(io, hm->message.len);
}
}
/* NOTE(review): presumably silences an unused-variable warning when
* MG_ENABLE_FILESYSTEM is off - confirm. */
(void) pd;
}
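mg_http_handler函数调用了mg_http_handler2函数,这个函数很长,我删掉了一些无关的部分。重点关注mg_parse_http函数,该函数尝试做HTTP协议解析:如果返回0,说明HTTP包头还没有完全缓冲完成(not yet fully buffered),此时不做任何处理,等待后续数据;如果返回负数(或者缓冲区长度已达到MG_MAX_HTTP_REQUEST_SIZE仍未解析出完整包头),则认为是非法请求,设置MG_F_CLOSE_IMMEDIATELY标志关闭连接。如果解析成功,则获取HTTP包头的各个字段,并在整个HTTP消息都已缓冲完成(hm->message.len <= io->len)时调用mg_http_call_endpoint_handler函数把数据传递给终端回调函数(就是用户自己代码中设置的回调函数,用来获取HTTP数据包)。下面是mg_parse_http函数的源代码: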
/*
 * Parses the start line and headers of the HTTP message held in s[0..n).
 *
 * `is_req` selects request parsing (method, URI with optional query string,
 * protocol) or response parsing (protocol, status code, status message).
 *
 * Returns:
 *   - the result of mg_http_get_request_len() when that is <= 0; `hm` is not
 *     touched in this case;
 *   - -1 when the start line is malformed;
 *   - otherwise the length reported by mg_http_get_request_len().
 */
int mg_parse_http(const char *s, int n, struct http_message *hm, int is_req) {
  const char *hdr_end, *qmark;
  const int hdr_len = mg_http_get_request_len(s, n);

  if (hdr_len <= 0) return hdr_len;

  memset(hm, 0, sizeof(*hm));
  hm->message.p = s;
  hm->body.p = s + hdr_len;
  /* Both lengths start out as "unknown" (all bits set). */
  hm->body.len = (size_t) ~0;
  hm->message.len = hm->body.len;
  hdr_end = s + hdr_len;

  /* Headers are fully buffered; step over any leading whitespace. */
  for (; s < hdr_end && isspace(*(unsigned char *) s); s++) {
  }

  if (!is_req) {
    /* Status line: proto, three-character code, status message. */
    s = mg_skip(s, hdr_end, " ", &hm->proto);
    if (hdr_end - s < 4 || s[3] != ' ') return -1;
    hm->resp_code = atoi(s);
    if (hm->resp_code < 100 || hm->resp_code >= 600) return -1;
    s = mg_skip(s + 4, hdr_end, "\r\n", &hm->resp_status_msg);
  } else {
    /* Request line: method, URI, proto. */
    s = mg_skip(s, hdr_end, " ", &hm->method);
    s = mg_skip(s, hdr_end, " ", &hm->uri);
    s = mg_skip(s, hdr_end, "\r\n", &hm->proto);
    if (hm->uri.p <= hm->method.p) return -1;
    if (hm->proto.p <= hm->uri.p) return -1;

    /* Split "path?query": query_string gets the part after the first '?'. */
    qmark = (char *) memchr(hm->uri.p, '?', hm->uri.len);
    if (qmark != NULL) {
      hm->query_string.p = qmark + 1;
      hm->query_string.len = &hm->uri.p[hm->uri.len] - (qmark + 1);
      hm->uri.len = qmark - hm->uri.p;
    }
  }

  s = mg_http_parse_headers(s, hdr_end, hdr_len, hm);

  /*
   * This parser serves both requests and responses. A response without
   * Content-Length has its body read until the socket closes, so its body
   * length stays "infinite" (~0).
   *
   * For requests, per http://tools.ietf.org/html/rfc7231#section-8.1.3 only
   * POST and PUT have defined body semantics. So a request whose body length
   * is still unknown here and whose method is neither PUT nor POST is
   * treated as having no body: the message ends where the headers end.
   */
  if (is_req && hm->body.len == (size_t) ~0) {
    const int may_have_body = mg_vcasecmp(&hm->method, "PUT") == 0 ||
                              mg_vcasecmp(&hm->method, "POST") == 0;
    if (!may_have_body) {
      hm->body.len = 0;
      hm->message.len = hdr_len;
    }
  }

  return hdr_len;
}
等待终端用户处理完成后,最后很重要的一点是,调用mbuf_remove将已经被处理的数据从缓冲区中删除(缓冲区本身不需要释放,继续使用),实际上就是将未被处理的数据移动到缓冲区的开始位置。
/*
 * Drops the first `n` bytes of data from `mb` and slides the remaining bytes
 * to the start of the buffer. The allocation itself is not resized.
 * A call with n == 0 or n > mb->len does nothing.
 */
void mbuf_remove(struct mbuf *mb, size_t n) {
  size_t remaining;

  if (n == 0 || n > mb->len) return;

  remaining = mb->len - n;
  memmove(mb->buf, mb->buf + n, remaining);
  mb->len = remaining;
}
接下来就继续接收网络数据,再扩展到这个缓冲区,再进行完整性验证......