参考吴秦的博客:http://www.cnblogs.com/skynet/archive/2010/07/24/1784476.html
Mongoose的主线程master_thread在接受一个新的client连接请求时,会将client的socket地址放入一个queue(调用put_socket()方法);而当worker_thread线程处理client的请求时,是通过get_socket()方法从queue取出client的socket地址,然后与它建立连接。
建立连接就用到了数据结构mg_connection,该结构保存了client的连接信息。该结构体中有两个非常重要的成员:mg_request_info用于保存client的请求信息、mg_context用于保存该client请求的mongoose上下文。
新的工作线程worker_thread去处理client的请求,之后就是释放信号量等资源,让其它client请求也能够请求到资源工作,如启动了一个工作线程去处理client请求,这时queue就空出一个位置了,它会调用pthread_cond_signal(&ctx->empty_cond)让等待的client请求知道queue中有位置了。最后就是释放put_socket()中一开始设置的锁,(void) pthread_mutex_unlock(&ctx->thr_mutex)。
到了这里,client的请求已经被分配到一个工作线程中去了。而且不同的client请求处理运行在不同的工作线程中,能够互不干扰。在worker_thread中,首先与client建立连接,只有连接上了才能为client服务。连接建立之后调用process_new_connection()去处理请求。处理完之后返回关闭连接,并通过信号机制告诉主线程,我的工作做完了。
在process_new_connection()中处理工作:首先解析请求parse_http_request(),知道请求的内容;接着就是进入Mongoose处理client请求的真正核心工作了analyze_request()。这里就不详细介绍parse_http_request()、analyze_request()是如何去解析、验证、提供具体服务的,否则就陷入了细节出不来了,这里主要是介绍Mongoose的生命之旅的主线。
worker_thread 函数:
这样client发送的HTTP请求消息就被worker_thread读取到了,并存储在buf中, 接下来的工作就是解析读取到的请求信息,明白client到底想干嘛,说白了就从buf中提取信息并存储到结构体mg_request_info中去。
// Worker thread entry point.
//
// Allocates a single connection object whose request buffer lives directly
// behind the struct in the same allocation, then serves every socket that
// consume_socket() hands out until it returns 0. Before returning, the thread
// decrements ctx->num_threads under ctx->mutex and signals ctx->cond.
//
// thread_func_param: the struct mg_context this worker belongs to.
// Returns NULL in all cases.
static void *worker_thread(void *thread_func_param) {
  struct mg_context *ctx = thread_func_param;
  struct mg_connection *conn =
      (struct mg_connection *) calloc(1, sizeof(*conn) + MAX_REQUEST_SIZE);

  if (conn != NULL) {
    // The request buffer starts right after the connection struct.
    conn->buf = (char *) (conn + 1);
    conn->buf_size = MAX_REQUEST_SIZE;

    // Call consume_socket() even when ctx->stop_flag > 0, to let it signal
    // sq_empty condvar to wake up the master waiting in produce_socket()
    // (worker threads take accepted sockets off the queue here).
    while (consume_socket(ctx, &conn->client)) {
      conn->ctx = ctx;
      conn->birth_time = time(NULL);

      // Fill in IP, port info early so even if SSL setup below fails,
      // error handler would have the corresponding info.
      // Thanks to Johannes Winkelmann for the patch.
      // TODO(lsm): Fix IPv6 case
      conn->request_info.remote_port = ntohs(conn->client.rsa.sin.sin_port);
      memcpy(&conn->request_info.remote_ip,
             &conn->client.rsa.sin.sin_addr.s_addr, 4);
      conn->request_info.remote_ip = ntohl(conn->request_info.remote_ip);
      conn->request_info.is_ssl = conn->client.is_ssl;

      // Plain connections are served directly; SSL connections are served
      // only when sslize() reports success.
      if (!conn->client.is_ssl ||
          sslize(conn, conn->ctx->ssl_ctx, SSL_accept)) {
        process_new_connection(conn);
      }

      close_connection(conn);
    }

    free(conn);
  } else {
    cry(fc(ctx), "%s", "Cannot create new connection struct, OOM");
  }

  // Signal master that we're done with connection and exiting
  (void) pthread_mutex_lock(&ctx->mutex);
  ctx->num_threads--;
  (void) pthread_cond_signal(&ctx->cond);
  assert(ctx->num_threads >= 0);
  (void) pthread_mutex_unlock(&ctx->mutex);

  DEBUG_TRACE(("exiting"));
  return NULL;
}
process_new_connection函数:处理请求。处理完之后返回关闭连接,并通过信号机制告诉主线程
首先read_request接收请求的内容和长度,
// Serves HTTP requests arriving on one accepted connection.
//
// Each loop iteration reads one request into conn->buf, parses and validates
// it, dispatches it through handle_request(), and then drops the bytes that
// belonged to that request from the buffer. The loop repeats only while the
// server is not stopping, keep-alive is enabled in the configuration, the
// content length is known (>= 0), and should_keep_alive() said yes.
//
// The function returns early (without looping) when the request does not fit
// in the buffer (a 413 is sent) or when read_request() yields <= 0.
static void process_new_connection(struct mg_connection *conn) {
struct mg_request_info *ri = &conn->request_info;
int keep_alive_enabled, keep_alive, discard_len;
const char *cl;
// Keep-alive is on only when the config option is exactly "yes".
keep_alive_enabled = !strcmp(conn->ctx->config[ENABLE_KEEP_ALIVE], "yes");
keep_alive = 0;
// Important: on new connection, reset the receiving buffer. Credit goes
// to crule42.
conn->data_len = 0;
do {
reset_per_request_attributes(conn);
// fp == NULL: read from the client connection rather than from a file.
conn->request_len = read_request(NULL, conn, conn->buf, conn->buf_size,
&conn->data_len);
assert(conn->request_len < 0 || conn->data_len >= conn->request_len);
// Buffer is full but no complete request was found: reject as too large.
if (conn->request_len == 0 && conn->data_len == conn->buf_size) {
send_http_error(conn, 413, "Request Too Large", "%s", "");
return;
} if (conn->request_len <= 0) {
return; // Remote end closed the connection
}
if (parse_http_request(conn->buf, conn->buf_size, ri) <= 0 || // Once the request fields are parsed out, run simple validation (e.g. the HTTP version check below)
!is_valid_uri(ri->uri)) {
// Do not put garbage in the access log, just send it back to the client
send_http_error(conn, 400, "Bad Request",
"Cannot parse HTTP request: [%.*s]", conn->data_len, conn->buf);
conn->must_close = 1;
} else if (strcmp(ri->http_version, "1.0") &&
strcmp(ri->http_version, "1.1")) {
// Request seems valid, but HTTP version is strange
send_http_error(conn, 505, "HTTP version not supported", "%s", "");
log_access(conn);
} else {
// Request is valid, handle it
if ((cl = get_header(ri, "Content-Length")) != NULL) {
conn->content_len = strtoll(cl, NULL, 10); // Content length: decimal string converted to long long
} else if (!mg_strcasecmp(ri->request_method, "POST") ||
!mg_strcasecmp(ri->request_method, "PUT")) {
conn->content_len = -1; // POST or PUT without a Content-Length header: length is marked as -1
} else {
conn->content_len = 0;
}
conn->birth_time = time(NULL);
handle_request(conn);
conn->request_info.ev_data = (void *) conn->status_code; // Hand the status code to the MG_REQUEST_COMPLETE callback via ev_data
call_user(conn, MG_REQUEST_COMPLETE);
log_access(conn);
}
// NOTE(review): the pointer is freed but not cleared here;
// parse_http_message() sets it back to NULL when it parses the next request.
if (ri->remote_user != NULL) {
free((void *) ri->remote_user);
}
// NOTE(lsm): order is important here. should_keep_alive() call
// is using parsed request, which will be invalid after memmove's below.
// Therefore, memorize should_keep_alive() result now for later use
// in loop exit condition.
keep_alive = should_keep_alive(conn);
// Discard all buffered data for this request
// If the content length is known and headers + body end before the end of
// the buffered data, drop exactly that many bytes; otherwise drop everything.
discard_len = conn->content_len >= 0 &&
conn->request_len + conn->content_len < (int64_t) conn->data_len ?
(int) (conn->request_len + conn->content_len) : conn->data_len;
// Shift any bytes left over after this request to the front of the buffer.
memmove(conn->buf, conn->buf + discard_len, conn->data_len - discard_len);
conn->data_len -= discard_len;
assert(conn->data_len >= 0);
assert(conn->data_len <= conn->buf_size);
} while (conn->ctx->stop_flag == 0 &&
keep_alive_enabled &&
conn->content_len >= 0 &&
keep_alive);
}
其中:
read_request函数:返回接收到的请求长度,-1 代表接收失败
// Keeps pulling data into buf until get_request_len() reports a non-zero
// request length, the buffer is full, or pull() stops delivering data.
//
// fp, conn: passed through to pull() to select the data source.
// buf, bufsiz: destination buffer and its capacity.
// nread: in/out count of bytes already held in buf; advanced by every
//        successful pull().
//
// Returns -1 if the last pull() reported an error (negative result);
// otherwise the latest get_request_len() result, which may be 0.
static int read_request(FILE *fp, struct mg_connection *conn,
                        char *buf, int bufsiz, int *nread) {
  int got = 1;
  int hdr_len = get_request_len(buf, *nread);

  for (;;) {
    if (*nread >= bufsiz || hdr_len != 0 || got <= 0) {
      break;
    }
    got = pull(fp, conn, buf + *nread, bufsiz - *nread);
    if (got <= 0) {
      continue;  // the check at the top of the loop ends the loop
    }
    *nread += got;
    hdr_len = get_request_len(buf, *nread);
  }

  // recv() error -> propagate error; do not process a
  // b0rked-with-very-high-probability request
  return got < 0 ? -1 : hdr_len;
}
pull函数:从已打开的描述符(文件、ssl、socket)读取数据到buf,返回读入的字节数
// Reads up to len bytes into buf from one source, chosen in this order:
//   - fp, when it is not NULL, via read() on its file descriptor;
//   - conn->ssl, when it is not NULL, via SSL_read();
//   - otherwise the client socket, via recv().
// For the two connection sources, if conn->must_close is not set and
// wait_until_socket_is_readable() returns 0, nothing is read and the
// result is -1.
//
// Returns the result of the underlying read call, or -1 whenever
// conn->ctx->stop_flag is set.
static int pull(FILE *fp, struct mg_connection *conn, char *buf, int len) {
  int got;

  if (fp == NULL) {
    if (!conn->must_close && !wait_until_socket_is_readable(conn)) {
      got = -1;
    } else if (conn->ssl != NULL) {
      got = SSL_read(conn->ssl, buf, len);
    } else {
      got = recv(conn->client.sock, buf, (size_t) len, 0);
    }
  } else {
    // Use read() instead of fread(), because if we're reading from the CGI
    // pipe, fread() may block until IO buffer is filled up. We cannot afford
    // to block and must pass all read bytes immediately to the client.
    got = read(fileno(fp), buf, (size_t) len);
  }

  if (conn->ctx->stop_flag) {
    return -1;
  }
  return got;
}
通过测试返回值为:
【parse_http_request】 buf:GET /access.log HTTP/1.1
Accept: */*
User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET4.0C; .NET4.0E; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; InfoPath.1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)
Host: 10.7.12.213:8080
Connection: Close
Referer: http://10.7.12.213:8080/
Cache-Control: no-cache
parse_http_request函数中实际上执行的是parse_http_message
// Parses the HTTP request stored in buf and fills *ri.
//
// The actual splitting is done by parse_http_message(); this wrapper then
// checks that the method is accepted by is_valid_http_method() and that the
// version token starts with "HTTP/". On success ri->http_version is advanced
// past that prefix, so callers see e.g. "1.1".
//
// buf: request bytes; modified in place by parse_http_message().
// len: number of bytes of buf that may be inspected.
// ri:  receives the parsed request fields.
//
// Returns the parse_http_message() result (> 0) on success, -1 otherwise.
static int parse_http_request(char *buf, int len, struct mg_request_info *ri) {
  int result;

  // Trace output (update by ly). The buffer is not guaranteed to contain a
  // NUL terminator within len bytes, so the print is bounded with a
  // precision instead of using a bare %s.
  printf("【parse_http_request】 buf:%.*s,the len:%d\n", len, buf, len);

  result = parse_http_message(buf, len, ri);
  if (result > 0 &&
      is_valid_http_method(ri->request_method) &&  // method must be a supported one
      !strncmp(ri->http_version, "HTTP/", 5)) {
    ri->http_version += 5;   // Skip "HTTP/"
  } else {
    result = -1;
  }
  return result;
}
parse_http_message函数 获取mg_request_info结构体中的request_method、uri、http_version
// Splits the request held in buf into request line fields and headers.
//
// When get_request_len() reports a positive length, the last byte of the
// request is overwritten with NUL, leading whitespace is skipped, and
// ri->request_method, ri->uri and ri->http_version are set to point into buf.
// The header lines that follow are then parsed by parse_http_headers(),
// which fills ri->http_headers and ri->num_headers.
//
// buf: request bytes; modified in place.
// len: number of bytes of buf that may be inspected.
// ri:  receives the parsed fields; is_ssl, remote_ip and remote_port are
//      left untouched.
//
// Returns the value of get_request_len(buf, len); nothing is parsed when
// that value is not positive.
static int parse_http_message(char *buf, int len, struct mg_request_info *ri) {
  int request_length = get_request_len(buf, len);

  if (request_length > 0) {
    // Reset attributes. DO NOT TOUCH is_ssl, remote_ip, remote_port
    ri->remote_user = ri->request_method = ri->uri = ri->http_version = NULL;
    ri->num_headers = 0;

    // Terminate the request so the string helpers below cannot run past it.
    buf[request_length - 1] = '\0';

    // RFC says that all initial whitespaces should be ignored
    while (*buf != '\0' && isspace(* (unsigned char *) buf)) {
      buf++;
    }

    ri->request_method = skip(&buf, " ");
    ri->uri = skip(&buf, " ");
    ri->http_version = skip(&buf, "\r\n");

    // Trace output: print the parsed request line fields.
    printf("request_method:[%s],uri:[%s],http_version:[%s]\n",ri->request_method,ri->uri,ri->http_version);

    // Parse the header lines. This call must be a statement of its own: if
    // it ends up inside the comment on the line above, no headers are ever
    // parsed and num_headers stays 0.
    parse_http_headers(&buf, ri);
  }
  return request_length;
}
我们来分析一下skip的作用及实现。如要从buf中解析出client请求的methods是哪个(PUT、GET、POST等等)?只需要这样做就可以了:
ri->request_method = skip(&buf, " ");
为了分析到底是如何实现这个的,我在process_new_connection()中加入下面一行输出buf信息的代码:
看当我们向mongoose发送的请求信息,这时我们在浏览器中输入http://ip:8080,终端会输出buf的信息,如下:
看到第一行就是GET /favicon.ico HTTP/1.1。知道了buf中的字符信息,但在我们分析skip(&buf, " ")是如何提取出GET的之前,还要知道strcspn、strspn的作用,下面是它们的原型:
#include <string.h> size_t strspn(const char *s, const char *accept); size_t strcspn(const char *s, const char *reject);
下面解释它们的作用:
DESCRIPTION
The strspn() function calculates the length of the initial segment of s
which consists entirely of characters in accept.
The strcspn() function calculates the length of the initial segment of s which consists entirely of characters not in reject.
RETURN VALUE
The strspn() function returns the number of characters in the initial segment of s which consist only of characters from accept.
The strcspn() function returns the number of characters in the initial segment of s which are not in the string reject.
现在已经万事俱备了,skip(&buf, " ")的执行情况如下:
测试结果:
parse_http_headers函数 num_headers头的数量,http_headers[].name 和http_headers[].value
// Parses header lines from *buf into ri->http_headers.
//
// For each slot, the name is taken with skip_quoted() (delimiter ":",
// followed by skipping " ") and the value with skip() up to "\r\n"; both
// calls advance *buf. Parsing stops at the first empty header name or when
// the array is full. ri->num_headers is updated after every stored header,
// so it is left unchanged when no header is found.
static void parse_http_headers(char **buf, struct mg_request_info *ri) {
  const int capacity = (int) ARRAY_SIZE(ri->http_headers);
  int idx = 0;

  while (idx < capacity) {
    ri->http_headers[idx].name = skip_quoted(buf, ":", " ", 0);
    ri->http_headers[idx].value = skip(buf, "\r\n");

    // Trace output: print every name/value pair, including the final
    // empty one that ends the loop.
    printf("name:[%s],and value:[%s]\n",ri->http_headers[idx].name ,ri->http_headers[idx].value);

    if (ri->http_headers[idx].name[0] == '\0') {
      break;
    }

    idx++;
    ri->num_headers = idx;
  }
}
测试结果:
【parse_http_request】 buf:GET /access.log HTTP/1.1
Accept: */*
User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET4.0C; .NET4.0E; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; InfoPath.1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)
Host: 10.7.12.213:8080
Connection: Close
Referer: http://10.7.12.213:8080/
Cache-Control: no-cache
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
name:[Accept],and value:[*/*]
name:[User-Agent],and value:[Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET4.0C; .NET4.0E; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; InfoPath.1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)]
name:[Host],and value:[10.7.12.213:8080]
name:[Connection],and value:[Close]
name:[Referer],and value:[http://10.7.12.213:8080/]
name:[Cache-Control],and value:[no-cache]
name:[],and value:[]
get_header函数 遍历获取http_headers内容
// Looks up a header by name among the ri->num_headers parsed headers.
//
// Names are compared with mg_strcasecmp(). The first match in array order
// wins.
//
// Returns the value of the matching header, or NULL if none matches.
static const char *get_header(const struct mg_request_info *ri,
                              const char *name) {
  const char *found = NULL;
  int idx = 0;

  while (idx < ri->num_headers) {
    if (mg_strcasecmp(name, ri->http_headers[idx].name) == 0) {
      found = ri->http_headers[idx].value;
      break;
    }
    idx++;
  }

  return found;
}
handle_request函数解析:这是mongoose的核心程序。当请求已被读取、解析并且合法时,此函数被调用,mongoose必须采取行动去处理:serve a file, or a directory, or call embedded function, etcetera.
// Dispatches one parsed request to the code that produces the response.
//
// First the URI is prepared: the query string is split off at the first '?',
// the URI is URL-decoded in place, double dots and double slashes are
// removed, and the URI is mapped to a file path (filling `path` and `file`).
// The throttle setting for this connection is computed as well.
//
// Then a single if / else-if chain selects exactly one action; the first
// matching condition wins, so the order of the branches matters. Some
// branches exist only when the matching build macro is set (USE_WEBSOCKET,
// USE_LUA) or not set (NO_CGI).
static void handle_request(struct mg_connection *conn) {
struct mg_request_info *ri = &conn->request_info;
char path[PATH_MAX];
int uri_len;
struct file file = STRUCT_FILE_INITIALIZER;
// Cut the URI at '?': the '?' becomes a NUL terminator and query_string
// points at the character after it.
if ((conn->request_info.query_string = strchr(ri->uri, '?')) != NULL) {
* ((char *) conn->request_info.query_string++) = '\0';
}
uri_len = (int) strlen(ri->uri);
// Decode in place: source and destination are the same buffer.
url_decode(ri->uri, uri_len, (char *) ri->uri, uri_len + 1, 0);
remove_double_dots_and_double_slashes((char *) ri->uri);
convert_uri_to_file_name(conn, path, sizeof(path), &file);
conn->throttle = set_throttle(conn->ctx->config[THROTTLE],
get_remote_ip(conn), ri->uri);
DEBUG_TRACE(("%s", ri->uri));
// Requests other than PUT/DELETE that fail check_authorization() are
// answered with an authorization request.
if (!is_put_or_delete_request(conn) && !check_authorization(conn, path)) {
send_authorization_request(conn);
#if defined(USE_WEBSOCKET)
} else if (is_websocket_request(conn)) {
handle_websocket_request(conn);
#endif
} else if (call_user(conn, MG_NEW_REQUEST) != NULL) {
// Do nothing, callback has served the request
} else if (!strcmp(ri->request_method, "OPTIONS")) {
send_options(conn);
} else if (conn->ctx->config[DOCUMENT_ROOT] == NULL) {
// No document root configured: nothing can be served from disk.
send_http_error(conn, 404, "Not Found", "Not Found");
} else if (is_put_or_delete_request(conn) &&
(conn->ctx->config[PUT_DELETE_PASSWORDS_FILE] == NULL ||
is_authorized_for_put(conn) != 1)) {
// PUT/DELETE need a configured passwords file and is_authorized_for_put()
// returning 1.
send_authorization_request(conn);
} else if (!strcmp(ri->request_method, "PUT")) {
put_file(conn, path);
} else if (!strcmp(ri->request_method, "DELETE")) {
if (mg_remove(path) == 0) {
send_http_error(conn, 200, "OK", "%s", "");
} else {
send_http_error(conn, 500, http_500_error, "remove(%s): %s", path,
strerror(ERRNO));
}
} else if ((file.membuf == NULL && file.modification_time == (time_t) 0) ||
must_hide_file(conn, path)) {
// `file` has neither an in-memory buffer nor a modification time, or the
// path must be hidden: report it as missing.
send_http_error(conn, 404, "Not Found", "%s", "File not found");
} else if (file.is_directory && ri->uri[uri_len - 1] != '/') {
// Directory requested without a trailing slash: redirect to the same URI
// with '/' appended.
// NOTE(review): ri->uri was URL-decoded above and is written into the
// Location header as-is; confirm it cannot contain CR/LF at this point.
mg_printf(conn, "HTTP/1.1 301 Moved Permanently\r\n"
"Location: %s/\r\n\r\n", ri->uri);
} else if (!strcmp(ri->request_method, "PROPFIND")) {
handle_propfind(conn, path, &file);
} else if (file.is_directory &&
!substitute_index_file(conn, path, sizeof(path), &file)) {
// Directory for which substitute_index_file() returned 0: list it if the
// option is "yes", else deny.
if (!mg_strcasecmp(conn->ctx->config[ENABLE_DIRECTORY_LISTING], "yes")) {
handle_directory_request(conn, path);
} else {
send_http_error(conn, 403, "Directory Listing Denied",
"Directory listing denied");
}
#ifdef USE_LUA
} else if (match_prefix("**.lp$", 6, path) > 0) {
handle_lsp_request(conn, path, &file);
#endif
#if !defined(NO_CGI)
} else if (match_prefix(conn->ctx->config[CGI_EXTENSIONS],
strlen(conn->ctx->config[CGI_EXTENSIONS]),
path) > 0) {
// Path matches the CGI pattern: only POST, HEAD and GET are passed on.
if (strcmp(ri->request_method, "POST") &&
strcmp(ri->request_method, "HEAD") &&
strcmp(ri->request_method, "GET")) {
send_http_error(conn, 501, "Not Implemented",
"Method %s is not implemented", ri->request_method);
} else {
handle_cgi_request(conn, path);
}
#endif // !NO_CGI
} else if (match_prefix(conn->ctx->config[SSI_EXTENSIONS],
strlen(conn->ctx->config[SSI_EXTENSIONS]),
path) > 0) {
handle_ssi_file_request(conn, path);
} else if (is_not_modified(conn, &file)) {
send_http_error(conn, 304, "Not Modified", "%s", "");
} else {
// Default: serve the file itself.
handle_file_request(conn, path, &file);
}
}
请求验证分布在从连接请求开始到请求得到回应的整个过程中。在请求解析之前,比如验证socket的合法性等。在请求解析之后,从buf中解析出HTTP请求消息的各个字段之后,就做一些简单的验证工作,比如说HTTP版本的验证。如果在解析buf时出错,说明请求的格式不对。而且在满足client请求的时候也要进行一些验证,诸如是否有浏览目录的权限、请求的文件是否存在等等,我就不再详述了。