首先看下HTTP请求报文结构
method空格URL空格version回车换行符
头部字段名:值回车换行符
...
头部字段名:值回车换行符
回车换行符
请求数据
其中回车是'\r',换行符是'\n'。
具体地可以看一个例子。
GET /search?hl=zh-CN&source=hp&q=domety&aq=f&oq= HTTP/1.1
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-silverlight, application/x-shockwave-flash, */*
Referer: http://www.google.cn/
Accept-Language: zh-cn
Accept-Encoding: gzip, deflate
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; TheWorld)
Host: www.google.cn
Connection: Keep-Alive
Cookie: PREF=ID=80a06da87be9ae3c:U=f7167333e2c3b714:NW=1:TM=1261551909:LM=1261551917:S=ybYcq2wpfefs4V9g; NID=31=ojj8d-IygaEtSxLgaJmqSjVhCspkviJrB6omjamNrSm8lZhKy_yMfO2M4QMRKcH1g0iQv9u-2hfBW7bUFwVh7pGaRUb0RnHcJU37y-FxlRugatx63JLv7CWMD6UB_O_r
- request结构体
typedef struct tk_http_request{ char* root;// 配置目录 int fd; int epoll_fd; char buff[MAX_BUF]; size_t pos; size_t last; int state; //请求行 void* request_start; void* method_end; int method; void* uri_start; void* uri_end; void* path_start; void* path_end; void* query_start; void* query_end; int http_major; int http_minor; void* request_end; struct list_head list; // 存储请求头,list.h中定义了此结构 void* cur_header_key_start; void* cur_header_key_end; void* cur_header_value_start; void* cur_header_value_end; void* timer; //记录请求的时间 }tk_http_request_t;
1.记录请求开始和结束的位置
2.记录请求方法的位置
3.记录uri开始和结束位置
4.记录版本号,小数点前和小数点后
- 解析request请求行
tk_http_parse_request_line
#define CR '\r' #define LF '\n' int tk_http_parse_request_line(tk_http_request_t *request){ enum{ sw_start = 0, sw_method, sw_spaces_before_uri, sw_after_slash_in_uri, sw_http, sw_http_H, sw_http_HT, sw_http_HTT, sw_http_HTTP, sw_first_major_digit, sw_major_digit, sw_first_minor_digit, sw_minor_digit, sw_spaces_after_digit, sw_almost_done }state; state = request->state; u_char ch, *p, *m; size_t pi; for(pi = request->pos; pi < request->last; pi++){ p = (u_char *)&request->buff[pi % MAX_BUF]; ch = *p; switch(state){ case sw_start: request->request_start = p; if(ch == CR || ch == LF) break; if((ch < 'A' || ch > 'Z') && ch != '_') return TK_HTTP_PARSE_INVALID_METHOD; state = sw_method; break; case sw_method: if(ch == ' '){ request->method_end = p; m = request->request_start; switch(p - m){ case 3: if(tk_str3_cmp(m, 'G', 'E', 'T', ' ')){ request->method = TK_HTTP_GET; break; } break; case 4: if(tk_str3Ocmp(m, 'P', 'O', 'S', 'T')){ request->method = TK_HTTP_POST; break; } if(tk_str4cmp(m, 'H', 'E', 'A', 'D')){ request->method = TK_HTTP_HEAD; break; } break; default: request->method = TK_HTTP_UNKNOWN; break; } state = sw_spaces_before_uri; break; } if((ch < 'A' || ch > 'Z') && ch != '_') return TK_HTTP_PARSE_INVALID_METHOD; break; case sw_spaces_before_uri: if(ch == '/'){ request->uri_start = p + 1; state = sw_after_slash_in_uri; break; } switch(ch){ case ' ': break; default: return TK_HTTP_PARSE_INVALID_REQUEST; } break; case sw_after_slash_in_uri: switch(ch){ case ' ': request->uri_end = p; state = sw_http; break; default: break; } break; case sw_http: switch(ch){ case ' ': break; case 'H': state = sw_http_H; break; default: return TK_HTTP_PARSE_INVALID_REQUEST; } break; case sw_http_H: switch(ch){ case 'T': state = sw_http_HT; break; default: return TK_HTTP_PARSE_INVALID_REQUEST; } break; case sw_http_HT: switch(ch){ case 'T': state = sw_http_HTT; break; default: return TK_HTTP_PARSE_INVALID_REQUEST; } break; case sw_http_HTT: switch(ch){ case 'P': state = 
sw_http_HTTP; break; default: return TK_HTTP_PARSE_INVALID_REQUEST; } break; case sw_http_HTTP: switch(ch){ case '/': state = sw_first_major_digit; break; default: return TK_HTTP_PARSE_INVALID_REQUEST; } break; case sw_first_major_digit: if(ch < '1' || ch > '9') return TK_HTTP_PARSE_INVALID_REQUEST; request->http_major = ch - '0'; state = sw_major_digit; break; case sw_major_digit: if(ch == '.'){ state = sw_first_minor_digit; break; } if(ch < '0' || ch > '9') return TK_HTTP_PARSE_INVALID_REQUEST; request->http_major = request->http_major * 10 + ch - '0'; break; case sw_first_minor_digit: if(ch < '0' || ch > '9') return TK_HTTP_PARSE_INVALID_REQUEST; request->http_minor = ch - '0'; state = sw_minor_digit; break; case sw_minor_digit: if(ch == CR){ state = sw_almost_done; break; } if(ch == LF) goto done; if(ch == ' '){ state = sw_spaces_after_digit; break; } if(ch < '0' || ch > '9') return TK_HTTP_PARSE_INVALID_REQUEST; request->http_minor = request->http_minor * 10 + ch - '0'; break; case sw_spaces_after_digit: switch(ch){ case ' ': break; case CR: state = sw_almost_done; break; case LF: goto done; default: return TK_HTTP_PARSE_INVALID_REQUEST; } break; case sw_almost_done: request->request_end = p - 1; switch(ch){ case LF: goto done; default: return TK_HTTP_PARSE_INVALID_REQUEST; } } } request->pos = pi; request->state = state; return TK_AGAIN; done: request->pos = pi + 1; if (request->request_end == NULL) request->request_end = p; request->state = sw_start; return 0; }
tk_http_parse_request_line 通过状态机逐字节解析请求行,主要获取以下信息:
1.请求开始和结束的位置
2.请求方法的结束位置及方法类型
3.uri开始和结束的位置
4.版本号(小数点前的主版本号和小数点后的次版本号)