很多时候都需要对 HTTP request/response 进行简单解析,而这时候如果引入一个库又过于臃肿而繁琐;如果使用粗制滥造的代码进行解析,连自己都会觉得惭愧。鄙人花费半天时间,设计了一个性能较高、可读性良好、而又使用简单的解析方式,设计目标如下:
1. 如果解析出错,报告出错位置以及原因;
2. 性能不可妥协;
3. 代码可读性良好;
4. 容易改造扩展。
这里并不提供http协议的完整实现,只提供http请求/响应头部行的解析。
/**
 * One element of a line pattern.
 *
 * A pattern is an array of token_t; the parser walks the array in order and
 * records in `len` how many input bytes each element consumed.
 */
struct token_t{
    /** The matching rule applied by this token. */
    enum type_t{
        ptn_optional = 0, // a single specific character that may be absent
        ptn_exact = 1,    // a single specific character that must be present
        ptn_number = 2,   // decimal digits
        ptn_token = 3,    // HTTP field name
        ptn_text = 4,     // text data
    };
    type_t ptn; // matching rule for this token
    char chr;   // the character to compare against for ptn_optional / ptn_exact
    // Number of input bytes this token matched, written by the parser.
    // Wide enough for any buffer length: a narrower counter wraps around on
    // long header values (a 256-byte value would read back as length 0).
    size_t len;
};
// Pattern for one header line: field-name ':' SP field-value CR LF.
// Index 0 is the field name and index 3 is the field value; after a
// successful match their `len` members hold the matched lengths.
// The array is mutable because the parser writes `len` into each element.
token_t header_line_ptn[] = {
{token_t::ptn_token, '-', 0}, // field name
{token_t::ptn_exact, ':', 0}, // colon right after the field name
{token_t::ptn_exact, ' ', 0}, // exactly one space before the value
{token_t::ptn_text, 'T', 0}, // field value, runs up to the line terminator
{token_t::ptn_exact, '\r', 0}, // CR
{token_t::ptn_exact, '\n', 0}, // LF
};
// Pattern for the response status line:
//   "HTTP/" major "." minor SP status-code SP status-text CR LF
// The array is mutable because the parser writes `len` into each element.
token_t status_line_ptn[] = {
{token_t::ptn_exact, 'H', 0}, // the literal "HTTP/" prefix, one character per token
{token_t::ptn_exact, 'T', 0},
{token_t::ptn_exact, 'T', 0},
{token_t::ptn_exact, 'P', 0},
{token_t::ptn_exact, '/', 0},
{token_t::ptn_number, '0', 0}, //major version
{token_t::ptn_exact, '.', 0}, //the dot between major version & minor version
{token_t::ptn_number, '0', 0}, //minor version
{token_t::ptn_exact, ' ', 0}, //the space before status code
{token_t::ptn_number, '0', 0}, //the status code
{token_t::ptn_exact, ' ', 0}, //the space after status code
{token_t::ptn_text, ' ', 0}, //the status text
{token_t::ptn_exact, '\r', 0}, // CR
{token_t::ptn_exact, '\n', 0}, // LF
};
/**
 * Match the beginning of a buffer against a sequence of token patterns.
 *
 * Tokens are matched in array order, each one starting where the previous
 * one stopped. The number of bytes every visited token consumed is stored in
 * its `len` member so the caller can locate the matched pieces afterwards.
 *
 * Matching rules:
 *   ptn_optional  consumes token.chr if it is the next byte, otherwise nothing;
 *                 never fails
 *   ptn_exact     the next byte must be token.chr
 *   ptn_number    one or more decimal digits
 *   ptn_token     one or more of: alphanumerics, '-', '_', '.'
 *   ptn_text      one or more bytes up to (not including) '\r' or '\n'
 *
 * @param tokens      pattern array; `len` of each visited element is overwritten
 * @param n_tokens    number of elements in tokens
 * @param data        input bytes; need not be NUL-terminated
 * @param len         number of readable bytes in data (must fit in an int,
 *                    because offsets are reported through len_parsed)
 * @param len_parsed  out: offset at which matching stopped, i.e. the number
 *                    of bytes consumed by the tokens that matched
 * @return number of tokens matched: n_tokens on success, otherwise the index
 *         of the first token that failed to match
 */
size_t parse_tokens(token_t *tokens, size_t n_tokens, const char *data, size_t len, int &len_parsed)
{
    const int limit = static_cast<int>(len);
    len_parsed = 0;
    for(size_t n_matched = 0; n_matched < n_tokens; ++n_matched)
    {
        token_t &token = tokens[n_matched];
        // "Matched nothing" is decided by comparing positions rather than by
        // reading token.len back, so a wrapped counter cannot fake a failure.
        const int start = len_parsed;
        switch(token.ptn)
        {
        case token_t::ptn_optional:
            token.len = 0;
            // The bounds check matters: an optional token at the very end of
            // the buffer must not read one byte past it.
            if(len_parsed < limit && data[len_parsed] == token.chr)
            {
                token.len = 1;
                ++len_parsed;
            }
            break;
        case token_t::ptn_exact:
            token.len = 0;
            if(len_parsed >= limit || data[len_parsed] != token.chr)
            {
                return n_matched;
            }
            token.len = 1;
            ++len_parsed;
            break;
        case token_t::ptn_text:
            token.len = 0;
            while(len_parsed < limit && data[len_parsed] != '\r' && data[len_parsed] != '\n')
            {
                ++token.len;
                ++len_parsed;
            }
            if(len_parsed == start) return n_matched;
            break;
        case token_t::ptn_number:
            token.len = 0;
            // <ctype.h> classifiers require a value representable as
            // unsigned char; a plain (possibly negative) char is undefined.
            while(len_parsed < limit && isdigit(static_cast<unsigned char>(data[len_parsed])))
            {
                ++token.len;
                ++len_parsed;
            }
            if(len_parsed == start) return n_matched;
            break;
        case token_t::ptn_token:
            token.len = 0;
            while(len_parsed < limit)
            {
                const char c = data[len_parsed];
                const bool is_name_char = isalnum(static_cast<unsigned char>(c))
                    || c == '-' || c == '_' || c == '.';
                if(!is_name_char) break;
                ++token.len;
                ++len_parsed;
            }
            if(len_parsed == start) return n_matched;
            break;
        }
    }
    return n_tokens;
}
/**
 * Parse an HTTP response head: the status line, any number of header lines,
 * and the empty line (CRLF) that terminates the head.
 *
 * @param data  start of the buffer; need not be NUL-terminated
 * @param len   number of readable bytes in data
 * @return
 *   > 0  number of bytes parsed, up to and including the terminating CRLF
 *   = 0  the input ended before the head was complete; retry with more data
 *   < 0  -ret is the offset of the first invalid byte encountered
 *
 * Limitation of this encoding: an invalid byte at offset 0 also yields 0,
 * because -0 == 0.
 */
inline int parse_switch_protocol(const char *data, size_t len)
{
    int line_len = 0;
    size_t n_tokens = parse_tokens(status_line_ptn, slp_len, data, len, line_len);
    int len_parsed = line_len;
    if(n_tokens != slp_len)
    {
        // Matching stopped at the end of the buffer: the status line is
        // truncated, not malformed.
        if(static_cast<size_t>(len_parsed) >= len) return 0;
        return -len_parsed;
    }
    while(true)
    {
        line_len = 0;
        n_tokens = parse_tokens(header_line_ptn, hlp_len, data + len_parsed, len - len_parsed, line_len);
        if(n_tokens == hlp_len)
        {
            // A complete header line. The field name is kstr[0, field_token.len);
            // the value starts at kstr + field_token.len + 2 (past ": ") and is
            // value_token.len bytes long. Both lengths live in the shared
            // pattern array and are overwritten by the next parse_tokens call.
            const char *kstr = data + len_parsed;
            token_t &field_token = header_line_ptn[0];
            token_t &value_token = header_line_ptn[3]; // the ptn_text entry, not the trailing '\n'
            len_parsed += line_len;
            //further process here
            (void)kstr;
            (void)field_token;
            (void)value_token;
            continue;
        }

        const size_t line_start = static_cast<size_t>(len_parsed);
        // True when the line begins with '\r', i.e. it can only be the empty
        // line that ends the head.
        const bool at_cr = n_tokens == 0 && line_len == 0
            && line_start < len && data[line_start] == '\r';
        if(at_cr && line_start + 1 < len && data[line_start + 1] == '\n')
        {
            return len_parsed + 2;
        }

        len_parsed += line_len;
        // Out of data, either in the middle of a header line or right after
        // the '\r' of the final CRLF: ask for more input instead of reporting
        // a bad byte.
        if(static_cast<size_t>(len_parsed) >= len || (at_cr && line_start + 1 == len))
        {
            return 0;
        }
        return -len_parsed;
    }
}