http request/response请求/响应高性能解析

最新推荐文章于 2022-10-11 17:38:49 发布

Heron & Sense

最新推荐文章于 2022-10-11 17:38:49 发布

阅读量571

点赞数

文章标签： http 解析 request解析 response解析

本文链接：https://blog.csdn.net/herongoal/article/details/82257927

版权

很多时候我们只需要对 HTTP request/response 进行简单解析，为此引入一个完整的库显得过于臃肿而繁琐；而如果用粗制滥造的代码来解析，这样的代码连自己都会觉得惭愧。鄙人花费半天时间，设计了一种性能较高、可读性良好、使用简单的解析方式，设计目标如下：

1. 如果解析出错，报告出错位置以及原因；

2. 性能不可妥协；

3. 代码可读性良好；

4. 容易改造扩展。

这里并不提供 HTTP 协议的完整实现，只提供 HTTP 请求/响应头部各行的解析。

// One element of a line pattern consumed by parse_tokens().
struct token_t{
    enum type_t{
        ptn_optional = 0, // a single character that may or may not be present
        ptn_exact = 1,    // a single character that must be present
        ptn_number = 2,   // a run of decimal digits
        ptn_token = 3,    // http field name
        ptn_text = 4,     // free text up to (not including) CR or LF
    };
    type_t ptn;   // what kind of match this entry performs
    char chr;     // the character compared by ptn_optional / ptn_exact
    uint32_t len; // bytes matched by the most recent parse; 32 bits wide so that
                  // a text or token run of 256 bytes or more does not wrap to 0
};

// Pattern for one header line: field-name ':' ' ' field-value CR LF.
// Not const: parse_tokens() writes the matched length into each entry's len.
token_t header_line_ptn[] = {
    { token_t::ptn_token, '-',  0 },  // field name
    { token_t::ptn_exact, ':',  0 },  // separator after the field name
    { token_t::ptn_exact, ' ',  0 },  // single space before the value
    { token_t::ptn_text,  'T',  0 },  // field value
    { token_t::ptn_exact, '\r', 0 },  // CR
    { token_t::ptn_exact, '\n', 0 },  // LF
};

// Pattern for a response status line: "HTTP/" major '.' minor ' ' code ' ' text CR LF.
// Not const: parse_tokens() writes the matched length into each entry's len.
token_t status_line_ptn[] = {
    { token_t::ptn_exact,  'H',  0 },
    { token_t::ptn_exact,  'T',  0 },
    { token_t::ptn_exact,  'T',  0 },
    { token_t::ptn_exact,  'P',  0 },
    { token_t::ptn_exact,  '/',  0 },
    { token_t::ptn_number, '0',  0 },  // major version
    { token_t::ptn_exact,  '.',  0 },  // the dot between major version & minor version
    { token_t::ptn_number, '0',  0 },  // minor version
    { token_t::ptn_exact,  ' ',  0 },  // the space before status code
    { token_t::ptn_number, '0',  0 },  // the status code
    { token_t::ptn_exact,  ' ',  0 },  // the space after status code
    { token_t::ptn_text,   ' ',  0 },  // the status text
    { token_t::ptn_exact,  '\r', 0 },  // CR
    { token_t::ptn_exact,  '\n', 0 },  // LF
};

/**
 * Match the bytes data[0, len) against a sequence of pattern tokens.
 *
 * Tokens are tried in order. Each token consumes zero or more bytes and the
 * number of bytes it consumed is stored in its own `len` field, so the pattern
 * array is modified by every call.
 *
 * @param tokens      pattern array; every visited entry's `len` is overwritten
 * @param n_tokens    number of entries in `tokens`
 * @param data        input bytes; never read at or beyond `len`
 * @param len         number of readable bytes in `data`
 * @param len_parsed  out: number of bytes consumed; when matching stops early
 *                    this is the offset of the byte that failed to match
 * @return the number of tokens matched; equal to n_tokens on a full match
 */
size_t parse_tokens(token_t *tokens, size_t n_tokens, const char *data, size_t len, int &len_parsed)
{
    size_t pos = 0;
    len_parsed = 0;

    for (size_t n_matched = 0; n_matched < n_tokens; ++n_matched)
    {
        token_t &token = tokens[n_matched];
        const size_t start = pos;
        bool may_be_empty = false;

        switch (token.ptn)
        {
        case token_t::ptn_optional:
            may_be_empty = true;
            // Bounds check first: an optional token at the very end of the
            // input must not read past the buffer.
            if (pos < len && data[pos] == token.chr) ++pos;
            break;

        case token_t::ptn_exact:
            if (pos < len && data[pos] == token.chr) ++pos;
            break;

        case token_t::ptn_text:
            while (pos < len && data[pos] != '\r' && data[pos] != '\n') ++pos;
            break;

        case token_t::ptn_number:
            // <cctype> classifiers require a value representable as unsigned
            // char; passing a negative char is undefined behaviour.
            while (pos < len && isdigit(static_cast<unsigned char>(data[pos]))) ++pos;
            break;

        case token_t::ptn_token:
            while (pos < len)
            {
                const unsigned char c = static_cast<unsigned char>(data[pos]);
                if (!(isalnum(c) || c == '-' || c == '_' || c == '.')) break;
                ++pos;
            }
            break;

        default:
            // Unknown pattern kinds are skipped without touching the token.
            continue;
        }

        const size_t run = pos - start;
        // Store in whatever width the field has; the emptiness test below uses
        // the full-width `run`, so a narrow `len` wrapping cannot turn a long
        // match into a false "nothing matched".
        token.len = static_cast<decltype(token.len)>(run);
        len_parsed = static_cast<int>(pos);

        if (0 == run && !may_be_empty) return n_matched;
    }
    return n_tokens;
}

/**
 * Parse an HTTP response head: one status line, zero or more header lines and
 * the terminating empty line (CRLF).
 *
 * return value case specification:
 * > 0 successfully parsed length, including the terminating empty line
 * = 0 insufficient input data: the input ended before the head was complete
 * < 0 -ret indicates the position of the first invalid byte encountered
 *
 * Note: an invalid byte at position 0 also yields 0, because -0 == 0.
 * Not reentrant: parse_tokens() stores match lengths in the shared
 * status_line_ptn / header_line_ptn arrays.
 */
inline int parse_switch_protocol(const char *data, size_t len)
{
    int line_len = 0;
    size_t n_tokens = parse_tokens(status_line_ptn, slp_len, data, len, line_len);

    size_t offset = static_cast<size_t>(line_len);

    if(n_tokens != slp_len)
    {
        // Stopping exactly at the end of the buffer means the line is merely
        // truncated, not malformed.
        return offset >= len ? 0 : -static_cast<int>(offset);
    }

    while(true)
    {
        line_len = 0;
        n_tokens = parse_tokens(header_line_ptn, hlp_len, data + offset, len - offset, line_len);

        if(n_tokens == hlp_len)
        {
            // header_line_ptn is: name ':' ' ' value CR LF, so the value is
            // the third entry from the end, not the last one (which is LF).
            const char *kstr = data + offset;
            token_t &field_token = header_line_ptn[0];
            token_t &value_token = header_line_ptn[hlp_len - 3];
            const char *vstr = kstr + field_token.len + 2; // skip ": "

            // Field name is [kstr, kstr + field_token.len),
            // field value is [vstr, vstr + value_token.len).
            (void)kstr; (void)vstr; (void)field_token; (void)value_token;

            offset += static_cast<size_t>(line_len);

            //further process here
            continue;
        }

        const size_t remaining = len - offset;
        const bool nothing_matched = (n_tokens == 0 && line_len == 0);

        if(nothing_matched && remaining >= 2
            && data[offset] == '\r' && data[offset + 1] == '\n')
        {
            // Empty line: end of the header section.
            return static_cast<int>(offset + 2);
        }

        if(nothing_matched && remaining == 1 && data[offset] == '\r')
        {
            // CR received, LF still pending.
            return 0;
        }

        offset += static_cast<size_t>(line_len);
        return offset >= len ? 0 : -static_cast<int>(offset);
    }
}

Heron & Sense

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
http request/response请求/响应高性能解析

很多时候都需要对http request/response进行简单解析，而这时候如果引入一个库又过于臃肿而繁琐。如果使用粗制滥造的代码进行解析，这样的代码自己都会觉得惭愧。鄙人花费半天时间，设计了一个性能较高，可读性良好，而又使用简单的解析方式。1. 如果解析出错，报告出错位置以及原因；2. 性能不可妥协；3. 代码可读性良好；4. 容易改造扩展。这里并不提供h...
复制链接

扫一扫