在因特网上传送URL时,只能采用ASCII字符集。也就是说,URL只能使用英文字母、阿拉伯数字和某些标点符号,不能使用其他文字和符号:只有字母和数字[0-9a-zA-Z]、一些特殊符号 $-_.+!*'() [不包括双引号]、以及某些保留字(空格转换为+),才可以不经过编码直接用于URL。
在使用libcurl接收的数据中如果包含URL,则会被自动编码,如图一所示:实际数据中的 + 和 = 会被编码成 %2B 和 %3D。所以收到的URL要先解码才可以直接使用。
图一
/*
 * Lookup table mapping a character code to the value of the hexadecimal
 * digit it represents ('0'-'9' -> 0-9, 'A'-'F' and 'a'-'f' -> 10-15).
 * Every other entry is 0, so callers must validate the character with
 * isxdigit() before trusting the result.
 */
static const unsigned char dec_tab[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0,
0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/**
* URL (percent) decoding function.
*
* Every "%XY" sequence where X and Y are both hexadecimal digits is replaced
* by the single byte 0xXY. Anything else is copied through unchanged,
* including a '%' that is not followed by two hex digits and the '+'
* character (it is NOT turned into a space).
*
* At most iStrLen bytes of str are read; decoding also stops at the first
* NUL byte inside that range. The output is always NUL-terminated on success.
*
* @param str {const char*} URL-encoded input string
* @param iStrLen {int} number of bytes of str to decode (must be >= 0)
* @param sBuf {char*} buffer receiving the decoded string
* @param iBufLen {int} size of sBuf in bytes; must be at least iStrLen + 1
* @return {bool} true on success; false if a pointer is NULL, iStrLen is
*  negative, or sBuf is too small (sBuf is left untouched in that case)
*/
bool acl_url_decode(const char *str, int iStrLen, char *sBuf, int iBufLen){
	if (str == NULL || sBuf == NULL || iStrLen < 0)
		return false;
	/* Same condition as "iBufLen < iStrLen + 1" but cannot overflow. */
	if (iBufLen <= iStrLen)
		return false;

	/*
	 * Effective input length: never beyond iStrLen (the capacity check
	 * above is based on it) and never beyond the terminating NUL.
	 */
	int len = 0;
	while (len < iStrLen && str[len] != '\0')
		len++;

	int pos = 0;
	for (int i = 0; i < len; i++) {
		if (str[i] == '%' && i + 2 < len) {
			/* <ctype.h> functions require unsigned char values. */
			unsigned char hi = (unsigned char) str[i + 1];
			unsigned char lo = (unsigned char) str[i + 2];

			if (isxdigit(hi) && isxdigit(lo)) {
				sBuf[pos++] = (char) ((dec_tab[hi] << 4) | dec_tab[lo]);
				i += 2;
				continue;
			}
		}
		/* Ordinary byte, or a '%' that does not start a valid escape. */
		sBuf[pos++] = str[i];
	}
	sBuf[pos] = '\0';
	return true;
}
/*
 * Convert a nibble value to its uppercase hexadecimal digit character:
 * 0-9 map to '0'-'9' and 10-15 map to 'A'-'F'.
 * The argument is not range-checked; values above 15 simply continue
 * past 'F' (with the usual unsigned char wrap-around).
 */
static unsigned char char_to_hex( unsigned char x )
{
	if (x <= 9)
		return (unsigned char)(x + '0');

	/* 'A' - 10 == 55, so 10 becomes 'A', 11 becomes 'B', ... */
	return (unsigned char)(x + ('A' - 10));
}
/*
 * Report whether c is an ASCII letter or decimal digit.
 * Returns 1 for 'a'-'z', 'A'-'Z' and '0'-'9', otherwise 0.
 * Uses explicit range comparisons rather than <ctype.h>.
 */
static int is_alpha_number_char( unsigned char c )
{
	const int lower = ('a' <= c && c <= 'z');
	const int upper = ('A' <= c && c <= 'Z');
	const int digit = ('0' <= c && c <= '9');

	return (lower || upper || digit) ? 1 : 0;
}
/**
 * URL encoding implementation.
 *
 * Encodes src into dest as a NUL-terminated string:
 *   - a space becomes '+';
 *   - ASCII letters, digits and the characters = ! ~ * ' ( ) are copied as-is;
 *   - every other byte becomes "%XX" with uppercase hexadecimal digits.
 *
 * Encoding stops at the first NUL byte in src, after src_len input bytes,
 * or when fewer than five bytes remain free in dest (room for one full
 * "%XX" escape plus the terminator), whichever comes first. Output is
 * therefore silently truncated when dest is too small, but an escape
 * sequence is never cut in half and dest is never overrun.
 *
 * @param src      input bytes to encode
 * @param src_len  maximum number of bytes to read from src; a negative
 *                 value means "read up to the terminating NUL"
 * @param dest     output buffer
 * @param dest_len size of dest in bytes; nothing is written if <= 0
 */
void urlencode( unsigned char * src, int src_len, unsigned char * dest, int dest_len )
{
	int consumed = 0;	/* bytes read from src */
	int written = 0;	/* bytes stored in dest, terminator excluded */
	int limit;

	if (dest == NULL || dest_len <= 0)
		return;
	if (src == NULL)
	{
		*dest = 0;
		return;
	}

	/*
	 * Budget is tracked in OUTPUT bytes: one input byte can expand to
	 * three, so counting input bytes against dest_len would overflow dest.
	 */
	limit = dest_len - 4;
	while (written < limit && (src_len < 0 || consumed < src_len) && *src)
	{
		unsigned char ch = *src;

		if (ch == ' ')
		{
			*dest++ = '+';
			written += 1;
		}
		else if (is_alpha_number_char(ch) || strchr("=!~*'()", ch))
		{
			*dest++ = ch;
			written += 1;
		}
		else
		{
			*dest++ = '%';
			*dest++ = char_to_hex( (unsigned char)(ch >> 4) );
			*dest++ = char_to_hex( (unsigned char)(ch & 0x0F) );
			written += 3;
		}
		++src;
		++consumed;
	}
	*dest = 0;
}