一. 百分号编码(URL编码)
使用这种编码的目的是为了传输, 类似UTF8的用途.
百分号编码把字符分为保留字符和非保留字符: 保留字符在URL中有特殊用途(例如作为分隔符), 当它们作为普通数据出现时需要转换; 非保留字符没有特殊含义, 可以直接使用, 编码时不需要转换.
RFC 3986 section 2.3 非保留字符 (2005年1月)
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
a b c d e f g h i j k l m n o p q r s t u v w x y z
0 1 2 3 4 5 6 7 8 9 - _ . ~
非保留字符的集合是很明确的, 所以在编码时, 只要判断一个字符是不是非保留字符即可: 是则原样输出, 其余的字符(保留字符以及其它所有字节)一律转换成 %XX 的形式;
二. 实现
注意: 百分号编码的转换中, 被编码的数据是UTF8编码的(下面的实现假定输入是GB2312, 会先转换成UTF8再编码).
//
// Percent-encoding (URL encoding).
// Reference: http://zh.wikipedia.org/zh-cn/%E7%99%BE%E5%88%86%E5%8F%B7%E7%BC%96%E7%A0%81
//
// Converts pURL to UTF-8 with GB2312ToUTF8() and percent-encodes the result:
//   - RFC 3986 unreserved characters (A-Z a-z 0-9 - _ . ~) are copied as-is;
//   - a space is written as '+' (UrlDecode maps '+' back to a space);
//   - every other byte is written as "%XX" using upper-case hex digits.
//
// Returns a NUL-terminated buffer allocated with new[]; the caller releases it
// with delete[]. Returns 0 when pURL is null, when the conversion yields a
// null pointer, or when the converted text is empty.
char* UrlEncode(const char* pURL)
{
    if(0 == pURL)
    {
        return 0;
    }

    // Convert to UTF-8 first.
    // NOTE(review): the buffer is released with delete[] below, exactly as the
    // original code did; this assumes GB2312ToUTF8 allocates with new[].
    char* pUTF8 = GB2312ToUTF8(pURL);
    if(0 == pUTF8)
    {
        return 0;
    }

    const size_t nUTF8Len = strlen(pUTF8);
    if(0 == nUTF8Len)
    {
        delete [] pUTF8; // do not leak the converted buffer on the empty-input path
        return 0;
    }

    static const char kHexDigits[] = "0123456789ABCDEF";

    // Worst case: every input byte expands to three output bytes, plus the terminator.
    char* pEncode = new char[3 * nUTF8Len + 1];
    size_t nEncodeIndex = 0;

    for(size_t i = 0; i < nUTF8Len; ++i)
    {
        const unsigned char cTemp = static_cast<unsigned char>(pUTF8[i]);

        // Explicit ASCII ranges instead of isalpha/isdigit: those are
        // locale-dependent and may accept bytes >= 0x80, which would leave
        // parts of a multi-byte UTF-8 sequence unencoded.
        const bool bUnreserved =
            ('A' <= cTemp && cTemp <= 'Z') ||
            ('a' <= cTemp && cTemp <= 'z') ||
            ('0' <= cTemp && cTemp <= '9') ||
            '-' == cTemp || '.' == cTemp || '~' == cTemp || '_' == cTemp;

        if(bUnreserved)
        {
            pEncode[nEncodeIndex++] = static_cast<char>(cTemp);
        }
        else if(' ' == cTemp)
        {
            pEncode[nEncodeIndex++] = '+';
        }
        else
        {
            pEncode[nEncodeIndex++] = '%';
            pEncode[nEncodeIndex++] = kHexDigits[cTemp >> 4];   // high nibble
            pEncode[nEncodeIndex++] = kHexDigits[cTemp & 0x0F]; // low nibble
        }
    }
    pEncode[nEncodeIndex] = '\0';

    delete [] pUTF8;
    return pEncode;
}
// Returns the numeric value (0-15) of the hexadecimal digit c,
// or -1 when c is not a hexadecimal digit.
static int UrlDecodeHexValue(unsigned char c)
{
    if('0' <= c && c <= '9')
    {
        return c - '0';
    }
    if('A' <= c && c <= 'F')
    {
        return c - 'A' + 0x0A;
    }
    if('a' <= c && c <= 'f')
    {
        return c - 'a' + 0x0A;
    }
    return -1;
}

// Percent-decodes pURL. The decoded bytes are UTF-8; they are then passed
// through UTF8ToGB2312() and that function's result is returned.
//
//   - "%XX" (two hex digits, either case) becomes the byte 0xXX;
//   - '+' becomes a space;
//   - any other character is copied unchanged.
//
// A '%' that is not followed by two hex digits (malformed or truncated escape)
// is copied literally instead of producing a garbage byte or cutting the
// output short.
//
// Returns 0 when pURL is null or empty; otherwise returns whatever
// UTF8ToGB2312() returns (the sample caller releases it with delete[]).
// Note: a decoded "%00" places a NUL in the intermediate buffer, so the text
// handed to UTF8ToGB2312() ends there.
char* UrlDecode(const char* pURL)
{
    if(0 == pURL)
    {
        return 0;
    }

    const size_t nURLLen = strlen(pURL);
    if(0 == nURLLen)
    {
        return 0;
    }

    // Decoding never grows the text, so the input length plus a terminator is enough.
    char* pUTF8 = new char[nURLLen + 1];
    size_t nDecodeIndex = 0;

    for(size_t i = 0; i < nURLLen; ++i)
    {
        const unsigned char cTemp = static_cast<unsigned char>(pURL[i]);

        if('%' == cTemp && i + 2 < nURLLen)
        {
            const int nHigh = UrlDecodeHexValue(static_cast<unsigned char>(pURL[i + 1]));
            const int nLow = UrlDecodeHexValue(static_cast<unsigned char>(pURL[i + 2]));
            if(0 <= nHigh && 0 <= nLow)
            {
                pUTF8[nDecodeIndex++] = static_cast<char>((nHigh << 4) | nLow);
                i += 2; // skip the two hex digits that were just consumed
                continue;
            }
            // Not a valid escape: fall through and copy the '%' literally.
        }

        pUTF8[nDecodeIndex++] = ('+' == cTemp) ? ' ' : static_cast<char>(cTemp);
    }
    pUTF8[nDecodeIndex] = '\0';

    char* pDecode = UTF8ToGB2312(pUTF8);
    delete [] pUTF8;
    return pDecode;
}
// Sample driver: encodes a URL containing Chinese text and decodes its
// percent-encoded counterpart, then frees both results. The results are not
// printed or compared; inspect p and p1 in a debugger.
int _tmain(int argc, _TCHAR* argv[])
{
    std::string strURL = "http://zh.wikipedia.org/zh-cn/百分号编码";
    std::string strURLE = "http://zh.wikipedia.org/zh-cn/%E7%99%BE%E5%88%86%E5%8F%B7%E7%BC%96%E7%A0%81";

    // The unused 512-byte scratch buffer from the original was removed.
    char* p = UrlEncode(strURL.c_str());
    char* p1 = UrlDecode(strURLE.c_str());

    // Both buffers are released with delete[]; delete[] on a null pointer is a no-op.
    delete [] p;
    delete [] p1;
    return 0;
}
参考: http://zh.wikipedia.org/zh-cn/%E7%99%BE%E5%88%86%E5%8F%B7%E7%BC%96%E7%A0%81