百分号编码详解-CSDN博客

本文链接：https://blog.csdn.net/bagboy_taobao_com/article/details/42454831

一. 百分号编码(URL编码)

使用这种编码的目的是为了传输, 类似UTF8的用途.
百分号编码中分为保留字符和非保留字符, 很明显, 所谓的保留字符就是有其特殊用途的, 编码时需要转换的; 非保留字符就是可以直接被使用的, 编码时不需要转换的.

RFC 3986 section 2.3 非保留字符 (2005年1月)
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
a b c d e f g h i j k l m n o p q r s t u v w x y z
0 1 2 3 4 5 6 7 8 9 - _ . ~
非保留字符是很明确的, 所以在编码时, 只要判断哪些字符是非保留的, 其余的字符(保留字符以及其它所有字符)就都是需要转换成百分号编码的;

二. 实现

注意: 百分号编码的转换中, 输入数据是UTF8编码的.
//

// Percent-encoding (URL encoding).
// Reference: http://zh.wikipedia.org/zh-cn/%E7%99%BE%E5%88%86%E5%8F%B7%E7%BC%96%E7%A0%81

// Returns true when c is an RFC 3986 "unreserved" character
// (A-Z, a-z, 0-9, '-', '_', '.', '~'). Explicit ASCII ranges are used instead
// of isalpha()/isdigit() so the result does not depend on the current locale.
static bool IsUnreservedChar(unsigned char c)
{
	return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') ||
		('0' <= c && c <= '9') ||
		'-' == c || '_' == c || '.' == c || '~' == c;
}

// Percent-encodes pURL.
//
// The input is first passed through GB2312ToUTF8(); the resulting bytes are
// then encoded one by one:
//   - unreserved characters are copied unchanged,
//   - a space becomes '+',
//   - every other byte becomes "%XX" with upper-case hex digits.
//
// Returns a NUL-terminated buffer allocated with new[] that the caller must
// release with delete[]. Returns 0 when pURL is null, when the conversion
// yields a null pointer, or when the converted string is empty.
//
// NOTE(review): the buffer returned by GB2312ToUTF8() is released with
// delete[] here, as in the original code -- confirm that helper allocates
// with new[].
char* UrlEncode(const char* pURL)
{
	if(0 == pURL)
	{
		return 0;
	}

	// Convert to UTF-8 first
	char* pUTF8 = GB2312ToUTF8(pURL);
	if(0 == pUTF8)
	{
		return 0;
	}

	const size_t nUTF8Len = strlen(pUTF8);
	if(0 == nUTF8Len)
	{
		// Release the converted buffer on the early exit too; the original
		// code leaked it here.
		delete [] pUTF8;
		return 0;
	}

	// Worst case: every input byte expands to three output characters.
	char* pEncode = new char[3 * nUTF8Len + 1];

	static const char szHexDigits[] = "0123456789ABCDEF";
	size_t nEncodeIndex = 0;
	for(size_t i = 0; i < nUTF8Len; ++i)
	{
		// Work on the unsigned byte value so the shifts below are well defined
		// for bytes >= 0x80.
		const unsigned char cTemp = static_cast<unsigned char>(pUTF8[i]);
		if(IsUnreservedChar(cTemp))
		{
			pEncode[nEncodeIndex++] = static_cast<char>(cTemp);
		}
		else if(' ' == cTemp)
		{
			pEncode[nEncodeIndex++] = '+';
		}
		else
		{
			pEncode[nEncodeIndex++] = '%';
			pEncode[nEncodeIndex++] = szHexDigits[cTemp >> 4];
			pEncode[nEncodeIndex++] = szHexDigits[cTemp & 0x0F];
		}
	}
	pEncode[nEncodeIndex] = '\0';

	delete [] pUTF8;
	return pEncode;
}

// Returns the numeric value (0-15) of the hexadecimal digit c, or -1 when c
// is not a hexadecimal digit.
static int HexDigitValue(unsigned char c)
{
	if('0' <= c && c <= '9')
	{
		return c - '0';
	}
	if('A' <= c && c <= 'F')
	{
		return c - 'A' + 0x0A;
	}
	if('a' <= c && c <= 'f')
	{
		return c - 'a' + 0x0A;
	}
	return -1;
}

// Decodes a percent-encoded string.
//
// "%XX" (upper- or lower-case hex) is turned into the byte 0xXX, '+' into a
// space, and every other character is copied unchanged. A '%' that is not
// followed by two valid hex digits is malformed; it is copied through
// literally instead of producing a garbage byte or truncating the output.
//
// The decoded bytes are then passed to UTF8ToGB2312() and that function's
// result is returned, so the caller receives the converted string rather than
// the raw decoded bytes. Returns 0 when pURL is null or empty.
//
// NOTE(review): main() releases the returned pointer with delete[] -- confirm
// that UTF8ToGB2312() allocates with new[].
// NOTE(review): "%00" decodes to a NUL byte, which ends the string as seen by
// UTF8ToGB2312().
char* UrlDecode(const char* pURL)
{
	if(0 == pURL)
	{
		return 0;
	}

	const size_t nURLLen = strlen(pURL);
	if(0 == nURLLen)
	{
		return 0;
	}

	// Decoding never makes the string longer, so the input length is enough.
	char* pUTF8 = new char[nURLLen + 1];

	size_t nDecodeIndex = 0;
	for(size_t i = 0; i < nURLLen; ++i)
	{
		const unsigned char cTemp = static_cast<unsigned char>(pURL[i]);
		if('%' == cTemp)
		{
			int nHigh = -1;
			int nLow = -1;
			if(i + 2 < nURLLen)
			{
				nHigh = HexDigitValue(static_cast<unsigned char>(pURL[i + 1]));
				nLow = HexDigitValue(static_cast<unsigned char>(pURL[i + 2]));
			}

			if(0 <= nHigh && 0 <= nLow)
			{
				pUTF8[nDecodeIndex++] = static_cast<char>((nHigh << 4) | nLow);
				i += 2;	// skip the two hex digits just consumed
			}
			else
			{
				// Malformed or truncated escape: keep the '%' as-is.
				pUTF8[nDecodeIndex++] = '%';
			}
		}
		else if('+' == cTemp)
		{
			pUTF8[nDecodeIndex++] = ' ';
		}
		else
		{
			pUTF8[nDecodeIndex++] = static_cast<char>(cTemp);
		}
	}
	pUTF8[nDecodeIndex] = '\0';

	char* pDecode = UTF8ToGB2312(pUTF8);
	delete [] pUTF8;
	return pDecode;
}

// Demo entry point: encodes a sample URL containing non-ASCII text and
// decodes its percent-encoded counterpart, then frees both results.
// The results are not printed; inspect p and p1 in a debugger.
int _tmain(int argc, _TCHAR* argv[])
{
	std::string strURL = "http://zh.wikipedia.org/zh-cn/百分号编码";
	std::string strURLE = "http://zh.wikipedia.org/zh-cn/%E7%99%BE%E5%88%86%E5%8F%B7%E7%BC%96%E7%A0%81";

	char* p = UrlEncode(strURL.c_str());
	char* p1 = UrlDecode(strURLE.c_str());

	// Both functions may return a null pointer; delete[] on null is a no-op.
	delete [] p;
	delete [] p1;
	return 0;
}

参考: http://zh.wikipedia.org/zh-cn/%E7%99%BE%E5%88%86%E5%8F%B7%E7%BC%96%E7%A0%81