URL中文 encode decode
实测有用,可放心使用。
参考以下博客并做了少许修改,已验证可用:
https://www.cnblogs.com/hoodlum1980/archive/2012/05/28/2521500.html
源码
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "windows.h"
#include "stringapiset.h"
BOOL UrlEncode(const char* pSrcBuffer, char* pDstBuffer, int nDstBufLen, BOOL bUpperCase)
{
if (pSrcBuffer == NULL || pDstBuffer == NULL || nDstBufLen <= 0)
return FALSE;
size_t len_ascii = strlen(pSrcBuffer);
if (len_ascii == 0)
{
pDstBuffer[0] = 0;
return TRUE;
}
// 1.先转换到UTF-8
char baseChar = bUpperCase ? 'A' : 'a';
int cchWideChar = MultiByteToWideChar(CP_ACP, 0, pSrcBuffer, len_ascii, NULL, 0);
LPWSTR pUnicode = (LPWSTR)malloc((cchWideChar + 1) * sizeof(WCHAR));
if (pUnicode == NULL)
return FALSE;
MultiByteToWideChar(CP_ACP, 0, pSrcBuffer, len_ascii, pUnicode, cchWideChar + 1);
int cbUTF8 = WideCharToMultiByte(CP_UTF8, 0, pUnicode, cchWideChar, NULL, 0, NULL, NULL);
LPSTR pUTF8 = (LPSTR)malloc((cbUTF8 + 1) * sizeof(CHAR));
if (pUTF8 == NULL)
{
free(pUnicode);
return FALSE;
}
WideCharToMultiByte(CP_UTF8, 0, pUnicode, cchWideChar, pUTF8, cbUTF8 + 1, NULL, NULL);
pUTF8[cbUTF8] = '\0';
// 2.再从UTF-8逐字符转化为目标URL
unsigned char c;
int cbDst = 0; //累加
unsigned char *pSrc = (unsigned char*)pUTF8;
unsigned char *pDst = (unsigned char*)pDstBuffer;
while (*pSrc && cbDst < nDstBufLen - 1)
{
c = *pSrc;
// 2.1 不转化的场景:字母、数字、特殊字符
if (isalpha(c) || isdigit(c)
|| c == ':' || c == '/' || c == '.' || c == '-' || c == '_' || c == '~')
{
*pDst = c;
++pDst;
++cbDst;
}
// 2.2 特殊转化:空格变为"+"
else if (c == ' ')
{
*pDst = '+';
++pDst;
++cbDst;
}
// 2.3 逐字符转化为3部分
else
{
//检查缓冲区大小是否够用?
if (cbDst + 3 > nDstBufLen - 1)
break;
pDst[0] = '%';
pDst[1] = (c >= 0xA0) ? ((c >> 4) - 10 + baseChar) : ((c >> 4) + '0');
pDst[2] = ((c & 0xF) >= 0xA) ? ((c & 0xF) - 10 + baseChar) : ((c & 0xF) + '0');
pDst += 3;
cbDst += 3;
}
++pSrc;
}
// null-terminator
*pDst = '\0';
// 释放内存
free(pUnicode);
free(pUTF8);
return TRUE;
}
//解码后是utf-8编码
BOOL UrlDecode(const char* szSrc, char* pBuf, int cbBufLen)
{
if (szSrc == NULL || pBuf == NULL || cbBufLen <= 0)
return FALSE;
size_t len_ascii = strlen(szSrc);
if (len_ascii == 0)
{
pBuf[0] = 0;
return TRUE;
}
char *pUTF8 = (char*)malloc(len_ascii + 1);
if (pUTF8 == NULL)
return FALSE;
int cbDest = 0; //累加
unsigned char *pSrc = (unsigned char*)szSrc;
unsigned char *pDest = (unsigned char*)pUTF8;
while (*pSrc)
{
if (*pSrc == '%')
{
*pDest = 0;
//高位
if (pSrc[1] >= 'A' && pSrc[1] <= 'F')
*pDest += (pSrc[1] - 'A' + 10) * 0x10;
else if (pSrc[1] >= 'a' && pSrc[1] <= 'f')
*pDest += (pSrc[1] - 'a' + 10) * 0x10;
else
*pDest += (pSrc[1] - '0') * 0x10;
//低位
if (pSrc[2] >= 'A' && pSrc[2] <= 'F')
*pDest += (pSrc[2] - 'A' + 10);
else if (pSrc[2] >= 'a' && pSrc[2] <= 'f')
*pDest += (pSrc[2] - 'a' + 10);
else
*pDest += (pSrc[2] - '0');
pSrc += 3;
}
else if (*pSrc == '+')
{
*pDest = ' ';
++pSrc;
}
else
{
*pDest = *pSrc;
++pSrc;
}
++pDest;
++cbDest;
}
//null-terminator
*pDest = '\0';
++cbDest;
int cchWideChar = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pUTF8, cbDest, NULL, 0);
LPWSTR pUnicode = (LPWSTR)malloc(cchWideChar * sizeof(WCHAR));
if (pUnicode == NULL)
{
free(pUTF8);
return FALSE;
}
MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pUTF8, cbDest, pUnicode, cchWideChar);
WideCharToMultiByte(CP_ACP, 0, pUnicode, cchWideChar, pBuf, cbBufLen, NULL, NULL);
free(pUTF8);
free(pUnicode);
return TRUE;
}