在程序中经常需要在 GBK 与 UTF-8 编码之间进行转换,尤其是在处理中文字符时。现将 Windows 与 Linux 下的实现总结如下:
Windows(使用 MultiByteToWideChar / WideCharToMultiByte):
// Converts a UTF-8 encoded string to GBK (Windows code page 936).
//
// The conversion goes through UTF-16 as an intermediate form, because the
// Win32 API only converts between a multi-byte code page and wide characters.
//
// strUtf8 - UTF-8 input. Conversion stops at the first NUL byte, because the
//           input is handed to the API as a NUL-terminated C string.
//
// Returns the GBK bytes (without a trailing NUL), or an empty string if any
// conversion step fails.
std::string UTF8ToGBK(const std::string& strUtf8)
{
    // Use the GBK code page explicitly. CP_ACP is the *system* ANSI code page,
    // which is only GBK on machines configured for Simplified Chinese.
    const unsigned int kCodePageGbk = 936;

    // Pass 1: query the number of UTF-16 units required (terminator included).
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }

    // The string objects own the buffers, so nothing leaks if a later
    // allocation throws.
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, &wide[0], wideLen) <= 0)
    {
        return std::string();
    }

    // Pass 2: query the number of GBK bytes required (terminator included).
    const int gbkLen = WideCharToMultiByte(kCodePageGbk, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (gbkLen <= 0)
    {
        return std::string();
    }

    std::string strGBK(static_cast<std::string::size_type>(gbkLen), '\0');
    if (WideCharToMultiByte(kCodePageGbk, 0, wide.c_str(), -1, &strGBK[0], gbkLen, NULL, NULL) <= 0)
    {
        return std::string();
    }

    // The API wrote a terminating NUL into the last slot; std::string keeps
    // its own terminator, so drop that byte from the logical length.
    strGBK.resize(static_cast<std::string::size_type>(gbkLen - 1));
    return strGBK;
}
// Converts a GBK (Windows code page 936) encoded string to UTF-8.
//
// The conversion goes through UTF-16 as an intermediate form, because the
// Win32 API only converts between a multi-byte code page and wide characters.
//
// strGBK - GBK input. Conversion stops at the first NUL byte, because the
//          input is handed to the API as a NUL-terminated C string.
//
// Returns the UTF-8 bytes (without a trailing NUL), or an empty string if any
// conversion step fails.
std::string GBKToUTF8(const std::string& strGBK)
{
    // Use the GBK code page explicitly. CP_ACP is the *system* ANSI code page,
    // which is only GBK on machines configured for Simplified Chinese.
    const unsigned int kCodePageGbk = 936;

    // Pass 1: query the number of UTF-16 units required (terminator included).
    const int wideLen = MultiByteToWideChar(kCodePageGbk, 0, strGBK.c_str(), -1, NULL, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }

    // Zero-initialised, self-owning buffer: a failed conversion can never
    // leave unterminated garbage behind, and nothing leaks on an exception.
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(kCodePageGbk, 0, strGBK.c_str(), -1, &wide[0], wideLen) <= 0)
    {
        return std::string();
    }

    // Pass 2: query the number of UTF-8 bytes required (terminator included).
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
    {
        return std::string();
    }

    std::string strOutUTF8(static_cast<std::string::size_type>(utf8Len), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &strOutUTF8[0], utf8Len, NULL, NULL) <= 0)
    {
        return std::string();
    }

    // The API wrote a terminating NUL into the last slot; std::string keeps
    // its own terminator, so drop that byte from the logical length.
    strOutUTF8.resize(static_cast<std::string::size_type>(utf8Len - 1));
    return strOutUTF8;
}
Linux(使用 iconv):
// Converts a GBK-encoded byte buffer to UTF-8 using iconv.
//
// sOut       - destination buffer that receives the UTF-8 bytes
// iMaxOutLen - capacity of sOut in bytes
// sIn        - GBK input bytes
// iInLen     - number of bytes of sIn to convert
//
// Returns the number of bytes written to sOut, or -1 when the arguments are
// invalid, the converter cannot be opened, or iconv reports an error (for
// example an invalid/incomplete input sequence or an output buffer that is
// too small). The output is not NUL-terminated by this function.
int GbkToUtf8(char *sOut, int iMaxOutLen, const char *sIn, int iInLen)
{
    // A negative length would wrap to a huge size_t below and let iconv run
    // far past the end of the caller's buffers.
    if (iMaxOutLen < 0 || iInLen < 0)
    {
        return -1;
    }
    // A missing buffer is only acceptable when there is nothing to read/write.
    if ((sIn == NULL && iInLen > 0) || (sOut == NULL && iMaxOutLen > 0))
    {
        return -1;
    }

    iconv_t cd = iconv_open("utf-8", "gbk");
    if (cd == (iconv_t) - 1)
    {
        return -1;
    }

    // iconv takes a non-const input pointer but does not modify the input.
    char *pIn = const_cast<char *>(sIn);
    char *pOut = sOut;
    size_t iSrcLen = static_cast<size_t>(iInLen);
    size_t iLeftLen = static_cast<size_t>(iMaxOutLen);

    const size_t ret = iconv(cd, &pIn, &iSrcLen, &pOut, &iLeftLen);

    // The descriptor is released on both the success and the failure path.
    iconv_close(cd);

    if (ret == (size_t) - 1)
    {
        return -1;
    }
    // iLeftLen is the unused output capacity; the difference is what was written.
    return static_cast<int>(static_cast<size_t>(iMaxOutLen) - iLeftLen);
}
// Converts a UTF-8 encoded byte buffer to GBK using iconv.
//
// sOut       - destination buffer that receives the GBK bytes
// iMaxOutLen - capacity of sOut in bytes
// sIn        - UTF-8 input bytes
// iInLen     - number of bytes of sIn to convert
//
// Returns the number of bytes written to sOut, or -1 when the arguments are
// invalid, the converter cannot be opened, or iconv reports an error (for
// example an invalid/incomplete input sequence, a character that cannot be
// converted, or an output buffer that is too small). The output is not
// NUL-terminated by this function.
int Utf8ToGbk(char *sOut, int iMaxOutLen, const char *sIn, int iInLen)
{
    // A negative length would wrap to a huge size_t below and let iconv run
    // far past the end of the caller's buffers.
    if (iMaxOutLen < 0 || iInLen < 0)
    {
        return -1;
    }
    // A missing buffer is only acceptable when there is nothing to read/write.
    if ((sIn == NULL && iInLen > 0) || (sOut == NULL && iMaxOutLen > 0))
    {
        return -1;
    }

    iconv_t cd = iconv_open("gbk", "utf-8");
    if (cd == (iconv_t) - 1)
    {
        return -1;
    }

    // iconv takes a non-const input pointer but does not modify the input.
    char *pIn = const_cast<char *>(sIn);
    char *pOut = sOut;
    size_t iSrcLen = static_cast<size_t>(iInLen);
    size_t iLeftLen = static_cast<size_t>(iMaxOutLen);

    const size_t ret = iconv(cd, &pIn, &iSrcLen, &pOut, &iLeftLen);

    // The descriptor is released on both the success and the failure path.
    iconv_close(cd);

    if (ret == (size_t) - 1)
    {
        return -1;
    }
    // iLeftLen is the unused output capacity; the difference is what was written.
    return static_cast<int>(static_cast<size_t>(iMaxOutLen) - iLeftLen);
}