string和wstring互相转换
1、string转wstring
/**
 * @brief Converts a multibyte (narrow) string to a wide string.
 *
 * On non-Windows platforms the conversion uses mbstowcs() under the locale
 * selected by the environment (setlocale(LC_ALL, "")); the caller's locale is
 * restored before returning. On Windows the conversion uses
 * MultiByteToWideChar() with the given code page.
 *
 * Conversion stops at the first embedded '\0' of the input.
 *
 * @param strInput  Narrow string to convert.
 * @param uCodePage Windows code page of strInput; ignored on other platforms.
 * @return The converted wide string, or an empty string when the input is
 *         empty or cannot be converted.
 */
std::wstring StringToWstring(const std::string &strInput,unsigned int uCodePage)
{
#ifndef _WIN32
    (void) uCodePage; // only meaningful for the Windows implementation
    if (strInput.empty())
    {
        return std::wstring();
    }

    // setlocale(LC_ALL, "") returns the NEW locale, so the current one has to
    // be queried (and copied, the buffer is reused) before switching.
    const char* pszCurrentLocale = setlocale(LC_ALL, NULL);
    const std::string strSavedLocale = (pszCurrentLocale != NULL) ? pszCurrentLocale : "";
    setlocale(LC_ALL, ""); // on failure the current locale simply stays active

    const size_t uConversionFailed = static_cast<size_t>(-1);
    std::wstring wstrResult;
    const char* pSrc = strInput.c_str();

    // First pass: number of wide characters required (terminator excluded),
    // or (size_t)-1 when the input holds an invalid multibyte sequence.
    const size_t uLength = mbstowcs(NULL, pSrc, 0);
    if (uLength != uConversionFailed && uLength > 0)
    {
        wstrResult.resize(uLength);
        // Second pass: convert straight into the string's own storage.
        const size_t uWritten = mbstowcs(&wstrResult[0], pSrc, uLength);
        if (uWritten == uConversionFailed)
        {
            wstrResult.clear();
        }
        else
        {
            wstrResult.resize(uWritten);
        }
    }

    if (!strSavedLocale.empty())
    {
        setlocale(LC_ALL, strSavedLocale.c_str());
    }
    return wstrResult;
#else
    if (strInput.empty())
    {
        return std::wstring();
    }

    // With a source length of -1 the returned count includes the terminator.
    const int iLength = ::MultiByteToWideChar(uCodePage, 0, strInput.c_str(), -1, NULL, 0);
    if (iLength <= 0)
    {
        return std::wstring();
    }

    std::wstring strUnicode(static_cast<size_t>(iLength), L'\0');
    const int iWritten = ::MultiByteToWideChar(uCodePage, 0, strInput.c_str(), -1, &strUnicode[0], iLength);
    if (iWritten <= 0)
    {
        return std::wstring();
    }

    // Drop the terminating L'\0' that the API wrote into the buffer.
    strUnicode.resize(static_cast<size_t>(iWritten) - 1);
    return strUnicode;
#endif
}
2、wstring转string
/**
 * @brief Converts a wide string to a multibyte (narrow) string.
 *
 * On non-Windows platforms the conversion uses wcstombs() under the locale
 * selected by the environment (setlocale(LC_ALL, "")); the caller's locale is
 * restored before returning. On Windows the conversion uses
 * WideCharToMultiByte() with the given code page.
 *
 * Conversion stops at the first embedded L'\0' of the input.
 *
 * @param wstrInput Wide string to convert.
 * @param uCodePage Windows code page of the result; ignored on other platforms.
 * @return The converted narrow string, or an empty string when the input is
 *         empty or contains a character the target encoding cannot represent.
 */
std::string WstringToString(const std::wstring &wstrInput,unsigned int uCodePage)
{
#ifndef _WIN32
    (void) uCodePage; // only meaningful for the Windows implementation
    if (wstrInput.empty())
    {
        return std::string();
    }

    // setlocale(LC_ALL, "") returns the NEW locale, so the current one has to
    // be queried (and copied, the buffer is reused) before switching.
    const char* pszCurrentLocale = setlocale(LC_ALL, NULL);
    const std::string strSavedLocale = (pszCurrentLocale != NULL) ? pszCurrentLocale : "";
    setlocale(LC_ALL, ""); // on failure the current locale simply stays active

    const size_t uConversionFailed = static_cast<size_t>(-1);
    std::string strResult;
    const wchar_t* pSrc = wstrInput.c_str();

    // First pass: number of bytes required (terminator excluded), or
    // (size_t)-1 when a wide character has no multibyte representation.
    const size_t uLength = wcstombs(NULL, pSrc, 0);
    if (uLength != uConversionFailed && uLength > 0)
    {
        strResult.resize(uLength);
        // Second pass: convert straight into the string's own storage.
        const size_t uWritten = wcstombs(&strResult[0], pSrc, uLength);
        if (uWritten == uConversionFailed)
        {
            strResult.clear();
        }
        else
        {
            strResult.resize(uWritten);
        }
    }

    if (!strSavedLocale.empty())
    {
        setlocale(LC_ALL, strSavedLocale.c_str());
    }
    return strResult;
#else
    if (wstrInput.empty())
    {
        return std::string();
    }

    // With a source length of -1 the returned count includes the terminator.
    const int iLength = ::WideCharToMultiByte(uCodePage, 0, wstrInput.c_str(), -1, NULL, 0, NULL, NULL);
    if (iLength <= 0)
    {
        return std::string();
    }

    std::string strAnsi(static_cast<size_t>(iLength), '\0');
    const int iWritten = ::WideCharToMultiByte(uCodePage, 0, wstrInput.c_str(), -1, &strAnsi[0], iLength, NULL, NULL);
    if (iWritten <= 0)
    {
        return std::string();
    }

    // Drop the terminating '\0' that the API wrote into the buffer.
    strAnsi.resize(static_cast<size_t>(iWritten) - 1);
    return strAnsi;
#endif
}
3、函数实现说明
Linux下使用的核心函数是mbstowcs和wcstombs,Windows下使用的是MultiByteToWideChar和WideCharToMultiByte。函数实现需注意以下两点:
1、转换之前都需要先用API获取需要转换的大小,然后再分配对应内存,再转换出目标字符串。
2、编码选择:Linux下通过setlocale(LC_ALL, "")启用环境变量指定的本地编码,mbstowcs和wcstombs按该编码进行转换(uCodePage参数在Linux下不起作用)。Windows下由调用者通过uCodePage传入编码(例如CP_ACP、CP_UTF8),传入的编码必须与字符串的实际编码一致,否则会出现乱码。
4、附上判断字符串编码方法示例
ANSI:无格式定义(文本开头没有标记字节);对于中文,编码格式是GB2312。
Unicode:文本的前两个字节为FF FE,字节流是little endian。
Unicode big endian:文本的前两个字节为FE FF,字节流是big endian。
UTF-8:前两个字节为EF BB,第三个字节为BF(带BOM)。
// Example: guess a text's encoding from the byte-order mark (BOM) at its start.
std::string strTest = "Test数据";

// Leading bytes, read as unsigned so values >= 0x80 are not sign-extended.
// Bytes past the end of a short string read as 0 and therefore match no BOM.
const unsigned char chFirst = strTest.size() > 0 ? static_cast<unsigned char>(strTest[0]) : 0;
const unsigned char chSecond = strTest.size() > 1 ? static_cast<unsigned char>(strTest[1]) : 0;
const unsigned char chThird = strTest.size() > 2 ? static_cast<unsigned char>(strTest[2]) : 0;

// First two bytes packed into one value, first byte in the high position.
int iCode = (chFirst << 8) + chSecond;

// Classify by the leading bytes of the text.
std::string strCode =
    (iCode == 0xfffe) ? "Unicode" :                          // FF FE: little endian
    (iCode == 0xfeff) ? "Unicode big endian" :               // FE FF: big endian
    (iCode == 0xefbb && chThird == 0xbf) ? "UTF-8" :         // EF BB BF: UTF-8 with BOM
    "ANSI";                                                  // no BOM found