C++ 常用字符编码转换(UTF-8、ANSI、Unicode 宽字符与 std::string / std::wstring 之间的互转):
直接使用系统提供的接口:在 Windows 下使用 Windows API,在 Linux 下使用 iconv 接口;此方法可以跨平台使用,在 Linux、Windows 下都适用。
例如:在代码中直接写中文并将其转换为 UTF-8 编码时,可以使用如下接口:
std::string str = "中国";
std::string utf8Str = ansi_to_utf8(str.c_str());
#include <string>
// Converts a NUL-terminated UTF-8 string to the ANSI code page (Windows)
// or to the codeset reported by nl_langinfo(CODESET) (Linux).
std::string utf8_to_ansi(const char* in);
// Converts a NUL-terminated ANSI / locale-codeset string to UTF-8.
std::string ansi_to_utf8(const char* in);
// Converts a NUL-terminated wide string to the ANSI code page (CP_ACP on Windows).
std::string uni_to_ansi(const wchar_t* in);
// Converts a NUL-terminated ANSI string (CP_ACP on Windows) to a wide string.
std::wstring ansi_to_uni(const char* in);
// Converts a NUL-terminated wide string to UTF-8.
std::string uni_to_utf8(const wchar_t* in);
// Converts a NUL-terminated UTF-8 string to a wide string.
std::wstring utf8_to_uni(const char* in);
#include <stdio.h>
#ifdef _WIN32
#include <Windows.h>
#else
#include "uniconv.h"
#endif
#ifdef _WIN32
// Converts a NUL-terminated wide (UTF-16) string to a multi-byte string in
// the given Windows code page.
//
// acp  Target code page identifier (e.g. CP_ACP, CP_UTF8).
// lpw  NUL-terminated wide string; a null pointer yields an empty result.
//
// Returns the converted string without the terminating NUL, or an empty
// string if the conversion fails.
std::string uni_to_(unsigned int acp, const wchar_t* lpw)
{
    if (lpw == NULL)
        return "";

    // Ask the API for the exact buffer size (terminator included) instead of
    // guessing: a fixed "3 bytes per wchar_t" estimate is too small for code
    // pages that can emit 4 bytes for a single UTF-16 code unit.
    const int needed = WideCharToMultiByte(acp, 0, lpw, -1, NULL, 0, NULL, NULL);
    if (needed <= 0) // error?
        return "";

    // std::string owns the buffer, so no path (including the error return
    // below) can leak it.
    std::string converted(static_cast<std::string::size_type>(needed), '\0');
    const int written = WideCharToMultiByte(acp, 0, lpw, -1, &converted[0], needed, NULL, NULL);
    if (written <= 0) // error?
        return "";

    // `written` counts the terminating NUL because the input length was -1.
    converted.resize(static_cast<std::string::size_type>(written) - 1);
    return converted;
}
// Converts a NUL-terminated multi-byte string in the given Windows code page
// to a wide (UTF-16) string.
//
// acp  Source code page identifier (e.g. CP_ACP, CP_UTF8).
// lpa  NUL-terminated input string; a null pointer yields an empty result.
//
// Returns the converted string without the terminating NUL, or an empty
// string if the conversion fails.
std::wstring _to_uni(unsigned int acp, const char* lpa)
{
    if (lpa == NULL)
        return L"";

    // First call only measures: returns the required size in wchar_t units,
    // terminator included.
    const int needed = MultiByteToWideChar(acp, 0, lpa, -1, NULL, 0);
    if (needed <= 0)
        return L"";

    // std::wstring owns the buffer, so the error return below cannot leak it.
    std::wstring converted(static_cast<std::wstring::size_type>(needed), L'\0');
    const int written = MultiByteToWideChar(acp, 0, lpa, -1, &converted[0], needed);
    if (written <= 0)
        return L"";

    // `written` counts the terminating NUL because the input length was -1.
    converted.resize(static_cast<std::wstring::size_type>(written) - 1);
    return converted;
}
#elif defined(__linux__)
// Converts `inlen` bytes starting at `in` from `from_codeset` to `to_codeset`
// using iconv, writing at most `outlen` bytes to `out`.
//
// Returns 0 on success. If iconv_open() or iconv() fails, returns the errno
// value observed after the failing call; an iconv() failure is additionally
// reported on stderr. The output is not NUL-terminated by this function
// unless the input bytes themselves include a terminator.
int codeset_conv(const char* from_codeset, const char* to_codeset, char* in, size_t inlen, char* out, size_t outlen)
{
    iconv_t handle = iconv_open(to_codeset, from_codeset);
    if (handle == (iconv_t)-1)
        return errno;

    // NOTE(review): presumably the NEOWARE_NL3 toolchain declares iconv()
    // with a `const char**` input parameter - confirm against that platform.
#ifdef NEOWARE_NL3
    const char** source = (const char**)&in;
#else
    char** source = &in;
#endif

    int status = 0;
    const size_t rc = iconv(handle, source, &inlen, &out, &outlen);
    if (rc == (size_t)-1)
    {
        // Capture errno first; the calls below may overwrite it.
        status = errno;
        fprintf(stderr, "%d %s\n", errno, strerror(errno));
    }

    iconv_close(handle);
    return status;
}
// Converts the NUL-terminated string `in` from `from_codeset` to `to_codeset`.
//
// Returns the converted text up to its first NUL byte. If the conversion
// fails, the original input is returned unchanged. A null `in` yields an
// empty string.
std::string codeset_conv(const char* from_codeset, const char* to_codeset, const char* in)
{
    if (in == NULL)
        return std::string();

    // iconv() takes a non-const input pointer, so work on a private copy.
    // The terminator is appended explicitly so that it is converted too and
    // the output ends up NUL-terminated.
    std::string source(in);
    source.push_back('\0');

    // Four output bytes per input byte covers the worst-case growth into
    // UTF-8 (a 1-byte legacy character can become 3 bytes); twice the input
    // size is not enough for that case.
    std::string target(source.size() * 4, '\0');

    // Both buffers are owned by std::string, so the failure return below does
    // not leak them.
    if (0 != codeset_conv(from_codeset, to_codeset, &source[0], source.size(), &target[0], target.size()))
        return in; // If conversion fails, return the original string.

    return std::string(target.c_str());
}
#endif // __linux__
//
// Converts a NUL-terminated UTF-8 string to the platform's "ANSI" encoding.
//
// Windows: goes through UTF-16 and the CP_ACP code page.
// Linux:   converts with iconv to the codeset reported by
//          nl_langinfo(CODESET). That value follows the current LC_CTYPE
//          locale, so callers normally need setlocale(LC_ALL, "") beforehand.
// Other:   no conversion backend is available; the input is returned as is.
//
// A null `in` yields an empty string.
std::string utf8_to_ansi(const char* in)
{
    if (in == NULL)
        return std::string();
#ifdef _WIN32
    return uni_to_ansi(utf8_to_uni(in).c_str());
#elif defined(__linux__)
    char* codeset = nl_langinfo(CODESET);
    return codeset_conv("UTF-8", codeset, in);
#else
    // Without this branch control would fall off the end of a non-void
    // function on platforms that are neither Windows nor Linux.
    return std::string(in);
#endif
}
// Converts a NUL-terminated string in the platform's "ANSI" encoding to UTF-8.
//
// Windows: goes through the CP_ACP code page and UTF-16.
// Linux:   converts with iconv from the codeset reported by
//          nl_langinfo(CODESET). That value follows the current LC_CTYPE
//          locale, so callers normally need setlocale(LC_ALL, "") beforehand.
// Other:   no conversion backend is available; the input is returned as is.
//
// A null `in` yields an empty string.
std::string ansi_to_utf8(const char* in)
{
    if (in == NULL)
        return std::string();
#ifdef _WIN32
    return uni_to_utf8(ansi_to_uni(in).c_str());
#elif defined(__linux__)
    char* codeset = nl_langinfo(CODESET);
    return codeset_conv(codeset, "UTF-8", in);
#else
    // Without this branch control would fall off the end of a non-void
    // function on platforms that are neither Windows nor Linux.
    return std::string(in);
#endif
}
// Converts a NUL-terminated wide string to the ANSI code page.
//
// Only the Windows path (CP_ACP) is implemented. Every other platform
// currently gets an empty string regardless of the input.
std::string uni_to_ansi(const wchar_t* in)
{
#ifdef _WIN32
    return uni_to_(CP_ACP, in);
#else
    // TODO: to be implemented
    (void)in;
    return std::string();
#endif
}
// Converts a NUL-terminated ANSI string to a wide string.
//
// Only the Windows path (CP_ACP) is implemented. Every other platform
// currently gets an empty wide string regardless of the input.
std::wstring ansi_to_uni(const char* in)
{
#ifdef _WIN32
    return _to_uni(CP_ACP, in);
#else
    // TODO: to be implemented
    (void)in;
    return std::wstring();
#endif
}
// Converts a NUL-terminated wide string to UTF-8.
//
// Windows: delegates to WideCharToMultiByte via uni_to_(CP_UTF8, ...).
// Other:   encodes directly, independent of the locale. wchar_t is treated
//          as UTF-32 when it is at least 4 bytes wide, otherwise as UTF-16
//          (surrogate pairs are combined). Values that are not valid Unicode
//          scalar values are replaced with U+FFFD.
//
// A null `in` yields an empty string.
std::string uni_to_utf8(const wchar_t* in)
{
#ifdef _WIN32
    return uni_to_(CP_UTF8, in);
#else
    std::string utf8;
    if (in == NULL)
        return utf8;

    for (const wchar_t* p = in; *p != L'\0'; ++p)
    {
        unsigned long cp = static_cast<unsigned long>(*p);

        if (sizeof(wchar_t) < 4)
        {
            // UTF-16 input: merge a high surrogate with the low surrogate
            // that follows it. p[1] is at worst the terminator.
            cp &= 0xFFFFul;
            if (cp >= 0xD800ul && cp <= 0xDBFFul)
            {
                const unsigned long low = static_cast<unsigned long>(p[1]) & 0xFFFFul;
                if (low >= 0xDC00ul && low <= 0xDFFFul)
                {
                    cp = 0x10000ul + ((cp - 0xD800ul) << 10) + (low - 0xDC00ul);
                    ++p;
                }
            }
        }

        // Lone surrogates and out-of-range values cannot be encoded.
        if (cp > 0x10FFFFul || (cp >= 0xD800ul && cp <= 0xDFFFul))
            cp = 0xFFFDul;

        if (cp < 0x80ul)
        {
            utf8 += static_cast<char>(cp);
        }
        else if (cp < 0x800ul)
        {
            utf8 += static_cast<char>(0xC0ul | (cp >> 6));
            utf8 += static_cast<char>(0x80ul | (cp & 0x3Ful));
        }
        else if (cp < 0x10000ul)
        {
            utf8 += static_cast<char>(0xE0ul | (cp >> 12));
            utf8 += static_cast<char>(0x80ul | ((cp >> 6) & 0x3Ful));
            utf8 += static_cast<char>(0x80ul | (cp & 0x3Ful));
        }
        else
        {
            utf8 += static_cast<char>(0xF0ul | (cp >> 18));
            utf8 += static_cast<char>(0x80ul | ((cp >> 12) & 0x3Ful));
            utf8 += static_cast<char>(0x80ul | ((cp >> 6) & 0x3Ful));
            utf8 += static_cast<char>(0x80ul | (cp & 0x3Ful));
        }
    }
    return utf8;
#endif
}
// Converts a NUL-terminated UTF-8 string to a wide string.
//
// Windows: delegates to MultiByteToWideChar via _to_uni(CP_UTF8, ...).
// Other:   decodes directly, independent of the locale. The result is
//          UTF-32 when wchar_t is at least 4 bytes wide, otherwise UTF-16
//          (surrogate pairs). Malformed input (bad lead byte, truncated or
//          overlong sequence, encoded surrogate, value above U+10FFFF) is
//          replaced with U+FFFD.
//
// A null `in` yields an empty wide string.
std::wstring utf8_to_uni(const char* in)
{
#ifdef _WIN32
    return _to_uni(CP_UTF8, in);
#else
    std::wstring wide;
    if (in == NULL)
        return wide;

    const unsigned char* p = reinterpret_cast<const unsigned char*>(in);
    while (*p != 0)
    {
        unsigned long cp = *p++;
        int continuation = 0;     // number of 10xxxxxx bytes still expected
        unsigned long min_cp = 0; // smallest value legal for this length
        bool valid = true;

        if (cp < 0x80ul)
        {
            // Plain ASCII, nothing more to read.
        }
        else if ((cp & 0xE0ul) == 0xC0ul)
        {
            cp &= 0x1Ful; continuation = 1; min_cp = 0x80ul;
        }
        else if ((cp & 0xF0ul) == 0xE0ul)
        {
            cp &= 0x0Ful; continuation = 2; min_cp = 0x800ul;
        }
        else if ((cp & 0xF8ul) == 0xF0ul)
        {
            cp &= 0x07ul; continuation = 3; min_cp = 0x10000ul;
        }
        else
        {
            valid = false; // stray continuation byte or invalid lead byte
        }

        for (int i = 0; valid && i < continuation; ++i)
        {
            // A NUL terminator also fails this check, so a truncated
            // sequence never reads past the end of the string. The offending
            // byte is left in place and re-examined as a new lead byte.
            if ((*p & 0xC0u) != 0x80u)
            {
                valid = false;
                break;
            }
            cp = (cp << 6) | (*p & 0x3Fu);
            ++p;
        }

        if (!valid || cp < min_cp || cp > 0x10FFFFul || (cp >= 0xD800ul && cp <= 0xDFFFul))
            cp = 0xFFFDul;

        if (sizeof(wchar_t) >= 4 || cp < 0x10000ul)
        {
            wide += static_cast<wchar_t>(cp);
        }
        else
        {
            // 16-bit wchar_t: emit a UTF-16 surrogate pair.
            cp -= 0x10000ul;
            wide += static_cast<wchar_t>(0xD800ul + (cp >> 10));
            wide += static_cast<wchar_t>(0xDC00ul + (cp & 0x3FFul));
        }
    }
    return wide;
#endif
}
另外一种实现方式(UTF-8 与 GBK 互转):
#ifdef _MSC_VER
// Converts a UTF-8 string to the system ANSI code page (CP_ACP) by way of
// UTF-16.
//
// NOTE(review): CP_ACP is GBK (code page 936) only on systems configured for
// Simplified Chinese - confirm this matches the callers' expectations.
//
// Returns the converted text up to its first NUL byte, or an empty string if
// either Windows conversion call fails.
std::string UTF8ToGBK(const std::string& strUtf8)
{
    // Step 1: UTF-8 -> UTF-16. The first call only measures (the size
    // includes the terminating NUL because the input length is -1).
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0);
    if (wideLen <= 0)
        return std::string();
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, &wide[0], wideLen) <= 0)
        return std::string();

    // Step 2: UTF-16 -> ANSI code page, same measure-then-convert pattern.
    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
        return std::string();
    std::string ansi(static_cast<std::string::size_type>(ansiLen), '\0');
    if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &ansi[0], ansiLen, NULL, NULL) <= 0)
        return std::string();

    // Drop the terminator (and anything after it) that the API wrote.
    return std::string(ansi.c_str());
}
// Converts a string in the system ANSI code page (CP_ACP) to UTF-8 by way of
// UTF-16.
//
// NOTE(review): CP_ACP is GBK (code page 936) only on systems configured for
// Simplified Chinese - confirm this matches the callers' expectations.
//
// Returns the converted text up to its first NUL byte, or an empty string if
// either Windows conversion call fails.
std::string GBKToUTF8(const std::string& strGBK)
{
    // Step 1: ANSI -> UTF-16. The first call only measures (the size
    // includes the terminating NUL because the input length is -1).
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, NULL, 0);
    if (wideLen <= 0)
        return std::string();
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, &wide[0], wideLen) <= 0)
        return std::string();

    // Step 2: UTF-16 -> UTF-8, same measure-then-convert pattern.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
        return std::string();
    std::string utf8(static_cast<std::string::size_type>(utf8Len), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, NULL, NULL) <= 0)
        return std::string();

    // Drop the terminator (and anything after it) that the API wrote.
    return std::string(utf8.c_str());
}
#else
#include <iconv.h>
// Converts `in_left` bytes at `in_buf` from `from_charset` to `to_charset`
// with iconv and stores the NUL-terminated result in `out_buf`.
//
// out_left  Total capacity of `out_buf` in bytes, including room for the
//           terminating NUL.
//
// Returns the number of bytes written (excluding the terminator), or -1 if
// `out_buf` is null or has zero capacity, the charset pair is unsupported,
// or the conversion fails (including when the output does not fit).
int charset_convert(const char *from_charset, const char *to_charset, char *in_buf, size_t in_left, char *out_buf, size_t out_left)
{
    if (out_buf == NULL || out_left == 0)
    {
        return -1;
    }

    iconv_t icd = iconv_open(to_charset, from_charset);
    if ((iconv_t)-1 == icd)
    {
        return -1;
    }

    char *pin = in_buf;
    char *pout = out_buf;
    // Keep one byte back for the terminator. Otherwise an output that exactly
    // fills the buffer makes the terminating write land one past the end.
    size_t out_avail = out_left - 1;

    const size_t rc = iconv(icd, &pin, &in_left, &pout, &out_avail);
    iconv_close(icd);
    if ((size_t)-1 == rc)
    {
        return -1;
    }

    *pout = '\0';
    return static_cast<int>(pout - out_buf);
}
// Converts a UTF-8 string to GBK using iconv.
//
// The target charset is "GBK" rather than "GB2312": GB2312 is only a subset,
// so characters that exist in GBK but not in GB2312 made the whole
// conversion fail.
//
// Returns the converted string, or the input unchanged if conversion fails.
std::string UTF8ToGBK(const std::string& strUtf8)
{
    // Four output bytes per input byte (plus slack) is a generous upper
    // bound. The buffer is owned by std::string, so the failure path does
    // not leak it.
    std::string gbk((strUtf8.size() + 1) * 4, '\0');

    // const_cast: charset_convert takes a non-const input pointer to match
    // iconv(), which does not modify the input bytes.
    const int written = charset_convert("UTF-8", "GBK", const_cast<char*>(strUtf8.c_str()), strUtf8.size(), &gbk[0], gbk.size());
    if (written < 0)
    {
        return strUtf8; // by default
    }

    gbk.resize(static_cast<std::string::size_type>(written));
    return gbk;
}
// Converts a GBK string to UTF-8 using iconv.
//
// The source charset is "GBK" rather than "gb2312": GB2312 is only a subset,
// so input containing characters that exist in GBK but not in GB2312 made
// the whole conversion fail.
//
// Returns the converted string, or the input unchanged if conversion fails.
std::string GBKToUTF8(const std::string& strGBK)
{
    // Four output bytes per input byte (plus slack) is a generous upper
    // bound. The buffer is owned by std::string, so the failure path does
    // not leak it.
    std::string utf8((strGBK.size() + 1) * 4, '\0');

    // const_cast: charset_convert takes a non-const input pointer to match
    // iconv(), which does not modify the input bytes.
    const int written = charset_convert("GBK", "UTF-8", const_cast<char*>(strGBK.c_str()), strGBK.size(), &utf8[0], utf8.size());
    if (written < 0)
    {
        return strGBK; // by default
    }

    utf8.resize(static_cast<std::string::size_type>(written));
    return utf8;
}
#endif