C++常用字符集转换方法三

风一流世

已于 2022-12-09 10:17:15 修改

阅读量749

点赞数 1

分类专栏： C++ C++字符转换文章标签： c++ 开发语言

于 2022-12-05 13:40:19 首次发布

本文链接：https://blog.csdn.net/fengyunzhenyu/article/details/128185145

版权

C++ 同时被 2 个专栏收录

6 篇文章 0 订阅

订阅专栏

C++字符转换

3 篇文章 0 订阅

订阅专栏

C++常用字符转换UTF8到string类型：
直接使用系统提供的接口：在 Windows 下使用 Windows API，在 Linux 下使用 iconv 接口；此方法可以跨平台使用，在 Linux、Windows 下都适用。
例如：在代码中直接写中文并转换为 UTF-8 编码，可以使用如下接口：
std::string str = "中国";
std::string utf8Str = ansi_to_utf8(str.c_str());

#include <string>

// Public conversion API. All inputs are NUL-terminated C strings.
//   "ansi" - the system narrow encoding: CP_ACP on Windows, the codeset
//            reported by nl_langinfo(CODESET) on Linux.
//   "uni"  - a wchar_t string.
//   "utf8" - a UTF-8 encoded char string.
// The four functions that take or return wchar_t are only implemented on
// Windows; on other platforms they return an empty string.
std::string utf8_to_ansi(const char* in);
std::string ansi_to_utf8(const char* in);
std::string uni_to_ansi(const wchar_t* in);
std::wstring ansi_to_uni(const char* in);
std::string uni_to_utf8(const wchar_t* in);
std::wstring utf8_to_uni(const char* in);

#include <stdio.h>
#ifdef _WIN32
#include <Windows.h>
#else
#include "uniconv.h"
#endif

#ifdef _WIN32
/// Converts a NUL-terminated wide (UTF-16) string to the multi-byte code page
/// `acp` using WideCharToMultiByte.
///
/// @param acp  Target Windows code page, e.g. CP_ACP or CP_UTF8.
/// @param lpw  NUL-terminated wide string. A null pointer yields "".
/// @return     The converted bytes without the trailing NUL, or "" when the
///             conversion fails.
std::string uni_to_(unsigned int acp, const wchar_t* lpw)
{
	if (lpw == NULL)
		return "";

	// First pass: let the API report the exact byte count (terminator
	// included) instead of guessing a per-character expansion factor.
	const int required = WideCharToMultiByte(acp, 0, lpw, -1, NULL, 0, NULL, NULL);
	if (required <= 0)
		return "";

	// std::string owns the buffer, so every return path releases it.
	std::string converted(static_cast<std::string::size_type>(required), '\0');
	const int written = WideCharToMultiByte(acp, 0, lpw, -1, &converted[0], required, NULL, NULL);
	if (written <= 0)
		return "";

	// With an input length of -1 the API also writes the terminating NUL;
	// drop it so the result holds only the payload.
	converted.resize(static_cast<std::string::size_type>(written) - 1);
	return converted;
}
/// Converts a NUL-terminated multi-byte string in code page `acp` to a wide
/// (UTF-16) string using MultiByteToWideChar.
///
/// @param acp  Source Windows code page, e.g. CP_ACP or CP_UTF8.
/// @param lpa  NUL-terminated narrow string. A null pointer yields L"".
/// @return     The converted wide string without the trailing NUL, or L""
///             when the conversion fails.
std::wstring _to_uni(unsigned int acp, const char* lpa)
{
	if (lpa == NULL)
		return L"";

	// First pass: query the number of wide characters needed (terminator
	// included).
	const int required = MultiByteToWideChar(acp, 0, lpa, -1, NULL, 0);
	if (required <= 0)
		return L"";

	// std::wstring owns the buffer, so every return path releases it.
	std::wstring converted(static_cast<std::wstring::size_type>(required), L'\0');
	const int written = MultiByteToWideChar(acp, 0, lpa, -1, &converted[0], required);
	if (written <= 0)
		return L"";

	// Drop the terminating NUL that the API wrote for the -1 input length.
	converted.resize(static_cast<std::wstring::size_type>(written) - 1);
	return converted;
}
#elif defined(__linux__)
/// Converts `inlen` bytes at `in` from `from_codeset` to `to_codeset` with
/// iconv, writing at most `outlen` bytes to `out`.
///
/// The output is not NUL-terminated by this function unless the terminator
/// is part of the converted input.
///
/// @return 0 on success, otherwise the errno value reported by iconv_open or
///         iconv. An iconv failure is also logged to stderr.
int codeset_conv(const char* from_codeset, const char* to_codeset, char* in, size_t inlen, char* out, size_t outlen)
{
	iconv_t descriptor = iconv_open(to_codeset, from_codeset);
	if ((iconv_t)-1 == descriptor)
		return errno;

	int status = 0;

	// NEOWARE_NL3 toolchains declare the iconv input argument as const char**.
#ifdef NEOWARE_NL3
	const size_t converted = iconv(descriptor, (const char**)&in, &inlen, &out, &outlen);
#else
	const size_t converted = iconv(descriptor, &in, &inlen, &out, &outlen);
#endif

	if (converted == (size_t)-1)
	{
		status = errno;
		fprintf(stderr, "%d %s\n", errno, strerror(errno));
	}

	iconv_close(descriptor);
	return status;
}

/// Converts the NUL-terminated string `in` from `from_codeset` to
/// `to_codeset` and returns the result as a std::string.
///
/// @param from_codeset  iconv name of the source encoding.
/// @param to_codeset    iconv name of the target encoding.
/// @param in            NUL-terminated input. A null pointer yields "".
/// @return              The converted text, or a copy of `in` when the
///                      conversion fails.
std::string codeset_conv(const char* from_codeset, const char* to_codeset, const char* in)
{
	if (in == NULL)
		return "";

	// The terminator is converted along with the text so that the output
	// is NUL-terminated as well.
	const size_t inlen = strlen(in) + 1;
	// Conservative bound: a single input byte can expand to several output
	// bytes (e.g. a single-byte legacy character to 3 bytes of UTF-8).
	const size_t outlen = inlen * 4;

	// iconv needs a mutable input pointer; both buffers are owned by
	// std::string so nothing leaks on the failure path.
	std::string inbuf(in, inlen);
	std::string outbuf(outlen, '\0');

	if (0 != codeset_conv(from_codeset, to_codeset, &inbuf[0], inlen, &outbuf[0], outlen))
		return in; // If conversion fails, return the original string.

	// Cut at the first NUL, i.e. the converted terminator.
	return std::string(outbuf.c_str());
}
#endif // __linux__

//

/// Converts a NUL-terminated UTF-8 string to the system narrow encoding.
///
/// Windows: goes through a wide string (UTF-8 -> wide -> CP_ACP).
/// Linux:   converts with iconv to the codeset reported by
///          nl_langinfo(CODESET); if that conversion fails the input is
///          returned unchanged.
///          NOTE(review): nl_langinfo reflects the current locale, so the
///          caller presumably has to call setlocale() first - confirm.
/// Other:   no conversion backend is available; the input is returned as-is.
std::string utf8_to_ansi(const char* in)
{
#ifdef _WIN32
	return uni_to_ansi(utf8_to_uni(in).c_str());
#elif defined(__linux__)
	char* codeset = nl_langinfo(CODESET);
	return codeset_conv("UTF-8", codeset, in);
#else
	// Without this branch the function would fall off its end, which is
	// undefined behaviour for a non-void function.
	return in ? std::string(in) : std::string();
#endif
}

/// Converts a NUL-terminated string in the system narrow encoding to UTF-8.
///
/// Windows: goes through a wide string (CP_ACP -> wide -> UTF-8).
/// Linux:   converts with iconv from the codeset reported by
///          nl_langinfo(CODESET); if that conversion fails the input is
///          returned unchanged.
///          NOTE(review): nl_langinfo reflects the current locale, so the
///          caller presumably has to call setlocale() first - confirm.
/// Other:   no conversion backend is available; the input is returned as-is.
std::string ansi_to_utf8(const char* in)
{
#ifdef _WIN32
	return uni_to_utf8(ansi_to_uni(in).c_str());
#elif defined(__linux__)
	char* codeset = nl_langinfo(CODESET);
	return codeset_conv(codeset, "UTF-8", in);
#else
	// Without this branch the function would fall off its end, which is
	// undefined behaviour for a non-void function.
	return in ? std::string(in) : std::string();
#endif
}

/// Converts a NUL-terminated wide string to the system narrow encoding.
/// Only implemented on Windows (code page CP_ACP); every other platform
/// gets an empty string.
std::string uni_to_ansi(const wchar_t* in)
{
#if defined(_WIN32)
	return uni_to_(CP_ACP, in);
#else
	// TODO: not implemented outside Windows yet.
	return std::string();
#endif
}

/// Converts a NUL-terminated string in the system narrow encoding to a wide
/// string. Only implemented on Windows (code page CP_ACP); every other
/// platform gets an empty string.
std::wstring ansi_to_uni(const char* in)
{
#if defined(_WIN32)
	return _to_uni(CP_ACP, in);
#else
	// TODO: not implemented outside Windows yet.
	return std::wstring();
#endif
}

/// Converts a NUL-terminated wide string to UTF-8.
/// Only implemented on Windows (code page CP_UTF8); every other platform
/// gets an empty string.
std::string uni_to_utf8(const wchar_t* in)
{
#if defined(_WIN32)
	return uni_to_(CP_UTF8, in);
#else
	// TODO: not implemented outside Windows yet.
	return std::string();
#endif
}

/// Converts a NUL-terminated UTF-8 string to a wide string.
/// Only implemented on Windows (code page CP_UTF8); every other platform
/// gets an empty string.
std::wstring utf8_to_uni(const char* in)
{
#if defined(_WIN32)
	return _to_uni(CP_UTF8, in);
#else
	// TODO: not implemented outside Windows yet.
	return std::wstring();
#endif
}

另外一种：

#ifdef _MSC_VER
/// Converts a UTF-8 string to the system ANSI code page (CP_ACP) by way of
/// an intermediate wide string.
///
/// NOTE(review): CP_ACP is only GBK on systems whose ANSI code page is 936;
/// confirm whether the code page should be pinned.
///
/// @param strUtf8  UTF-8 input; it is read up to its first NUL.
/// @return         The converted text, or "" when either conversion step
///                 fails.
std::string UTF8ToGBK(const std::string& strUtf8)
{
	// Step 1: UTF-8 -> wide. Query the size first, then convert into a
	// buffer owned by std::wstring so it cannot leak.
	const int wideLen = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0);
	if (wideLen <= 0)
		return std::string();
	std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
	if (MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, &wide[0], wideLen) <= 0)
		return std::string();

	// Step 2: wide -> ANSI code page, same two-pass pattern.
	const int narrowLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (narrowLen <= 0)
		return std::string();
	std::string narrow(static_cast<std::string::size_type>(narrowLen), '\0');
	if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &narrow[0], narrowLen, NULL, NULL) <= 0)
		return std::string();

	// Cut at the terminator that the API wrote for the -1 input length.
	return std::string(narrow.c_str());
}

/// Converts a string in the system ANSI code page (CP_ACP) to UTF-8 by way
/// of an intermediate wide string.
///
/// NOTE(review): CP_ACP is only GBK on systems whose ANSI code page is 936;
/// confirm whether the code page should be pinned.
///
/// @param strGBK  Input in the ANSI code page; it is read up to its first NUL.
/// @return        The UTF-8 text, or "" when either conversion step fails.
std::string GBKToUTF8(const std::string& strGBK)
{
	// Step 1: ANSI -> wide. A zero size means failure; continuing would
	// hand an unterminated zero-length buffer to the second step.
	const int wideLen = MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, NULL, 0);
	if (wideLen <= 0)
		return std::string();
	std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
	if (MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, &wide[0], wideLen) <= 0)
		return std::string();

	// Step 2: wide -> UTF-8.
	const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (utf8Len <= 0)
		return std::string();
	std::string utf8(static_cast<std::string::size_type>(utf8Len), '\0');
	if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, NULL, NULL) <= 0)
		return std::string();

	// Cut at the terminator that the API wrote for the -1 input length.
	return std::string(utf8.c_str());
}

#else

#include <iconv.h>

/// Converts `in_left` bytes at `in_buf` from `from_charset` to `to_charset`
/// with iconv and NUL-terminates the result in `out_buf`.
///
/// @param from_charset  iconv name of the source encoding.
/// @param to_charset    iconv name of the target encoding.
/// @param in_buf        Input bytes.
/// @param in_left       Number of input bytes to convert.
/// @param out_buf       Output buffer.
/// @param out_left      Total size of `out_buf` in bytes; one byte of it is
///                      reserved for the terminating NUL.
/// @return              Number of bytes written (terminator excluded), or -1
///                      when the charsets are unsupported, the input is
///                      invalid, or the output does not fit.
int charset_convert(const char *from_charset, const char *to_charset, char *in_buf, size_t in_left, char *out_buf, size_t out_left) 
{
    // There must be room for at least the terminator.
    if (out_buf == NULL || out_left == 0)
    {
        return -1;
    }

    const iconv_t icd = iconv_open(to_charset, from_charset);
    if ((iconv_t)-1 == icd)
    {
        return -1;
    }

    char *pin = in_buf;
    char *pout = out_buf;
    // Hold back one byte so the terminator never lands past the buffer,
    // even when the converted text fills all of the remaining space.
    const size_t capacity = out_left - 1;
    size_t remaining = capacity;

    const size_t rc = iconv(icd, &pin, &in_left, &pout, &remaining);
    iconv_close(icd);
    if ((size_t)-1 == rc)
    {
        return -1;
    }

    const size_t written = capacity - remaining;
    out_buf[written] = 0;
    return (int)written;
} 

/// Converts a UTF-8 string to GBK with iconv.
///
/// @param strUtf8  UTF-8 input.
/// @return         The GBK-encoded text, or a copy of `strUtf8` when the
///                 conversion fails.
std::string UTF8ToGBK(const std::string& strUtf8)
{
	// Generous upper bound for the output; the buffer is owned by
	// std::string so the failure path below cannot leak it.
	std::string converted((strUtf8.size() + 1) * 4, '\0');

	// "GBK" rather than "GB2312": GBK is a superset, so characters outside
	// the GB2312 repertoire no longer make the whole conversion fail.
	// iconv does not modify its input, which makes the const_cast safe.
	const int written = charset_convert("UTF-8", "GBK", const_cast<char*>(strUtf8.c_str()), strUtf8.size(), &converted[0], converted.size());
	if (written == -1)
		return strUtf8;	// by default

	converted.resize(static_cast<std::string::size_type>(written));
	return converted;
}

/// Converts a GBK-encoded string to UTF-8 with iconv.
///
/// @param strGBK  GBK input.
/// @return        The UTF-8 text, or a copy of `strGBK` when the conversion
///                fails.
std::string GBKToUTF8(const std::string& strGBK)
{
	// Generous upper bound for the output; the buffer is owned by
	// std::string so the failure path below cannot leak it.
	std::string converted((strGBK.size() + 1) * 4, '\0');

	// "GBK" rather than "gb2312": GBK is a superset, so input containing
	// characters outside the GB2312 repertoire is still decoded.
	// iconv does not modify its input, which makes the const_cast safe.
	const int written = charset_convert("GBK", "UTF-8", const_cast<char*>(strGBK.c_str()), strGBK.size(), &converted[0], converted.size());
	if (written == -1)
		return strGBK;	// by default

	converted.resize(static_cast<std::string::size_type>(written));
	return converted;
}

#endif

风一流世

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
C++常用字符集转换方法三

C++常用字符转换UTF8到string类型：直接使用系统提供的接口，在 Windows 下使用 Windows API，在 Linux 下使用 iconv 接口；此方法可以跨平台使用，在 Linux、Windows 下都适用。例如：在代码中直接写中文并转换为 UTF-8 编码，可以使用如下接口：std::string str = "中国"; std::string utf8Str = ansi_to_utf8(str.c_str());
复制链接

扫一扫

专栏目录