编码转换参考范例大全

最新推荐文章于 2024-09-13 14:39:26 发布

码农飞飞

最新推荐文章于 2024-09-13 14:39:26 发布

阅读量1k

点赞数 1

分类专栏： C/C++ 文章标签：开发语言字符串 c++

本文链接：https://blog.csdn.net/yang1fei2/article/details/121307140

版权

C/C++ 专栏收录该内容

37 篇文章 26 订阅

订阅专栏

本文详细介绍了编程中遇到的各种编码转换方法，包括获取文件编码类型、宽字符与窄字符、UTF-8与UTF-16之间的转换，以及wstring与string之间的互换。提供了实用的C++代码示例，帮助开发者解决编码问题，但请注意部分方法依赖Windows API，可能不适用于所有平台。

摘要由CSDN通过智能技术生成

在编程的时候，各种各样的编码转换让人挠头。有的地方需要宽字符，有的地方需要窄字符，需要转换；有的地方需要UTF-8编码，有的地方需要UTF-16编码，也需要转换。各种各样的编码转换让人头疼，这里就梳理一下各种各样的编码转换方法，让你不再为这个问题头疼。需要注意的是，有些方法调用了Windows的API，无法实现跨平台。

获取文件的编码类型

// Detects a file's text encoding from its leading byte-order mark (BOM).
// When no BOM is present the file may be plain ASCII or BOM-less UTF-8; the
// two cannot be told apart from the header alone, so UTF8 is reported and the
// caller has to decide/specify the real encoding if that distinction matters.
enum CODETYPE { ASCII, UTF8, UTF8BOM, UTF16LE, UTF16BE, UTF32LE, UTF32BE, UNKNOWN_TYPE };

// Returns the encoding indicated by the BOM of the file at file_path.
//   file_path: path of the file to inspect.
//   returns:   UNKNOWN_TYPE if file_path is null or the file cannot be opened;
//              UTF32LE / UTF32BE / UTF8BOM / UTF16LE / UTF16BE when the
//              matching BOM is found; UTF8 otherwise (including empty files).
CODETYPE GetFileCodeType(const char* file_path)
{
	if (!file_path)
	{
		return UNKNOWN_TYPE;
	}

	FILE* file = fopen(file_path, "rb");
	if (!file)
	{
		return UNKNOWN_TYPE;
	}

	// The longest BOM is 4 bytes, so only the head of the file is read.
	// A fixed stack buffer means nothing has to be allocated or freed.
	unsigned char head[4] = { 0, 0, 0, 0 };
	const size_t head_len = fread(head, 1, sizeof(head), file);
	fclose(file);

	// UTF-32 must be tested before UTF-16: the UTF-32LE BOM (FF FE 00 00)
	// starts with the UTF-16LE BOM (FF FE).
	if (head_len >= 4)
	{
		if (memcmp(head, "\xFF\xFE\x00\x00", 4) == 0) return UTF32LE;
		if (memcmp(head, "\x00\x00\xFE\xFF", 4) == 0) return UTF32BE;
	}

	if (head_len >= 3)
	{
		if (memcmp(head, "\xEF\xBB\xBF", 3) == 0) return UTF8BOM;
	}

	if (head_len >= 2)
	{
		if (memcmp(head, "\xFF\xFE", 2) == 0) return UTF16LE;
		if (memcmp(head, "\xFE\xFF", 2) == 0) return UTF16BE;
	}

	// No BOM found: ASCII or BOM-less UTF-8 (indistinguishable here).
	return UTF8;
}

wchar_t转char

// Converts a NUL-terminated wide string to a narrow string in the system
// ANSI code page (CP_ACP). Windows-only: relies on WideCharToMultiByte.
//   wchar:   wide string to convert; may be null.
//   returns: a NUL-terminated buffer allocated with new[] that the caller
//            owns and must release with delete[] (not free()), or nullptr
//            when wchar is null. If the conversion fails the returned
//            string is empty.
char* wchar_to_char(const wchar_t* wchar)
{
	if (!wchar)
	{
		return nullptr;
	}

	// Measure the input once; both API calls below need the same length.
	const int wide_len = static_cast<int>(wcslen(wchar));

	// First call (null output buffer) only reports the required byte count.
	int len = WideCharToMultiByte(CP_ACP, 0, wchar, wide_len, NULL, 0, NULL, NULL);
	if (len < 0)
	{
		len = 0;
	}

	char* m_char = new char[len + 1];

	// Second call performs the conversion. Terminate at the number of bytes
	// actually written so a failed conversion yields "" instead of garbage.
	int written = 0;
	if (len > 0)
	{
		written = WideCharToMultiByte(CP_ACP, 0, wchar, wide_len, m_char, len, NULL, NULL);
		if (written < 0 || written > len)
		{
			written = 0;
		}
	}
	m_char[written] = '\0';
	return m_char;
}
// The result is allocated with new[]; remember to release it with delete[] (not free()).

char转wchar_t

// Converts a NUL-terminated narrow string in the system ANSI code page
// (CP_ACP) to a wide string. Windows-only: relies on MultiByteToWideChar.
//   s:       narrow string to convert.
//   returns: a NUL-terminated buffer allocated with malloc() that the caller
//            owns and must release with free(), or NULL if the required
//            length cannot be determined or the allocation fails.
wchar_t* char_to_wchar(char *s){
	// Passing -1 as the source length makes the API process the terminating
	// NUL too, so the reported length already includes room for it.
	const int w_nlen = MultiByteToWideChar(CP_ACP, 0, s, -1, NULL, 0);
	if (w_nlen <= 0)
	{
		return NULL;
	}

	const size_t bytes = sizeof(wchar_t) * static_cast<size_t>(w_nlen);
	wchar_t* ret = static_cast<wchar_t*>(malloc(bytes));
	if (!ret)
	{
		return NULL;
	}

	// Zero the whole buffer (its byte size, not the size of the pointer) so
	// the result is an empty string if the conversion below fails.
	memset(ret, 0, bytes);
	MultiByteToWideChar(CP_ACP, 0, s, -1, ret, w_nlen);
	return ret;
}
// The result is allocated with malloc(); remember to release it with free().

wstring转string

// Converts a wide string to a narrow string in the system ANSI code page
// (CP_ACP). Windows-only: relies on WideCharToMultiByte.
//   wc:      wide string to convert (embedded NULs are converted as well,
//            because the explicit length is passed to the API).
//   returns: the converted string; empty if wc is empty or conversion fails.
std::string wstr_to_str(const std::wstring &wc)
{
    if (wc.empty())
    {
        return std::string();
    }

    const int lenWC = static_cast<int>(wc.length());

    // First call (null output buffer) only reports the required byte count.
    const int lenMB = ::WideCharToMultiByte(CP_ACP, 0, wc.c_str(), lenWC, nullptr, 0, nullptr, nullptr);
    if (lenMB <= 0)
    {
        return std::string();
    }

    // Convert directly into the result's own storage: no temporary array to
    // manage, hence no new[]/delete mismatch.
    std::string mb(static_cast<size_t>(lenMB), '\0');
    const int written = ::WideCharToMultiByte(CP_ACP, 0, wc.c_str(), lenWC, &mb[0], lenMB, nullptr, nullptr);
    mb.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return mb;
}

string转wstring

// Converts a narrow string in the system ANSI code page (CP_ACP) to a wide
// string. Windows-only: relies on MultiByteToWideChar.
//   mb:      narrow string to convert (embedded NULs are converted as well,
//            because the explicit length is passed to the API).
//   returns: the converted string; empty if mb is empty or conversion fails.
std::wstring str_to_wstr(const std::string &mb)
{
    if (mb.empty())
    {
        return std::wstring();
    }

    const int lenMB = static_cast<int>(mb.length());

    // First call (null output buffer) only reports the required length in
    // wide characters.
    const int lenWC = ::MultiByteToWideChar(CP_ACP, 0, mb.c_str(), lenMB, nullptr, 0);
    if (lenWC <= 0)
    {
        return std::wstring();
    }

    // Convert directly into the result's own storage: no temporary array to
    // manage, hence no new[]/delete mismatch.
    std::wstring wc(static_cast<size_t>(lenWC), L'\0');
    const int written = ::MultiByteToWideChar(CP_ACP, 0, mb.c_str(), lenMB, &wc[0], lenWC);
    wc.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return wc;
}

wstring（UTF-16）转UTF-8 string

// Converts a wide string to a UTF-8 encoded narrow string (CP_UTF8).
// Windows-only: relies on WideCharToMultiByte.
//   wc:      wide string to convert (embedded NULs are converted as well,
//            because the explicit length is passed to the API).
//   returns: the UTF-8 bytes; empty if wc is empty or conversion fails.
std::string wstr_to_utf8str(const std::wstring& wc)
{
    if (wc.empty())
    {
        return std::string();
    }

    const int lenWC = static_cast<int>(wc.length());

    // First call (null output buffer) only reports the required byte count.
    const int lenUTF8 = WideCharToMultiByte(CP_UTF8, 0, wc.c_str(), lenWC, nullptr, 0, nullptr, nullptr);
    if (lenUTF8 <= 0)
    {
        return std::string();
    }

    // Convert directly into the result's own storage: no temporary array to
    // manage, hence no new[]/delete mismatch.
    std::string utf8(static_cast<size_t>(lenUTF8), '\0');
    const int written = WideCharToMultiByte(CP_UTF8, 0, wc.c_str(), lenWC, &utf8[0], lenUTF8, nullptr, nullptr);
    utf8.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return utf8;
}

UTF-8 string转wstring（UTF-16）

// Converts a UTF-8 encoded narrow string (CP_UTF8) to a wide string.
// Windows-only: relies on MultiByteToWideChar.
//   utf8:    UTF-8 bytes to convert (embedded NULs are converted as well,
//            because the explicit length is passed to the API).
//   returns: the wide string; empty if utf8 is empty or conversion fails.
std::wstring utf8str_to_wstr(const std::string &utf8)
{
    if (utf8.empty())
    {
        return std::wstring();
    }

    const int lenUTF8 = static_cast<int>(utf8.length());

    // First call (null output buffer) only reports the required length in
    // wide characters.
    const int lenWC = ::MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), lenUTF8, nullptr, 0);
    if (lenWC <= 0)
    {
        return std::wstring();
    }

    // Convert directly into the result's own storage: no temporary array to
    // manage, hence no new[]/delete mismatch.
    std::wstring wc(static_cast<size_t>(lenWC), L'\0');
    const int written = ::MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), lenUTF8, &wc[0], lenWC);
    wc.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return wc;
}

UTF-8编码转多字节编码

// Re-encodes UTF-8 text into the system ANSI code page (CP_ACP), going
// through an intermediate wide string. Windows-only: relies on
// MultiByteToWideChar and WideCharToMultiByte.
//   utf8:    UTF-8 bytes to convert.
//   returns: the text in the ANSI code page; empty if utf8 is empty or
//            either conversion step fails.
std::string utf8_to_multibytes(const std::string& utf8)
{
    if (utf8.empty())
    {
        return std::string();
    }

    // Step 1: UTF-8 -> wide. The first call of each pair passes a null
    // output buffer and only reports the required length.
    const int lenUTF8 = static_cast<int>(utf8.length());
    int lenWC = ::MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), lenUTF8, nullptr, 0);
    if (lenWC <= 0)
    {
        return std::string();
    }
    // std::wstring / std::string own the buffers, so there is no temporary
    // array to release and no new[]/delete mismatch.
    std::wstring wc(static_cast<size_t>(lenWC), L'\0');
    lenWC = ::MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), lenUTF8, &wc[0], lenWC);
    if (lenWC <= 0)
    {
        return std::string();
    }

    // Step 2: wide -> ANSI code page.
    const int lenMB = ::WideCharToMultiByte(CP_ACP, 0, wc.c_str(), lenWC, nullptr, 0, nullptr, nullptr);
    if (lenMB <= 0)
    {
        return std::string();
    }
    std::string mb(static_cast<size_t>(lenMB), '\0');
    const int written = ::WideCharToMultiByte(CP_ACP, 0, wc.c_str(), lenWC, &mb[0], lenMB, nullptr, nullptr);
    mb.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return mb;
}

多字节编码转UTF-8编码

// Re-encodes text from the system ANSI code page (CP_ACP) into UTF-8, going
// through an intermediate wide string. Windows-only: relies on
// MultiByteToWideChar and WideCharToMultiByte.
//   mb:      text in the ANSI code page.
//   returns: the UTF-8 bytes; empty if mb is empty or either conversion
//            step fails.
std::string multibytes_to_utf8(const std::string &mb)
{
    if (mb.empty())
    {
        return std::string();
    }

    // Step 1: ANSI code page -> wide. The first call of each pair passes a
    // null output buffer and only reports the required length.
    const int lenMB = static_cast<int>(mb.length());
    int lenWC = ::MultiByteToWideChar(CP_ACP, 0, mb.c_str(), lenMB, nullptr, 0);
    if (lenWC <= 0)
    {
        return std::string();
    }
    // std::wstring / std::string own the buffers, so there is no temporary
    // array to release and no new[]/delete mismatch.
    std::wstring wc(static_cast<size_t>(lenWC), L'\0');
    lenWC = ::MultiByteToWideChar(CP_ACP, 0, mb.c_str(), lenMB, &wc[0], lenWC);
    if (lenWC <= 0)
    {
        return std::string();
    }

    // Step 2: wide -> UTF-8.
    const int lenUTF8 = ::WideCharToMultiByte(CP_UTF8, 0, wc.c_str(), lenWC, nullptr, 0, nullptr, nullptr);
    if (lenUTF8 <= 0)
    {
        return std::string();
    }
    std::string utf8(static_cast<size_t>(lenUTF8), '\0');
    const int written = ::WideCharToMultiByte(CP_UTF8, 0, wc.c_str(), lenWC, &utf8[0], lenUTF8, nullptr, nullptr);
    utf8.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return utf8;
}

大端字节序转小端字节序

// Swaps the two low-order bytes of every element in place, turning
// big-endian 16-bit code units into little-endian ones (the same swap also
// converts in the opposite direction).
//   src:  array of code units to convert; may be null, in which case
//         nothing is done.
//   size: number of elements in src.
// Only the low 16 bits of each element are kept; any higher bits (possible
// where wchar_t is wider than 16 bits) are cleared by the masks.
void big_to_little(wchar_t* src, unsigned int size)
{
	if (!src)
	{
		return;
	}
	for (unsigned int iix = 0; iix < size; ++iix, ++src) {
		*src = static_cast<wchar_t>((((*src) & 0xff00) >> 8) | (((*src) & 0x00ff) << 8));
	}
}