在编程的时候,各种各样的编码转换让人挠头。有的地方需要宽字符,有的地方需要窄字符,需要转换;有的地方需要UTF-8编码,有的地方需要UTF-16编码,也需要转换。各种各样的编码转换让人头疼,这里就梳理一下各种各样的编码转换方法,让你不再为这个问题头疼。需要注意的是,有些方法调用了Windows的API,无法实现跨平台。
获取文件的编码类型
// Text encodings that GetFileCodeType can report.
// ASCII is never returned by the detector itself: a file without a BOM may be
// plain ASCII or BOM-less UTF-8, and the header alone cannot tell them apart,
// so the caller has to specify/decide that case on its own.
enum CODETYPE { ASCII, UTF8, UTF8BOM, UTF16LE, UTF16BE, UTF32LE, UTF32BE, UNKNOWN_TYPE };

// Detects the encoding of a file by inspecting its byte-order mark (BOM).
//
// file_path: path of the file to inspect (may be null).
// Returns:
//   UNKNOWN_TYPE - file_path is null or the file cannot be opened;
//   UTF32LE / UTF32BE / UTF8BOM / UTF16LE / UTF16BE - the matching BOM was found;
//   UTF8 - no BOM was found (this includes empty files and pure ASCII files).
//
// Note: the UTF-32 LE BOM (FF FE 00 00) starts with the UTF-16 LE BOM (FF FE),
// so a UTF-16 LE file whose first character is U+0000 is reported as UTF32LE.
CODETYPE GetFileCodeType(const char* file_path)
{
    if (!file_path)
    {
        return UNKNOWN_TYPE;
    }
    FILE* file = fopen(file_path, "rb");
    if (!file)
    {
        return UNKNOWN_TYPE;
    }

    // A BOM is at most four bytes long, so only the head of the file is read
    // into a stack buffer; nothing is heap-allocated and nothing can leak.
    unsigned char head[4] = { 0, 0, 0, 0 };
    const size_t got = fread(head, 1, sizeof(head), file);
    fclose(file);

    static const unsigned char kUtf32Le[4] = { 0xFF, 0xFE, 0x00, 0x00 };
    static const unsigned char kUtf32Be[4] = { 0x00, 0x00, 0xFE, 0xFF };
    static const unsigned char kUtf8[3]    = { 0xEF, 0xBB, 0xBF };
    static const unsigned char kUtf16Le[2] = { 0xFF, 0xFE };
    static const unsigned char kUtf16Be[2] = { 0xFE, 0xFF };

    // The longest signatures are tested first: the UTF-32 LE BOM would
    // otherwise be mistaken for the UTF-16 LE one.
    if (got >= 4)
    {
        if (memcmp(head, kUtf32Le, 4) == 0) return UTF32LE;
        if (memcmp(head, kUtf32Be, 4) == 0) return UTF32BE;
    }
    if (got >= 3)
    {
        if (memcmp(head, kUtf8, 3) == 0) return UTF8BOM;
    }
    if (got >= 2)
    {
        if (memcmp(head, kUtf16Le, 2) == 0) return UTF16LE;
        if (memcmp(head, kUtf16Be, 2) == 0) return UTF16BE;
    }
    return UTF8;
}
wchar_t转char
// Converts a NUL-terminated wide string to a newly allocated narrow string,
// using the CP_ACP code page of WideCharToMultiByte.
//
// wchar: NUL-terminated wide string; may be null.
// Returns: a NUL-terminated buffer allocated with new[] (the caller releases
//          it with delete[]), or nullptr when wchar is null. If the conversion
//          fails, an empty string is returned.
char* wchar_to_char(const wchar_t* wchar)
{
    if (!wchar)
    {
        return nullptr;  // wcslen(NULL) would crash
    }

    // Measure the input once; the terminator is excluded and appended manually below.
    const int wide_len = (int)wcslen(wchar);

    // First pass with a null output buffer only queries the required byte count.
    int len = 0;
    if (wide_len > 0)
    {
        len = WideCharToMultiByte(CP_ACP, 0, wchar, wide_len, NULL, 0, NULL, NULL);
    }

    // Value-initialised so the buffer never exposes uninitialised bytes.
    char* m_char = new char[len + 1]();

    int written = 0;
    if (len > 0)
    {
        written = WideCharToMultiByte(CP_ACP, 0, wchar, wide_len, m_char, len, NULL, NULL);
    }
    // Terminate after the bytes actually produced (0 if the conversion failed).
    m_char[written] = '\0';
    return m_char;
}
//结果是用new[]分配的内存,记得用delete[]释放(不能用free)
char转wchar_t
// Converts a NUL-terminated narrow string (CP_ACP code page of
// MultiByteToWideChar) to a newly allocated wide string.
//
// s: NUL-terminated narrow string; may be null.
// Returns: a NUL-terminated buffer allocated with malloc (the caller releases
//          it with free), or NULL when s is null, the conversion fails, or
//          the allocation fails.
wchar_t* char_to_wchar(char *s){
    if (!s) {
        return NULL;
    }

    // Length -1 means "input is NUL-terminated"; the returned count then
    // includes the terminating NUL character.
    const int w_nlen = MultiByteToWideChar(CP_ACP, 0, s, -1, NULL, 0);
    if (w_nlen <= 0) {
        return NULL;
    }

    const size_t bytes = sizeof(wchar_t) * (size_t)w_nlen;
    wchar_t *ret = (wchar_t*)malloc(bytes);
    if (!ret) {
        return NULL;
    }
    // Clear the whole buffer. The size must be the allocation size, not
    // sizeof(ret): the pointer size can exceed the allocation for short
    // inputs and overrun the heap block.
    memset(ret, 0, bytes);

    if (MultiByteToWideChar(CP_ACP, 0, s, -1, ret, w_nlen) == 0) {
        free(ret);
        return NULL;
    }
    return ret;
}
//结果是分配的内存记得free掉
wstring转string
// Converts a wide string to a narrow string using the CP_ACP code page of
// WideCharToMultiByte.
//
// wc: wide string to convert; its explicit length is used, so embedded NUL
//     characters are converted too and no terminator is counted.
// Returns: the converted bytes, or an empty string when wc is empty or the
//          conversion fails.
std::string wstr_to_str(const std::wstring &wc)
{
    if (wc.empty())
    {
        return std::string();
    }
    const int lenWC = static_cast<int>(wc.length());

    // First pass with a null output buffer only queries the required byte count.
    const int lenMB = ::WideCharToMultiByte(CP_ACP, 0, wc.c_str(), lenWC, NULL, 0, NULL, NULL);
    if (lenMB <= 0)
    {
        return std::string();
    }

    // Convert straight into the result string. This avoids owning a new[]
    // array through std::unique_ptr<char>, which would release it with scalar
    // delete (undefined behaviour), and it saves one copy.
    std::string mb(static_cast<size_t>(lenMB), '\0');
    const int written = ::WideCharToMultiByte(CP_ACP, 0, wc.c_str(), lenWC, &mb[0], lenMB, NULL, NULL);
    mb.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return mb;
}
string转wstring
// Converts a narrow string (CP_ACP code page of MultiByteToWideChar) to a
// wide string.
//
// mb: narrow string to convert; its explicit length is used, so embedded NUL
//     bytes are converted too and no terminator is counted.
// Returns: the converted wide characters, or an empty string when mb is empty
//          or the conversion fails.
std::wstring str_to_wstr(const std::string &mb)
{
    if (mb.empty())
    {
        return std::wstring();
    }
    const int lenMB = static_cast<int>(mb.length());

    // First pass with a null output buffer only queries the required character count.
    const int lenWC = ::MultiByteToWideChar(CP_ACP, 0, mb.c_str(), lenMB, NULL, 0);
    if (lenWC <= 0)
    {
        return std::wstring();
    }

    // Convert straight into the result string. This avoids owning a new[]
    // array through std::unique_ptr<wchar_t>, which would release it with
    // scalar delete (undefined behaviour), and it saves one copy.
    std::wstring wc(static_cast<size_t>(lenWC), L'\0');
    const int written = ::MultiByteToWideChar(CP_ACP, 0, mb.c_str(), lenMB, &wc[0], lenWC);
    wc.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return wc;
}
wstring(Windows下为UTF-16)转utf-8 string
// Converts a wide string to a UTF-8 encoded narrow string (CP_UTF8 code page
// of WideCharToMultiByte).
//
// wc: wide string to convert; its explicit length is used, so embedded NUL
//     characters are converted too and no terminator is counted.
// Returns: the UTF-8 bytes, or an empty string when wc is empty or the
//          conversion fails.
std::string wstr_to_utf8str(const std::wstring& wc)
{
    if (wc.empty())
    {
        return std::string();
    }
    const int lenWC = static_cast<int>(wc.length());

    // First pass with a null output buffer only queries the required byte count.
    const int lenUTF8 = WideCharToMultiByte(CP_UTF8, 0, wc.c_str(), lenWC, NULL, 0, NULL, NULL);
    if (lenUTF8 <= 0)
    {
        return std::string();
    }

    // Convert straight into the result string. This avoids owning a new[]
    // array through std::unique_ptr<char>, which would release it with scalar
    // delete (undefined behaviour), and it saves one copy.
    std::string utf8(static_cast<size_t>(lenUTF8), '\0');
    const int written = WideCharToMultiByte(CP_UTF8, 0, wc.c_str(), lenWC, &utf8[0], lenUTF8, NULL, NULL);
    utf8.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return utf8;
}
utf-8 string转wstring(Windows下为UTF-16)
// Converts a UTF-8 encoded narrow string to a wide string (CP_UTF8 code page
// of MultiByteToWideChar).
//
// utf8: UTF-8 bytes to convert; the explicit length is used, so embedded NUL
//       bytes are converted too and no terminator is counted.
// Returns: the converted wide characters, or an empty string when utf8 is
//          empty or the conversion fails.
std::wstring utf8str_to_wstr(const std::string &utf8)
{
    if (utf8.empty())
    {
        return std::wstring();
    }
    const int lenUTF8 = static_cast<int>(utf8.length());

    // First pass with a null output buffer only queries the required character count.
    const int lenWC = ::MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), lenUTF8, NULL, 0);
    if (lenWC <= 0)
    {
        return std::wstring();
    }

    // Convert straight into the result string. This avoids owning a new[]
    // array through std::unique_ptr<wchar_t>, which would release it with
    // scalar delete (undefined behaviour), and it saves one copy.
    std::wstring wc(static_cast<size_t>(lenWC), L'\0');
    const int written = ::MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), lenUTF8, &wc[0], lenWC);
    wc.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return wc;
}
UTF-8编码转多字节编码
// Re-encodes a UTF-8 string into the CP_ACP code page, going through an
// intermediate wide string (CP_UTF8 -> wide -> CP_ACP).
//
// utf8: UTF-8 bytes to convert; the explicit length is used, so no terminator
//       is counted.
// Returns: the bytes in the CP_ACP code page, or an empty string when utf8 is
//          empty or either conversion step fails.
std::string utf8_to_multibytes(const std::string& utf8)
{
    if (utf8.empty())
    {
        return std::string();
    }
    const int lenUTF8 = static_cast<int>(utf8.length());

    // Step 1: UTF-8 -> wide. The first call only queries the required size.
    const int lenWC = ::MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), lenUTF8, NULL, 0);
    if (lenWC <= 0)
    {
        return std::string();
    }
    // The buffers are owned by std::wstring / std::string rather than by
    // std::unique_ptr<T> wrapping a new[] array, which would release the
    // array with scalar delete (undefined behaviour).
    std::wstring wc(static_cast<size_t>(lenWC), L'\0');
    const int wideWritten = ::MultiByteToWideChar(CP_UTF8, 0, utf8.c_str(), lenUTF8, &wc[0], lenWC);
    if (wideWritten <= 0)
    {
        return std::string();
    }

    // Step 2: wide -> CP_ACP, again sizing the output first.
    const int lenMB = ::WideCharToMultiByte(CP_ACP, 0, wc.c_str(), wideWritten, NULL, 0, NULL, NULL);
    if (lenMB <= 0)
    {
        return std::string();
    }
    std::string mb(static_cast<size_t>(lenMB), '\0');
    const int written = ::WideCharToMultiByte(CP_ACP, 0, wc.c_str(), wideWritten, &mb[0], lenMB, NULL, NULL);
    mb.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return mb;
}
多字节编码转UTF-8编码
// Re-encodes a string from the CP_ACP code page into UTF-8, going through an
// intermediate wide string (CP_ACP -> wide -> CP_UTF8).
//
// mb: bytes in the CP_ACP code page; the explicit length is used, so no
//     terminator is counted.
// Returns: the UTF-8 bytes, or an empty string when mb is empty or either
//          conversion step fails.
std::string multibytes_to_utf8(const std::string &mb)
{
    if (mb.empty())
    {
        return std::string();
    }
    const int lenMB = static_cast<int>(mb.length());

    // Step 1: CP_ACP -> wide. The first call only queries the required size.
    const int lenWC = ::MultiByteToWideChar(CP_ACP, 0, mb.c_str(), lenMB, NULL, 0);
    if (lenWC <= 0)
    {
        return std::string();
    }
    // The buffers are owned by std::wstring / std::string rather than by
    // std::unique_ptr<T> wrapping a new[] array, which would release the
    // array with scalar delete (undefined behaviour).
    std::wstring wc(static_cast<size_t>(lenWC), L'\0');
    const int wideWritten = ::MultiByteToWideChar(CP_ACP, 0, mb.c_str(), lenMB, &wc[0], lenWC);
    if (wideWritten <= 0)
    {
        return std::string();
    }

    // Step 2: wide -> UTF-8, again sizing the output first.
    const int lenUTF8 = ::WideCharToMultiByte(CP_UTF8, 0, wc.c_str(), wideWritten, NULL, 0, NULL, NULL);
    if (lenUTF8 <= 0)
    {
        return std::string();
    }
    std::string utf8(static_cast<size_t>(lenUTF8), '\0');
    const int written = ::WideCharToMultiByte(CP_UTF8, 0, wc.c_str(), wideWritten, &utf8[0], lenUTF8, NULL, NULL);
    utf8.resize(written > 0 ? static_cast<size_t>(written) : 0);
    return utf8;
}
大端字节序转小端字节序
// Swaps the two low-order bytes of every element in a wchar_t buffer, in
// place (e.g. 0x1234 becomes 0x3412). Used to flip the byte order of 16-bit
// code units; any bits above the low 16 are cleared by the masks.
//
// src:  buffer to convert in place; a null pointer is ignored.
// size: number of wchar_t elements (not bytes) in src.
void big_to_little(wchar_t* src, unsigned int size)
{
    if (!src)
    {
        return;
    }
    for (unsigned int iix = 0; iix < size; ++iix, ++src) {
        // High byte moves down, low byte moves up.
        *src = (((*src) & 0xff00) >> 8) | (((*src) & 0x00ff) << 8);
    }
}