环境:Win7 x64,VS2015
内容:C++ 相关编码操作
代码:
CodeUitls.hpp
#pragma once
#include <iostream>
#include <vector>
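/*
Notes:
** Multi-byte (narrow) strings: char-based; one character occupies one OR MORE bytes depending on the code page (an ASCII character takes 1 byte, a Chinese character takes 2 bytes in GBK).
** Wide strings: wchar_t-based; on Windows a wchar_t is one 16-bit UTF-16 code unit. Wide strings exist to support the symbols of other regions of the world, such as Chinese and other languages.
What the Windows API calls "Unicode" is UTF-16 wide text. UTF-8 is also a Unicode encoding (1 to 4 bytes per character, 3 bytes for most Chinese characters). GBK is NOT a Unicode encoding; it is a separate multi-byte code page (1 or 2 bytes per character).
** GBK covers all Chinese characters, both simplified and traditional, whereas GB2312 covers simplified characters only.
** "ANSI" in Notepad++ is not one particular encoding; it stands for a different encoding on different systems.
On a US system ANSI is Windows-1252, an ASCII superset (it cannot represent Chinese characters, so they show up garbled).
On a Chinese system ANSI is GBK ("汉字" displays correctly).
On a Korean system ANSI is EUC-KR ("한국어" displays correctly).
** A std::string that contains Chinese text cannot be split into individual characters byte by byte; convert it to std::wstring first and split that.
** std::string to LPCWSTR : std::string -> std::wstring -> std::wstring.c_str()
** std::string to LPCSTR : std::string -> std::string.c_str()
*/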
// Text-encoding helpers built on the Win32 code-page conversion APIs.
namespace CodeUtils
{
// Converts text from the system ANSI code page (CP_ACP; this is GBK only on a
// Simplified-Chinese Windows) to UTF-8 and returns the UTF-8 bytes.
std::string GbkToUtf8(const std::string &gbk);
// Converts UTF-8 text to the system ANSI code page (CP_ACP; GBK on a
// Simplified-Chinese Windows) and returns the converted bytes.
std::string Utf8ToGbk(const std::string &utf8);
// Converts a wide string to a narrow string in the environment's default
// narrow (ANSI) encoding.
std::string WStringToString(const std::wstring &ws);
// Converts a narrow string in the system ANSI code page (CP_ACP) to a wide string.
std::wstring StringToWString(const std::string &str);
// Extended helpers built on the conversions above.
// Returns true only when wstr holds exactly one wide character whose value
// lies in 0x4E00-0x9FFF (the CJK Unified Ideographs block); false otherwise.
bool IsChineseChar(const std::wstring &wstr);
// Splits str into one narrow string per wide character of its wide-string
// form, in order. ("Spite" is a misspelling of "Split"; the name is part of
// the public interface.)
std::vector<std::string> SpiteStringCharacter(const std::string &str);
// Counts the wide characters of str's wide-string form for which
// IsChineseChar returns true.
int GetStringChineseCharCount(const std::string &str);
}
CodeUitls.cpp
#include "CodeUitls.hpp"
#include <Windows.h>
/**
 * Converts a string from the system ANSI code page to UTF-8.
 *
 * The source encoding is CP_ACP. That is GBK (code page 936) only on a
 * Simplified-Chinese Windows installation; on any other system the input is
 * interpreted in that system's ANSI code page.
 *
 * @param str  Text encoded in the system ANSI code page.
 * @return     The text encoded as UTF-8. An empty string is returned when the
 *             input is empty, is too long for the Win32 API (length does not
 *             fit in an int), or a conversion step fails.
 */
std::string CodeUtils::GbkToUtf8(const std::string &str)
{
    if (str.empty())
    {
        return std::string();
    }

    // The Win32 conversion APIs take int lengths; reject inputs whose size
    // does not survive the narrowing instead of converting a wrong length.
    const int srcLen = static_cast<int>(str.size());
    if (srcLen <= 0 || static_cast<std::string::size_type>(srcLen) != str.size())
    {
        return std::string();
    }

    // Step 1: ANSI -> UTF-16. The same explicit length is used for the sizing
    // call and the converting call so the two can never disagree (mixing -1
    // with an explicit length breaks on embedded NUL characters).
    const int wideLen = ::MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, nullptr, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }
    // std::wstring owns the buffer, so nothing leaks if a later step throws.
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (::MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, &wide[0], wideLen) <= 0)
    {
        return std::string();
    }

    // Step 2: UTF-16 -> UTF-8.
    const int utf8Len = ::WideCharToMultiByte(CP_UTF8, 0, wide.data(), wideLen, nullptr, 0, nullptr, nullptr);
    if (utf8Len <= 0)
    {
        return std::string();
    }
    std::string utf8(static_cast<std::string::size_type>(utf8Len), '\0');
    if (::WideCharToMultiByte(CP_UTF8, 0, wide.data(), wideLen, &utf8[0], utf8Len, nullptr, nullptr) <= 0)
    {
        return std::string();
    }
    return utf8;
}
std::string CodeUtils::Utf8ToGbk(const std::string &str)
{
std::string result;
WCHAR *strSrc;
LPSTR szRes;
//获得临时变量的大小
int i = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, NULL, 0);
strSrc = new WCHAR[i + 1];
MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, strSrc, i);
//获得临时变量的大小
i = WideCharToMultiByte(CP_ACP, 0, strSrc, -1, NULL, 0, NULL, NULL);
szRes = new CHAR[i + 1];
WideCharToMultiByte(CP_ACP, 0, strSrc, -1, szRes, i, NULL, NULL);
result = szRes;
delete[]strSrc;
delete[]szRes;
return result;
}
std::string CodeUtils::WStringToString(const std::wstring &ws)
{
std::string strLocale = setlocale(LC_ALL, "");
size_t ptNumOfCharConverted;
int charArrayCount = ws.size() * 2 + 2;
char* dst = new char[charArrayCount];
const wchar_t* wchSrc = ws.c_str();
wcstombs_s(&ptNumOfCharConverted, dst, charArrayCount, ws.c_str(), charArrayCount);
std::string strResult = dst;
delete[] dst;
setlocale(LC_ALL, strLocale.c_str());
return strResult;
}
std::wstring CodeUtils::StringToWString(const std::string &str)
{
std::wstring wContext = L"";
int len = MultiByteToWideChar(CP_ACP, 0, str.c_str(), str.size(), NULL, 0);
WCHAR* buffer = new WCHAR[len + 1];
MultiByteToWideChar(CP_ACP, 0, str.c_str(), str.size(), buffer, len);
buffer[len] = '\0';
wContext.append(buffer);
delete[] buffer;
return wContext;
}
/**
 * Tells whether a one-character wide string is a common Chinese character.
 *
 * "Chinese" here means the CJK Unified Ideographs block, U+4E00 through
 * U+9FFF. Ideographs outside that block (extension blocks, compatibility
 * ideographs) and CJK punctuation are not counted.
 *
 * @param wstr  Wide string to test.
 * @return      true only when wstr has exactly one element and its value is
 *              within 0x4E00-0x9FFF; false otherwise, including for empty or
 *              longer strings.
 */
bool CodeUtils::IsChineseChar(const std::wstring & wstr)
{
    if (wstr.size() != 1)
    {
        return false;
    }
    // Compare the code unit's value directly. Inspecting its individual bytes
    // through a pointer cast would depend on byte order and wchar_t width.
    const wchar_t ch = wstr[0];
    return ch >= 0x4E00 && ch <= 0x9FFF;
}
/**
 * Splits a narrow string into its individual characters.
 *
 * The input is widened with StringToWString; each wide character is then
 * converted back on its own with WStringToString, so a multi-byte character
 * (e.g. a Chinese character) ends up as one element instead of being cut
 * between its bytes.
 *
 * @param str  Narrow string to split.
 * @return     One narrow string per wide character of the widened input, in
 *             the original order.
 */
std::vector<std::string> CodeUtils::SpiteStringCharacter(const std::string & str)
{
    const std::wstring wide = StringToWString(str);
    std::vector<std::string> pieces;
    for (const wchar_t unit : wide)
    {
        // A one-element wide string holding just this character.
        const std::wstring single(1, unit);
        pieces.push_back(WStringToString(single));
    }
    return pieces;
}
/**
 * Counts the Chinese characters in a narrow string.
 *
 * The input is widened with StringToWString and every wide character is
 * tested individually with IsChineseChar.
 *
 * @param str  Narrow string to inspect.
 * @return     Number of wide characters for which IsChineseChar returns true.
 */
int CodeUtils::GetStringChineseCharCount(const std::string & str)
{
    const std::wstring wide = StringToWString(str);
    int total = 0;
    for (const wchar_t unit : wide)
    {
        // IsChineseChar expects a one-character string, so wrap the unit.
        if (IsChineseChar(std::wstring(1, unit)))
        {
            ++total;
        }
    }
    return total;
}