Multi-Byte/Unicode支持总结

最新推荐文章于 2023-03-09 20:09:24 发布

我爱孟小赖

最新推荐文章于 2023-03-09 20:09:24 发布

阅读量846

点赞数

文章标签： c++ Unicode ANSI UTF8

很多时候，我们的程序需要支持两种字符集，Multi-Byte与Unicode。

这时就需要进行字符串转换，以及两套API之间的切换。

对两种字符集的统一支持，本文做了一些简单总结。

1、常用转换函数

以下四个函数为Multibyte/Unicode转化基本函数，已处理内存泄露问题。

其他转换可以在这四个基本函数基础上操作。

1.1. Ansi转化为Unicode

[cpp] view plain copy print ?

// Converts text encoded in the system ANSI code page (CP_ACP) to a wide string.
//
// The whole of `str` is converted using its explicit length, so embedded NUL
// characters are carried over instead of cutting the result short.
// Returns an empty wstring when `str` is empty, when its length does not fit
// in an int (the API takes int lengths), or when the conversion fails.
static wstring AnsiToUnicode(const string& str)
{
    // The API rejects a source length of 0, and a length that does not
    // round-trip through int cannot be passed to it safely.
    const int srcLen = static_cast<int>(str.size());
    if (srcLen <= 0 || static_cast<string::size_type>(srcLen) != str.size())
    {
        return wstring();
    }

    // First call: query how many wide characters the output needs.
    const int wideLen = ::MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, NULL, 0);
    if (wideLen <= 0)
    {
        return wstring();
    }

    // Convert straight into the result's own storage: no raw new[]/delete[]
    // that could leak if an allocation throws, and no second copy of the text.
    wstring result(static_cast<wstring::size_type>(wideLen), L'\0');
    const int written = ::MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, &result[0], wideLen);
    if (written <= 0)
    {
        return wstring();
    }

    result.resize(static_cast<wstring::size_type>(written));
    return result;
}

// Converts text encoded in the system ANSI code page (CP_ACP) to a wide string.
//
// The whole of `str` is converted using its explicit length, so embedded NUL
// characters are carried over instead of cutting the result short.
// Returns an empty wstring when `str` is empty, when its length does not fit
// in an int (the API takes int lengths), or when the conversion fails.
static wstring AnsiToUnicode(const string& str)
{
    // The API rejects a source length of 0, and a length that does not
    // round-trip through int cannot be passed to it safely.
    const int srcLen = static_cast<int>(str.size());
    if (srcLen <= 0 || static_cast<string::size_type>(srcLen) != str.size())
    {
        return wstring();
    }

    // First call: query how many wide characters the output needs.
    const int wideLen = ::MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, NULL, 0);
    if (wideLen <= 0)
    {
        return wstring();
    }

    // Convert straight into the result's own storage: no raw new[]/delete[]
    // that could leak if an allocation throws, and no second copy of the text.
    wstring result(static_cast<wstring::size_type>(wideLen), L'\0');
    const int written = ::MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, &result[0], wideLen);
    if (written <= 0)
    {
        return wstring();
    }

    result.resize(static_cast<wstring::size_type>(written));
    return result;
}

1.2. Unicode转化为Ansi

[cpp] view plain copy print ?

// Converts a wide string to text encoded in the system ANSI code page (CP_ACP).
//
// The whole of `str` is converted using its explicit length, so embedded NUL
// characters are carried over instead of cutting the result short.
// Returns an empty string when `str` is empty, when its length does not fit
// in an int (the API takes int lengths), or when the conversion fails.
static string UnicodeToAnsi(const wstring& str)
{
    // The API rejects a source length of 0, and a length that does not
    // round-trip through int cannot be passed to it safely.
    const int srcLen = static_cast<int>(str.size());
    if (srcLen <= 0 || static_cast<wstring::size_type>(srcLen) != str.size())
    {
        return string();
    }

    // First call: query how many bytes the output needs.
    const int byteLen = ::WideCharToMultiByte(CP_ACP, 0, str.data(), srcLen, NULL, 0, NULL, NULL);
    if (byteLen <= 0)
    {
        return string();
    }

    // Convert straight into the result's own storage: no raw new[]/delete[]
    // that could leak if an allocation throws, and no second copy of the text.
    string result(static_cast<string::size_type>(byteLen), '\0');
    const int written = ::WideCharToMultiByte(CP_ACP, 0, str.data(), srcLen, &result[0], byteLen, NULL, NULL);
    if (written <= 0)
    {
        return string();
    }

    result.resize(static_cast<string::size_type>(written));
    return result;
}

// Converts a wide string to text encoded in the system ANSI code page (CP_ACP).
//
// The whole of `str` is converted using its explicit length, so embedded NUL
// characters are carried over instead of cutting the result short.
// Returns an empty string when `str` is empty, when its length does not fit
// in an int (the API takes int lengths), or when the conversion fails.
static string UnicodeToAnsi(const wstring& str)
{
    // The API rejects a source length of 0, and a length that does not
    // round-trip through int cannot be passed to it safely.
    const int srcLen = static_cast<int>(str.size());
    if (srcLen <= 0 || static_cast<wstring::size_type>(srcLen) != str.size())
    {
        return string();
    }

    // First call: query how many bytes the output needs.
    const int byteLen = ::WideCharToMultiByte(CP_ACP, 0, str.data(), srcLen, NULL, 0, NULL, NULL);
    if (byteLen <= 0)
    {
        return string();
    }

    // Convert straight into the result's own storage: no raw new[]/delete[]
    // that could leak if an allocation throws, and no second copy of the text.
    string result(static_cast<string::size_type>(byteLen), '\0');
    const int written = ::WideCharToMultiByte(CP_ACP, 0, str.data(), srcLen, &result[0], byteLen, NULL, NULL);
    if (written <= 0)
    {
        return string();
    }

    result.resize(static_cast<string::size_type>(written));
    return result;
}

1.3. UTF-8 to Unicode

[cpp] view plain copy print ?

// Converts UTF-8 encoded text to a wide string.
//
// The whole of `str` is converted using its explicit length, so embedded NUL
// characters are carried over instead of cutting the result short.
// Returns an empty wstring when `str` is empty, when its length does not fit
// in an int (the API takes int lengths), or when the conversion fails.
static wstring UTF8ToUnicode(const string& str)
{
    // The API rejects a source length of 0, and a length that does not
    // round-trip through int cannot be passed to it safely.
    const int srcLen = static_cast<int>(str.size());
    if (srcLen <= 0 || static_cast<string::size_type>(srcLen) != str.size())
    {
        return wstring();
    }

    // First call: query how many wide characters the output needs.
    const int wideLen = ::MultiByteToWideChar(CP_UTF8, 0, str.data(), srcLen, NULL, 0);
    if (wideLen <= 0)
    {
        return wstring();
    }

    // Convert straight into the result's own storage: no raw new[]/delete[]
    // that could leak if an allocation throws, and no second copy of the text.
    wstring result(static_cast<wstring::size_type>(wideLen), L'\0');
    const int written = ::MultiByteToWideChar(CP_UTF8, 0, str.data(), srcLen, &result[0], wideLen);
    if (written <= 0)
    {
        return wstring();
    }

    result.resize(static_cast<wstring::size_type>(written));
    return result;
}

// Converts UTF-8 encoded text to a wide string.
//
// The whole of `str` is converted using its explicit length, so embedded NUL
// characters are carried over instead of cutting the result short.
// Returns an empty wstring when `str` is empty, when its length does not fit
// in an int (the API takes int lengths), or when the conversion fails.
static wstring UTF8ToUnicode(const string& str)
{
    // The API rejects a source length of 0, and a length that does not
    // round-trip through int cannot be passed to it safely.
    const int srcLen = static_cast<int>(str.size());
    if (srcLen <= 0 || static_cast<string::size_type>(srcLen) != str.size())
    {
        return wstring();
    }

    // First call: query how many wide characters the output needs.
    const int wideLen = ::MultiByteToWideChar(CP_UTF8, 0, str.data(), srcLen, NULL, 0);
    if (wideLen <= 0)
    {
        return wstring();
    }

    // Convert straight into the result's own storage: no raw new[]/delete[]
    // that could leak if an allocation throws, and no second copy of the text.
    wstring result(static_cast<wstring::size_type>(wideLen), L'\0');
    const int written = ::MultiByteToWideChar(CP_UTF8, 0, str.data(), srcLen, &result[0], wideLen);
    if (written <= 0)
    {
        return wstring();
    }

    result.resize(static_cast<wstring::size_type>(written));
    return result;
}

1.4. Unicode to UTF-8

[cpp] view plain copy print ?

// Converts a wide string to UTF-8 encoded text.
//
// The whole of `str` is converted using its explicit length, so embedded NUL
// characters are carried over instead of cutting the result short.
// Returns an empty string when `str` is empty, when its length does not fit
// in an int (the API takes int lengths), or when the conversion fails.
static string UnicodeToUTF8(const wstring& str )
{
    // The API rejects a source length of 0, and a length that does not
    // round-trip through int cannot be passed to it safely.
    const int srcLen = static_cast<int>(str.size());
    if (srcLen <= 0 || static_cast<wstring::size_type>(srcLen) != str.size())
    {
        return string();
    }

    // First call: query how many bytes the output needs.
    const int byteLen = ::WideCharToMultiByte(CP_UTF8, 0, str.data(), srcLen, NULL, 0, NULL, NULL);
    if (byteLen <= 0)
    {
        return string();
    }

    // Convert straight into the result's own storage: no raw new[]/delete[]
    // that could leak if an allocation throws, and no second copy of the text.
    string result(static_cast<string::size_type>(byteLen), '\0');
    const int written = ::WideCharToMultiByte(CP_UTF8, 0, str.data(), srcLen, &result[0], byteLen, NULL, NULL);
    if (written <= 0)
    {
        return string();
    }

    result.resize(static_cast<string::size_type>(written));
    return result;
}

// Converts a wide string to UTF-8 encoded text.
//
// The whole of `str` is converted using its explicit length, so embedded NUL
// characters are carried over instead of cutting the result short.
// Returns an empty string when `str` is empty, when its length does not fit
// in an int (the API takes int lengths), or when the conversion fails.
static string UnicodeToUTF8(const wstring& str )
{
    // The API rejects a source length of 0, and a length that does not
    // round-trip through int cannot be passed to it safely.
    const int srcLen = static_cast<int>(str.size());
    if (srcLen <= 0 || static_cast<wstring::size_type>(srcLen) != str.size())
    {
        return string();
    }

    // First call: query how many bytes the output needs.
    const int byteLen = ::WideCharToMultiByte(CP_UTF8, 0, str.data(), srcLen, NULL, 0, NULL, NULL);
    if (byteLen <= 0)
    {
        return string();
    }

    // Convert straight into the result's own storage: no raw new[]/delete[]
    // that could leak if an allocation throws, and no second copy of the text.
    string result(static_cast<string::size_type>(byteLen), '\0');
    const int written = ::WideCharToMultiByte(CP_UTF8, 0, str.data(), srcLen, &result[0], byteLen, NULL, NULL);
    if (written <= 0)
    {
        return string();
    }

    result.resize(static_cast<string::size_type>(written));
    return result;
}

2、通用宏使用

MultiByte/Unicode两套字符集下使用API不同，目前已有通用宏来解决此问题。

具体相关宏如下表所示。

通用宏定义	MultiByte API	Unicode API	功能说明
TCHAR	char	wchar_t	字符定义
_ttoi	atoi	_wtoi	字符串转int
_tstof	atof	_wtof	字符串转float
_ttoi64	_atoi64	_wtoi64	字符串转64位整型
_itot	itoa	_itow	int转字符串
_stprintf_s	sprintf_s	swprintf_s	float转字符串（用%f格式化输出；_tstof/_wtof的方向是字符串转float）
_tcslen	strlen	wcslen	获取字符串长度
_tcsstr	strstr	wcsstr	查找子字符串
_tcscpy	strcpy	wcscpy	字符串复制（建议使用_tcscpy_s）
_stprintf_s	sprintf_s	swprintf_s	格式化输出到字符串

例如，把字符串“123”转换为int型，直接调用_ttoi(_T("123"))即可，在Multi-Byte/Unicode字符集下都能正常运行。

3、控制台字符串输出

[cpp] view plain copy print ?

// Prints a CString to the console through the stream that matches the build's
// character set. The literal uses plain ASCII double quotes; typographic
// quotes are not valid string delimiters and do not compile.
CString strTest = _T("测试");
#ifdef UNICODE
// Unicode build: the text is passed to the wide stream as const wchar_t*.
// NOTE(review): wcout may need a locale configured before it renders
// non-ASCII text — confirm on the target console.
wcout << static_cast<const wchar_t*>(strTest) << endl;
#else
// Multi-byte build: the text is passed to the narrow stream as const char*.
cout << static_cast<const char*>(strTest) << endl;
#endif

原文地址：http://blog.csdn.net/segen_jaa/article/details/7550317

我爱孟小赖

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Multi-Byte/Unicode支持总结

很多时候，我们的程序需要支持两种字符集，Multi-Byte与Unicode。这时就需要进行字符串转换，以及两套API之间的切换。对两种字符集的统一支持，本文做了一些简单总结。1、常用转换函数以下四个函数为Multibyte/Unicode转化基本函数，已处理内存泄露问题。其他转换可以在这四个基本函数基础上操作。1.1. Ansi转化为Unicode
复制链接

扫一扫