string、wstring、UTF-8、UTF-16、UTF-32之间转换

// Converts a multibyte string (in the encoding of the user's environment
// locale) to a wide string using mbstowcs.
//
// Side effect: calls setlocale(LC_ALL, ""), which switches the process-wide
// C locale to the environment's locale.
//
// @param str  Multibyte input. Conversion stops at the first '\0' because the
//             input is handed to mbstowcs as a C string.
// @return     The converted wide string, or an empty string when the input
//             contains a byte sequence that is invalid in the current locale.
std::wstring string_to_wstring(const std::string& str)
{
    setlocale(LC_ALL, "");

    // First pass: ask how many wide characters the conversion produces.
    // mbstowcs reports an invalid sequence as (size_t)-1; that value must not
    // be used as a length.
    const std::size_t size = mbstowcs(nullptr, str.c_str(), 0);
    if (size == static_cast<std::size_t>(-1))
    {
        return std::wstring();
    }

    // Second pass: convert into a buffer of exactly that many wide characters.
    // The limit is the destination capacity, not the source byte count.
    std::wstring w_str(size, L'\0');
    mbstowcs(&w_str[0], str.c_str(), size);
    return w_str;
}
// Converts a wide string to a multibyte string (in the encoding of the user's
// environment locale) using wcstombs.
//
// Side effect: calls setlocale(LC_ALL, ""), which switches the process-wide
// C locale to the environment's locale.
//
// @param wstr  Wide input. Conversion stops at the first L'\0' because the
//              input is handed to wcstombs as a C string.
// @return      The converted multibyte string, or an empty string when the
//              input contains a character that cannot be represented in the
//              current locale.
std::string wstring_to_string(const std::wstring& wstr)
{
    setlocale(LC_ALL, "");

    // First pass: ask how many bytes the conversion produces. wcstombs reports
    // an unconvertible character as (size_t)-1; that value must not be used
    // as a length.
    const std::size_t size = wcstombs(nullptr, wstr.c_str(), 0);
    if (size == static_cast<std::size_t>(-1))
    {
        return std::string();
    }

    // Second pass: convert into a buffer of exactly that many bytes.
    std::string desrt(size, '\0');
    wcstombs(&desrt[0], wstr.c_str(), size);
    return desrt;
}

// UTF-8 encoded std::string -> std::wstring.
// Decoding goes through std::codecvt_utf8_utf16<wchar_t>, so the wide result
// holds UTF-16 code units.
std::wstring utf8_to_wstring(const std::string& str)
{
    using Utf8ToWide = std::wstring_convert< std::codecvt_utf8_utf16<wchar_t> >;

    Utf8ToWide converter;
    std::wstring wide = converter.from_bytes(str);
    return wide;
}

// std::wstring -> UTF-8 encoded std::string.
// Encoding goes through std::codecvt_utf8_utf16<wchar_t>, so the wide input
// is read as UTF-16 code units.
std::string wstring_to_utf8(const std::wstring& str)
{
    using WideToUtf8 = std::wstring_convert< std::codecvt_utf8_utf16<wchar_t> >;

    WideToUtf8 converter;
    std::string utf8 = converter.to_bytes(str);
    return utf8;
}

// UTF-8 encoded std::string -> UTF-16 std::u16string.
std::u16string utf8_to_u16string(const std::string& str)
{
    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;

    // Hand the converter the byte range explicitly instead of the string object.
    const char* const first = str.data();
    const char* const last = first + str.size();
    return converter.from_bytes(first, last);
}

// UTF-16 std::u16string -> UTF-8 encoded std::string.
std::string u16string_to_utf8(const std::u16string& str)
{
    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;

    // Hand the converter the code-unit range explicitly instead of the string object.
    const char16_t* const first = str.data();
    const char16_t* const last = first + str.size();
    return converter.to_bytes(first, last);
}

// UTF-8 encoded std::string -> UTF-32 std::u32string.
std::u32string utf8_utf32(std::string str)
{
    using Utf8ToUtf32 = std::wstring_convert< std::codecvt_utf8<char32_t>, char32_t >;

    Utf8ToUtf32 converter;
    std::u32string decoded = converter.from_bytes(str);
    return decoded;
}

// UTF-32 std::u32string -> UTF-8 encoded std::string.
std::string u32string_to_utf8(std::u32string str)
{
    using Utf32ToUtf8 = std::wstring_convert< std::codecvt_utf8<char32_t>, char32_t >;

    Utf32ToUtf8 converter;
    std::string encoded = converter.to_bytes(str);
    return encoded;
}

// Checks whether a byte string is well-formed UTF-8.
//
// Accepts only the 1- to 4-byte forms that encode U+0000..U+10FFFF. Rejects:
//   - stray continuation bytes and truncated sequences,
//   - overlong encodings (lead bytes C0/C1, E0 followed by 80..9F,
//     F0 followed by 80..8F),
//   - UTF-16 surrogates U+D800..U+DFFF (ED followed by A0..BF),
//   - values above U+10FFFF (F4 followed by 90..BF, lead bytes F5..FF).
//
// @param string  Bytes to validate; an empty string is considered valid.
// @return        true when every byte belongs to a well-formed sequence.
bool isUtf8(const std::string& string)
{
    typedef std::string::size_type Index;

    const Index len = string.size();
    Index i = 0;
    while (i < len)
    {
        const unsigned char lead = static_cast<unsigned char>(string[i]);

        if (lead <= 0x7F) // 0bbbbbbb
        {
            ++i;
            continue;
        }

        Index extra = 0;          // number of continuation bytes expected
        unsigned char lo = 0x80;  // allowed range of the FIRST continuation byte;
        unsigned char hi = 0xBF;  // narrowed below for the special lead bytes

        if (lead >= 0xC2 && lead <= 0xDF) // 110bbbbb (C0/C1 would be overlong)
        {
            extra = 1;
        }
        else if (lead >= 0xE0 && lead <= 0xEF) // 1110bbbb
        {
            extra = 2;
            if (lead == 0xE0) lo = 0xA0; // below A0 would be overlong
            if (lead == 0xED) hi = 0x9F; // A0..BF would encode U+D800..U+DFFF
        }
        else if (lead >= 0xF0 && lead <= 0xF4) // 11110bbb, capped at U+10FFFF
        {
            extra = 3;
            if (lead == 0xF0) lo = 0x90; // below 90 would be overlong
            if (lead == 0xF4) hi = 0x8F; // above 8F would exceed U+10FFFF
        }
        else
        {
            // Continuation byte in lead position, or a lead byte that can
            // never appear in UTF-8 (C0, C1, F5..FF).
            return false;
        }

        // The sequence needs `extra` more bytes after the lead byte.
        if (len - i <= extra)
        {
            return false;
        }

        const unsigned char second = static_cast<unsigned char>(string[i + 1]);
        if (second < lo || second > hi)
        {
            return false;
        }

        // Remaining continuation bytes must match 10bbbbbb.
        for (Index k = 2; k <= extra; ++k)
        {
            if ((static_cast<unsigned char>(string[i + k]) & 0xC0) != 0x80)
            {
                return false;
            }
        }

        i += extra + 1;
    }
    return true;
}

读取文件时，fstream 和 fopen 必须使用 ASCII 码
// Converts UTF-8 text to the multibyte encoding of the "CHS" locale.
//
// The text is first decoded from UTF-8 to a wide string and then re-encoded
// through a std::codecvt facet constructed for "CHS".
// NOTE(review): constructing std::codecvt<wchar_t, char, std::mbstate_t>
// directly from a locale name looks like a vendor-specific extension —
// confirm the target toolchain accepts it.
//
// A Win32-only alternative is MultiByteToWideChar(CP_UTF8, ...) followed by
// WideCharToMultiByte(CP_ACP, ...).
//
// @param str  Input bytes.
// @return     The re-encoded string, or `str` unchanged when isUtf8(str)
//             reports that the input is not UTF-8.
std::string utf8ToString(const std::string& str)
{
    // Input that is not UTF-8 is passed through untouched.
    if (!isUtf8(str)) {
        return str;
    }

    std::wstring_convert< std::codecvt_utf8_utf16<wchar_t> > strCnv;
    std::wstring_convert<std::codecvt<wchar_t, char, std::mbstate_t>>
        asciiConv(new std::codecvt<wchar_t, char, std::mbstate_t>("CHS"));

    // Return the converted value directly; wrapping a local in std::move on
    // return would only prevent copy elision.
    return asciiConv.to_bytes(strCnv.from_bytes(str));
}

// Converts a string in the system ANSI code page (CP_ACP) to a wide string
// using the Win32 MultiByteToWideChar API.
//
// The whole input, including any embedded '\0' bytes, is converted.
//
// @param wstr  Multibyte input (despite its name, this is the narrow string).
// @return      The converted wide string, or an empty string when the input
//              is empty, too long for the API's int length parameter, or the
//              conversion fails.
std::wstring String2Wstring(std::string wstr)
{
    std::wstring res;

    // MultiByteToWideChar takes the length as int and rejects a length of 0.
    if (wstr.empty() || wstr.size() > static_cast<std::string::size_type>(0x7fffffff)) {
        return res;
    }
    const int srcLen = static_cast<int>(wstr.size());

    // First call: query the number of wide characters required.
    // The API reports failure by returning 0.
    const int len = MultiByteToWideChar(CP_ACP, 0, wstr.data(), srcLen, nullptr, 0);
    if (len <= 0) {
        return res;
    }

    // Second call: convert straight into the result's own storage, so no
    // manually managed buffer is needed.
    res.resize(static_cast<std::wstring::size_type>(len));
    const int written = MultiByteToWideChar(CP_ACP, 0, wstr.data(), srcLen, &res[0], len);
    if (written <= 0) {
        res.clear();
    } else {
        res.resize(static_cast<std::wstring::size_type>(written));
    }
    return res;
}

std::string WideByte2Acsi(const std::wstring& wstrcode)
{
int asciisize = ::WideCharToMultiByte(CP_OEMCP, 0, wstrcode.c_str(), -1, NULL, 0, NULL, NULL);
if (asciisize == ERROR_NO_UNICODE_TRANSLATION)
{
throw std::exception(“Invalid UTF-8 sequence.”);
}
if (asciisize == 0)
{
throw std::exception(“Error in conversion.”);
}
std::vector resultstring(asciisize);
int convresult = ::WideCharToMultiByte(CP_OEMCP, 0, wstrcode.c_str(), -1, &resultstring[0], asciisize, NULL, NULL);

if (convresult != asciisize)
{
	throw std::exception("La falla!");
}

return std::string(&resultstring[0]);

}
//string下是中文的转换std::string=“我是好人” 转utf-8
std::string UnicodeToUtf8(const std::wstring& in_wStr)
{
int nNeedChars = WideCharToMultiByte(CP_UTF8, 0, in_wStr.c_str(), -1, 0, 0, 0, 0);
if (nNeedChars > 0)//再次判断一下
{
std::vector temp(nNeedChars);
::WideCharToMultiByte(CP_UTF8, 0, in_wStr.c_str(), -1, &temp[0], nNeedChars, 0, 0);
return std::string(&temp[0]);
}

return std::string();

}

  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

尹平华

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值