c++ 字符编码转换

文章介绍了在C++中进行string与wstring(窄字符串与宽字符串)相互转换的几种方法,包括使用Windows API(MultiByteToWideChar/WideCharToMultiByte)、comutil.h头文件中的_bstr_t、标准库的locale和codecvt,以及mbstowcs/wcstombs,并特别关注了中文字符的处理和VS环境中的UTF-8编码问题。
摘要由CSDN通过智能技术生成

方法一

#include <Windows.h>
#include <string>

//将string转换成wstring  
wstring string2wstring(string str)  
{  
    wstring result;  
    //获取缓冲区大小,并申请空间,缓冲区大小按字符计算  
    int len = MultiByteToWideChar(CP_ACP, 0, str.c_str(), str.size(), NULL, 0);  
    TCHAR* buffer = new TCHAR[len + 1];  
    //多字节编码转换成宽字节编码  
    MultiByteToWideChar(CP_ACP, 0, str.c_str(), str.size(), buffer, len);  
    buffer[len] = '\0';             //添加字符串结尾  
    //删除缓冲区并返回值  
    result.append(buffer);  
    delete[] buffer;  
    return result;  
}  
 
//将wstring转换成string  
string wstring2string(wstring wstr)  
{  
    string result;  
    //获取缓冲区大小,并申请空间,缓冲区大小事按字节计算的  
    int len = WideCharToMultiByte(CP_ACP, 0, wstr.c_str(), wstr.size(), NULL, 0, NULL, NULL);  
    char* buffer = new char[len + 1];  
    //宽字节编码转换成多字节编码  
    WideCharToMultiByte(CP_ACP, 0, wstr.c_str(), wstr.size(), buffer, len, NULL, NULL);  
    buffer[len] = '\0';  
    //删除缓冲区并返回值  
    result.append(buffer);  
    delete[] buffer;  
    return result;  
}

方法二

#include <comutil.h>  
#include <string>
#pragma comment(lib, "comsuppw.lib")
 
// Wide -> narrow conversion that delegates to _bstr_t: the wide text is
// wrapped in a _bstr_t and its char* conversion operator supplies the
// narrow representation, which is then copied into the returned string.
string ws2s(const wstring& ws)
{
    const _bstr_t bridge(ws.c_str());
    // The pointer is owned by `bridge`; copy it before `bridge` goes away.
    return string(static_cast<char*>(bridge));
}
 
// Narrow -> wide conversion that delegates to _bstr_t: the narrow text is
// wrapped in a _bstr_t and its wchar_t* conversion operator supplies the
// wide representation, which is then copied into the returned string.
wstring s2ws(const string& s)
{
    const _bstr_t bridge(s.c_str());
    // The pointer is owned by `bridge`; copy it before `bridge` goes away.
    return wstring(static_cast<wchar_t*>(bridge));
}

方法三

#include <locale>
#include <codecvt>
#include <string>

// Decodes a UTF-8 encoded narrow string into a wide string using
// std::wstring_convert with the std::codecvt_utf8<wchar_t> facet.
// Exceptions raised by the converter propagate to the caller.
inline std::wstring to_wide_string(const std::string& input)
{
  using Utf8Facet = std::codecvt_utf8<wchar_t>;
  std::wstring_convert<Utf8Facet> utf8_decoder;

  const char* const first = input.data();
  return utf8_decoder.from_bytes(first, first + input.size());
}

// Encodes a wide string as a UTF-8 narrow string using
// std::wstring_convert with the std::codecvt_utf8<wchar_t> facet.
// Exceptions raised by the converter propagate to the caller.
inline std::string to_byte_string(const std::wstring& input)
{
  using Utf8Facet = std::codecvt_utf8<wchar_t>;
  std::wstring_convert<Utf8Facet> utf8_encoder;

  const wchar_t* const first = input.data();
  return utf8_encoder.to_bytes(first, first + input.size());
}

方法三在VS环境中运行时,如果输入中带有中文,converter.from_bytes会抛出异常(通常是因为窄字符串并不是UTF-8编码,而是本地代码页编码,例如GBK),可作如下修改:

// Converts a narrow string to a wide string through a codecvt facet created
// for the "CHS" locale name instead of the UTF-8 facet.
// NOTE(review): constructing std::codecvt from a locale name appears to rely
// on the Visual Studio standard library - confirm before porting.
inline std::wstring to_wide_string(const std::string& input)
{
  using LocaleFacet = std::codecvt<wchar_t, char, std::mbstate_t>;

  // The converter takes over the facet pointer passed to its constructor.
  std::wstring_convert<LocaleFacet> converter(new LocaleFacet("CHS"));
  return converter.from_bytes(input);
}

// Converts a wide string to a narrow string through a codecvt facet created
// for the "CHS" locale name instead of the UTF-8 facet.
// NOTE(review): constructing std::codecvt from a locale name appears to rely
// on the Visual Studio standard library - confirm before porting.
inline std::string to_byte_string(const std::wstring& input)
{
  using LocaleFacet = std::codecvt<wchar_t, char, std::mbstate_t>;

  // The converter takes over the facet pointer passed to its constructor.
  std::wstring_convert<LocaleFacet> converter(new LocaleFacet("CHS"));
  return converter.to_bytes(input);
}

或者将程序的执行字符集设置为UTF-8(例如使用MSVC的 /utf-8 编译选项,见文末参考 [1][2])。

方法四

// Converts a narrow multibyte string to a wide string with mbstowcs, using
// the user's default locale (std::locale("")) so that non-ASCII text such as
// Chinese is decoded according to the environment.
//
// The global locale is replaced for the duration of the call and restored on
// every exit path, including when an exception is thrown. Replacing the
// global locale is process-wide, so avoid calling this concurrently with
// other locale-dependent code.
//
// @param s  Multibyte text; conversion stops at the first embedded NUL.
// @return   The wide string, or an empty string if s is empty or contains a
//           byte sequence that mbstowcs reports as invalid.
const std::wstring s2ws_local(const std::string& s)
{
  if (s.empty())
    return std::wstring();

  // Puts the previous global locale back when the function exits.
  struct GlobalLocaleGuard
  {
    std::locale saved;
    ~GlobalLocaleGuard() { std::locale::global(saved); }
  };
  GlobalLocaleGuard guard{ std::locale::global(std::locale("")) };

  // A multibyte string never yields more wide characters than it has bytes.
  std::wstring wide(s.size(), L'\0');
  const size_t written = mbstowcs(&wide[0], s.c_str(), wide.size());
  if (written == static_cast<size_t>(-1))
    return std::wstring();  // invalid multibyte sequence

  wide.resize(written);
  return wide;
}

// Converts a wide string to a narrow multibyte string with wcstombs, using
// the user's default locale (std::locale("")).
//
// The global locale is replaced for the duration of the call and restored on
// every exit path, including when an exception is thrown. Replacing the
// global locale is process-wide, so avoid calling this concurrently with
// other locale-dependent code.
//
// @param ws  Wide text; conversion stops at the first embedded NUL.
// @return    The multibyte string, or an empty string if ws is empty or
//            contains a character wcstombs cannot represent.
const std::string ws2s_local(const std::wstring& ws)
{
  if (ws.empty())
    return std::string();

  // Puts the previous global locale back when the function exits.
  struct GlobalLocaleGuard
  {
    std::locale saved;
    ~GlobalLocaleGuard() { std::locale::global(saved); }
  };
  GlobalLocaleGuard guard{ std::locale::global(std::locale("")) };

  // Reserve up to four bytes per wide character, as the original sizing did.
  std::string narrow(ws.size() * 4, '\0');
  const size_t written = wcstombs(&narrow[0], ws.c_str(), narrow.size());
  if (written == static_cast<size_t>(-1))
    return std::string();  // character not representable in this locale

  narrow.resize(written);
  return narrow;
}

全部代码如下

#include <Windows.h>
#include <string>
#include <iostream>
#include <locale>
#include <codecvt>
#include <vector>

#include <comutil.h>  
#pragma comment(lib, "comsuppw.lib")

using namespace std;


// Converts a narrow multibyte string to a wide string with mbstowcs, using
// the user's default locale (std::locale("")) so that non-ASCII text such as
// Chinese is decoded according to the environment.
//
// The global locale is replaced for the duration of the call and restored on
// every exit path, including when an exception is thrown. Replacing the
// global locale is process-wide, so avoid calling this concurrently with
// other locale-dependent code.
//
// @param s  Multibyte text; conversion stops at the first embedded NUL.
// @return   The wide string, or an empty string if s is empty or contains a
//           byte sequence that mbstowcs reports as invalid.
const std::wstring s2ws_local(const std::string& s)
{
  if (s.empty())
    return std::wstring();

  // Puts the previous global locale back when the function exits.
  struct GlobalLocaleGuard
  {
    std::locale saved;
    ~GlobalLocaleGuard() { std::locale::global(saved); }
  };
  GlobalLocaleGuard guard{ std::locale::global(std::locale("")) };

  // A multibyte string never yields more wide characters than it has bytes.
  std::wstring wide(s.size(), L'\0');
  const size_t written = mbstowcs(&wide[0], s.c_str(), wide.size());
  if (written == static_cast<size_t>(-1))
    return std::wstring();  // invalid multibyte sequence

  wide.resize(written);
  return wide;
}

// Converts a wide string to a narrow multibyte string with wcstombs, using
// the user's default locale (std::locale("")).
//
// The global locale is replaced for the duration of the call and restored on
// every exit path, including when an exception is thrown. Replacing the
// global locale is process-wide, so avoid calling this concurrently with
// other locale-dependent code.
//
// @param ws  Wide text; conversion stops at the first embedded NUL.
// @return    The multibyte string, or an empty string if ws is empty or
//            contains a character wcstombs cannot represent.
const std::string ws2s_local(const std::wstring& ws)
{
  if (ws.empty())
    return std::string();

  // Puts the previous global locale back when the function exits.
  struct GlobalLocaleGuard
  {
    std::locale saved;
    ~GlobalLocaleGuard() { std::locale::global(saved); }
  };
  GlobalLocaleGuard guard{ std::locale::global(std::locale("")) };

  // Reserve up to four bytes per wide character, as the original sizing did.
  std::string narrow(ws.size() * 4, '\0');
  const size_t written = wcstombs(&narrow[0], ws.c_str(), narrow.size());
  if (written == static_cast<size_t>(-1))
    return std::string();  // character not representable in this locale

  narrow.resize(written);
  return narrow;
}


// Wide -> narrow conversion that delegates to _bstr_t: the wide text is
// wrapped in a _bstr_t and its char* conversion operator supplies the
// narrow representation, which is then copied into the returned string.
string ws2s(const wstring& ws)
{
  const _bstr_t bridge(ws.c_str());
  // The pointer is owned by `bridge`; copy it before `bridge` goes away.
  return string(static_cast<char*>(bridge));
}

// Narrow -> wide conversion that delegates to _bstr_t: the narrow text is
// wrapped in a _bstr_t and its wchar_t* conversion operator supplies the
// wide representation, which is then copied into the returned string.
wstring s2ws(const string& s)
{
  const _bstr_t bridge(s.c_str());
  // The pointer is owned by `bridge`; copy it before `bridge` goes away.
  return wstring(static_cast<wchar_t*>(bridge));
}

// Decodes a UTF-8 encoded narrow string into a wide string using
// std::wstring_convert with the std::codecvt_utf8<wchar_t> facet.
// Exceptions raised by the converter propagate to the caller.
//
// Alternative kept from the original listing (disabled): build the converter
// from a std::codecvt<wchar_t, char, std::mbstate_t> facet created with the
// "CHS" locale name instead of the UTF-8 facet:
//   std::wstring_convert<std::codecvt<wchar_t, char, std::mbstate_t>> decode(new std::codecvt<wchar_t, char, std::mbstate_t>("CHS"));
//   return decode.from_bytes(input);
inline std::wstring to_wide_string(const std::string& input)
{
  using Utf8Facet = std::codecvt_utf8<wchar_t>;
  std::wstring_convert<Utf8Facet> utf8_decoder;

  const char* const first = input.data();
  return utf8_decoder.from_bytes(first, first + input.size());
}

// Encodes a wide string as a UTF-8 narrow string using
// std::wstring_convert with the std::codecvt_utf8<wchar_t> facet.
// Exceptions raised by the converter propagate to the caller.
//
// Alternative kept from the original listing (disabled): build the converter
// from a std::codecvt<wchar_t, char, std::mbstate_t> facet created with the
// "CHS" locale name instead of the UTF-8 facet:
//   std::wstring_convert<std::codecvt<wchar_t, char, std::mbstate_t>> decode(new std::codecvt<wchar_t, char, std::mbstate_t>("CHS"));
//   return decode.to_bytes(input);
inline std::string to_byte_string(const std::wstring& input)
{
  using Utf8Facet = std::codecvt_utf8<wchar_t>;
  std::wstring_convert<Utf8Facet> utf8_encoder;

  const wchar_t* const first = input.data();
  return utf8_encoder.to_bytes(first, first + input.size());
}


//将string转换成wstring  
wstring string2wstring(string str)
{
  wstring result;
  //获取缓冲区大小,并申请空间,缓冲区大小按字符计算  
  int len = MultiByteToWideChar(CP_ACP, 0, str.c_str(), str.size(), NULL, 0);
  TCHAR* buffer = new TCHAR[len + 1];
  //多字节编码转换成宽字节编码  
  MultiByteToWideChar(CP_ACP, 0, str.c_str(), str.size(), buffer, len);
  buffer[len] = '\0';             //添加字符串结尾  
  //删除缓冲区并返回值  
  result.append(buffer);
  delete[] buffer;
  return result;
}

//将wstring转换成string  
string wstring2string(wstring wstr)
{
  string result;
  //获取缓冲区大小,并申请空间,缓冲区大小事按字节计算的  
  int len = WideCharToMultiByte(CP_ACP, 0, wstr.c_str(), wstr.size(), NULL, 0, NULL, NULL);
  char* buffer = new char[len + 1];
  //宽字节编码转换成多字节编码  
  WideCharToMultiByte(CP_ACP, 0, wstr.c_str(), wstr.size(), buffer, len, NULL, NULL);
  buffer[len] = '\0';
  //删除缓冲区并返回值  
  result.append(buffer);
  delete[] buffer;
  return result;
}

int main()
{
  wstring wide_text = string2wstring("hello, 你好");
  string narrow_text = wstring2string(wide_text);
  std::cout << narrow_text << std::endl;

  wide_text = s2ws("hello, 你好");
  narrow_text = ws2s(wide_text);
  std::cout << narrow_text << std::endl;

  wide_text = to_wide_string("hello, 你好");
  narrow_text = to_byte_string(wide_text);
  std::cout << narrow_text << std::endl;

  wide_text = s2ws_local("hello, 你好");
  narrow_text = ws2s_local(wide_text);
  std::cout << narrow_text << std::endl;

  return 0;
}

参考:


  1. VS 与 UTF-8 编码 ↩︎

  2. /utf-8(将源字符集和执行字符集设置为 UTF-8) ↩︎

  • 10
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值