wstring(utf16)转string(utf8)

http://www.linuxquestions.org/questions/programming-9/wstring-utf8-conversion-in-pure-c-701084/

str.h文件

#ifndef STR_H
#define STR_H
#include <string>
#include <iostream>

typedef std::string Str;
typedef std::wstring WStr;

std::ostream& operator<<(std::ostream& f, const WStr& s);
std::istream& operator>>(std::istream& f, WStr& s);
void utf8toWStr(WStr& dest, const Str& src);
void wstrToUtf8(Str& dest, const WStr& src);

#endif
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

str.cpp文件

#include "Str.h"
#ifdef UTF8TEST
#include <stdio.h>
#endif

void utf8toWStr(WStr& dest, const Str& src){
	dest.clear();
	wchar_t w = 0;
	int bytes = 0;
	wchar_t err = L'�';
	for (size_t i = 0; i < src.size(); i++){
		unsigned char c = (unsigned char)src[i];
		if (c <= 0x7f){//first byte
			if (bytes){
				dest.push_back(err);
				bytes = 0;
			}
			dest.push_back((wchar_t)c);
		}
		else if (c <= 0xbf){//second/third/etc byte
			if (bytes){
				w = ((w << 6)|(c & 0x3f));
				bytes--;
				if (bytes == 0)
					dest.push_back(w);
			}
			else
				dest.push_back(err);
		}
		else if (c <= 0xdf){//2byte sequence start
			bytes = 1;
			w = c & 0x1f;
		}
		else if (c <= 0xef){//3byte sequence start
			bytes = 2;
			w = c & 0x0f;
		}
		else if (c <= 0xf7){//3byte sequence start
			bytes = 3;
			w = c & 0x07;
		}
		else{
			dest.push_back(err);
			bytes = 0;
		}
	}
	if (bytes)
		dest.push_back(err);
}

void wstrToUtf8(Str& dest, const WStr& src){
	dest.clear();
	for (size_t i = 0; i < src.size(); i++){
		wchar_t w = src[i];
		if (w <= 0x7f)
			dest.push_back((char)w);
		else if (w <= 0x7ff){
			dest.push_back(0xc0 | ((w >> 6)& 0x1f));
			dest.push_back(0x80| (w & 0x3f));
		}
		else if (w <= 0xffff){
			dest.push_back(0xe0 | ((w >> 12)& 0x0f));
			dest.push_back(0x80| ((w >> 6) & 0x3f));
			dest.push_back(0x80| (w & 0x3f));
		}
		else if (w <= 0x10ffff){
			dest.push_back(0xf0 | ((w >> 18)& 0x07));
			dest.push_back(0x80| ((w >> 12) & 0x3f));
			dest.push_back(0x80| ((w >> 6) & 0x3f));
			dest.push_back(0x80| (w & 0x3f));
		}
		else
			dest.push_back('?');
	}
}

Str wstrToUtf8(const WStr& str){
	Str result;
	wstrToUtf8(result, str);
	return result;
}

WStr utf8toWStr(const Str& str){
	WStr result;
	utf8toWStr(result, str);
	return result;
}

std::ostream& operator<<(std::ostream& f, const WStr& s){
	Str s1;
	wstrToUtf8(s1, s);
	f << s1;
	return f;
}

std::istream& operator>>(std::istream& f, WStr& s){
	Str s1;
	f >> s1;
	utf8toWStr(s, s1);
	return f;
}

#ifdef UTF8TEST
bool utf8test(){
	WStr w1;
	//for (wchar_t c = 1; c <= 0x10ffff; c++){
	for (wchar_t c = 0x100000; c <= 0x100002; c++){
		w1 += c;	
	}
	Str s = wstrToUtf8(w1);
	WStr w2 = utf8toWStr(s);
	bool result = true;
	if (w1.length() != w2.length()){
		printf("length differs\n");
		//std::cout << "length differs" << std::endl;
		result = false;
	}
	
	printf("w1: %S\ns: %s\nw2: %S\n", w1.c_str(), s.c_str(), w2.c_str());
	
	for (size_t i = 0; i < w1.size(); i++)
		if (w1[i] != w2[i]){
			result = false;
			printf("character at pos %x differs (expected %.8x got %.8x)\n", i, w1[i], w2[i]);
			//std::cout << "character at pos " << i  << " differs" << std::endl;
			break;
		}
		
	if (!result){
		printf("utf8 dump: \n");
		for (size_t i = 0; i < s.size(); i++)
			printf("%2x ", (unsigned char)s[i]);
	}
	
	return result;
}

int main(int argc, char** argv){
	std::wstring ws(L"фыва");
	std::string s("фыва");
	std::cout << ws << s << std::endl;
	std::cout << wstrToUtf8(utf8toWStr("фыва")) << std::endl;
	if (utf8test())
		std::cout << "utf8Test succesful" << std::endl;
	else 
		std::cout << "utf8Test failed" << std::endl;
	return 0;
}
#endif

main.cpp

Basically, I want to do this:

Code:
#include <iostream>
#include <string>
#include <locale>
int main(int argc, char** argv){
	std::wstring ws(L"фыва");
	std::string s("asdf");
	std::cout << ws << s << std::endl;
	return 0;
}
or this
Code:
#include <iostream>
#include <string>
#include <locale>
int main(int argc, char** argv){
	std::wstring ws(L"фыва");
	std::string s("asdf");
	std::wcout << ws << s << std::endl;
	return 0;
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值