utf8转unicode

最新推荐文章于 2024-07-18 15:29:38 发布

lx111000lx0

最新推荐文章于 2024-07-18 15:29:38 发布

阅读量777

点赞数

分类专栏：练习文章标签： utf-8 unicode utf8 C++

本文链接：https://blog.csdn.net/lx111000lx0/article/details/8623050

版权

练习专栏收录该内容

16 篇文章 0 订阅

订阅专栏

1. Windows 下需要使用 Unicode 来处理非 ASCII 字符，例如中文文件路径。
2. 但做字符串处理时，又需要先转换成 UTF-8 作为中间格式来处理，这就涉及到两者之间的互相转换。
3. Unicode 与 UTF-8 的关系可参考：

http://baike.baidu.com/view/40801.htm

#include <stdlib.h>

#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>

using namespace std;


/**
 * Decodes a NUL-terminated UTF-8 string and appends the code points to *unicode.
 *
 * Output format: every decoded code point is appended as exactly three bytes
 * in little-endian order (bits 0-7, then bits 8-15, then bits 16-20). Existing
 * contents of *unicode are kept; the new bytes are appended after them.
 *
 * Malformed input (a stray continuation byte, an invalid lead byte, or a
 * sequence cut short by a non-continuation byte or the end of the string)
 * yields U+FFFD (bytes FD FF 00) and decoding resumes at the next byte.
 * Overlong encodings and surrogate code points are not rejected.
 *
 * @param utf8    NUL-terminated UTF-8 input; a null pointer is a no-op.
 * @param unicode Destination buffer; a null pointer is a no-op.
 */
void Utf82Unicode(const char* utf8, std::string* unicode)
{
	if (utf8 == nullptr || unicode == nullptr)
	{
		return;
	}

	const unsigned long kReplacement = 0xFFFD;

	std::size_t index = 0;
	while (utf8[index] != '\0')
	{
		const unsigned char lead = static_cast<unsigned char>(utf8[index]);

		// Classify the lead byte: expected sequence length and payload bits.
		std::size_t length = 0; // 0 means "not a valid lead byte"
		unsigned long code_point = 0;
		if (lead < 0x80)
		{
			length = 1; // 0xxxxxxx
			code_point = lead;
		}
		else if ((lead >> 5) == 0x06)
		{
			length = 2; // 110xxxxx
			code_point = lead & 0x1F;
		}
		else if ((lead >> 4) == 0x0E)
		{
			length = 3; // 1110xxxx
			code_point = lead & 0x0F;
		}
		else if ((lead >> 3) == 0x1E)
		{
			length = 4; // 11110xxx
			code_point = lead & 0x07;
		}

		// Fold in continuation bytes (10xxxxxx). The NUL terminator is not a
		// continuation byte, so this loop can never read past the end of the input.
		std::size_t consumed = 1;
		while (consumed < length)
		{
			const unsigned char next = static_cast<unsigned char>(utf8[index + consumed]);
			if ((next & 0xC0) != 0x80)
			{
				break;
			}
			code_point = (code_point << 6) | (next & 0x3F);
			++consumed;
		}

		if (consumed != length)
		{
			// Invalid lead byte or truncated sequence: always advance by one
			// byte so that malformed input cannot stall the loop.
			code_point = kReplacement;
			consumed = 1;
		}

		unicode->push_back(static_cast<char>(code_point & 0xFF));
		unicode->push_back(static_cast<char>((code_point >> 8) & 0xFF));
		unicode->push_back(static_cast<char>((code_point >> 16) & 0xFF));

		index += consumed;
	}
}

/**
 * Demo driver: decodes one 3-byte UTF-8 sequence with Utf82Unicode and prints
 * the first two output bytes in hex, followed by the raw output bytes and the
 * output length.
 */
int main()
{
	// The UTF-8 byte sequence E6 88 91 (encodes U+6211).
	const char utf82[] = "\xE6\x88\x91";

	std::string unicode2;
	Utf82Unicode(utf82, &unicode2);

	// Go through unsigned char for BOTH bytes so that values >= 0x80 are not
	// sign-extended on platforms where plain char is signed; %x expects an
	// unsigned int argument.
	printf("%x,%x\n",
	       static_cast<unsigned int>(static_cast<unsigned char>(unicode2[0])),
	       static_cast<unsigned int>(static_cast<unsigned char>(unicode2[1])));

	// Writes the raw decoded bytes (may include NUL bytes) and the byte count.
	std::cout << unicode2 << ":" << unicode2.size() << '\n';

	return 0;
}