// ConsoleApplication2.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include <windows.h>
#include <stdio.h>
#include <tchar.h>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
/**
 * Decodes the UTF-8 sequence starting at pText into a single WCHAR code unit.
 *
 * UTF-8 is a variable-length encoding; the lead byte announces the length:
 *   1 byte : 0xxxxxxx
 *   2 bytes: 110xxxxx 10xxxxxx
 *   3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
 *   4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * One-, two- and three-byte sequences are decoded. A null pointer, a stray
 * continuation byte, a missing or invalid continuation byte, and four-byte
 * sequences (which do not fit in one code unit) all produce U+FFFD
 * REPLACEMENT CHARACTER. Overlong encodings and surrogate code points are
 * not rejected.
 *
 * Each continuation byte is validated before the next byte is read, so the
 * function never reads beyond the terminating NUL of a C string.
 *
 * @param pText   Pointer to the first byte of the UTF-8 sequence.
 * @param unicode Receives the decoded code unit.
 * @return        The address of `unicode`.
 */
WCHAR * UTF_8ToUnicode(char *pText, WCHAR &unicode)
{
    const WCHAR kReplacement = 0xFFFD;

    // Start from the failure value; every early return below reports it.
    unicode = kReplacement;
    if (pText == NULL)
        return &unicode;

    // Work on unsigned values so bytes >= 0x80 are not sign-extended.
    const unsigned int b0 = static_cast<unsigned char>(pText[0]);
    if (b0 < 0x80)
    {
        unicode = static_cast<WCHAR>(b0);
        return &unicode;
    }

    const bool isTwoByte = (b0 & 0xE0) == 0xC0;
    const bool isThreeByte = (b0 & 0xF0) == 0xE0;
    if (!isTwoByte && !isThreeByte)
        return &unicode; // stray continuation byte, 4-byte lead or invalid lead

    const unsigned int b1 = static_cast<unsigned char>(pText[1]);
    if ((b1 & 0xC0) != 0x80)
        return &unicode;

    if (isTwoByte)
    {
        unicode = static_cast<WCHAR>(((b0 & 0x1F) << 6) | (b1 & 0x3F));
        return &unicode;
    }

    const unsigned int b2 = static_cast<unsigned char>(pText[2]);
    if ((b2 & 0xC0) != 0x80)
        return &unicode;

    // 4 + 6 + 6 payload bits; computed arithmetically instead of poking the
    // individual bytes of the WCHAR, so the result does not depend on byte order.
    unicode = static_cast<WCHAR>(((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
    return &unicode;
}
/**
 * Converts one UTF-16 code unit to the system ANSI code page (CP_ACP).
 *
 * NOTE(review): despite the name, the output is GB2312/GBK only when the
 * active ANSI code page is a Chinese one — confirm this is acceptable for
 * the target machines.
 *
 * @param uData  The code unit to convert.
 * @param buffer Destination with room for two bytes. When the character
 *               converts to a single byte only buffer[0] is written and
 *               buffer[1] keeps its previous value; callers that need to
 *               tell the two cases apart should clear the buffer first.
 *               When the conversion fails both bytes are set to '?' so the
 *               buffer never holds indeterminate data.
 * @return       `buffer`.
 */
char * UnicodeToGB2312(WCHAR uData, char buffer[2])
{
    const int kBufferBytes = 2; // capacity promised by the parameter declaration

    if (buffer == NULL)
        return buffer;

    // dwFlags is a DWORD, so pass 0 rather than the pointer constant NULL.
    const int written = WideCharToMultiByte(CP_ACP, 0, &uData, 1, buffer, kBufferBytes, NULL, NULL);
    if (written <= 0)
    {
        // The API reports failure with 0; substitute a printable placeholder.
        buffer[0] = '?';
        buffer[1] = '?';
    }
    return buffer;
}
char * TranslateUTF8ToGB(char *str, size_t len)
{
char * newCharBuffer = new char[len];
int index = 0;
int nCBIndex = 0;
WCHAR wTemp = 0;
char cTemp[2] = " ";
while (index < len)
{
if (str[index] == 0)
break;
else if (str[index] > 0) // 如果是GB2312的字符
{
newCharBuffer[nCBIndex] = str[index]; //直接复制
index += 1; //源字符串偏移量1
nCBIndex += 1; //目标字符串偏移量1
}
else //如果是UTF-8的字符
{
UTF_8ToUnicode(str + index, wTemp); //先把UTF-8转成Unicode
UnicodeToGB2312(wTemp, &newCharBuffer[nCBIndex]); //再把Unicode 转成 GB2312
index += 3; //源字符串偏移量3
nCBIndex += 2; //目标字符串偏移量2 因为一个中文UTF-8占3个字节,GB2312占两个字节
}
}
newCharBuffer[nCBIndex] = '\0'; //结束符
strcpy(str, newCharBuffer);
delete newCharBuffer; //避免内存泄漏,这是对源代码的稍许修改
newCharBuffer = NULL;
return str;
}
/**
 * Reads ReadMe.txt line by line, converts each line from UTF-8 with
 * TranslateUTF8ToGB and prints it to standard output.
 *
 * @return 0 on success, 1 when the file cannot be opened.
 */
int main()
{
    std::ifstream in("ReadMe.txt", std::ios::in);
    if (!in.is_open())
    {
        std::cout << "open fail..." << std::endl;
        return 1; // report the failure to the caller instead of exiting with success
    }

    // Spare NUL bytes appended to every line before conversion, so that a
    // converter which looks ahead for continuation bytes stays inside the
    // buffer even when a line ends in a truncated multi-byte sequence.
    const std::string::size_type kTailPadding = 3;

    // Looping on the read itself (rather than on eof()) ends as soon as a read
    // fails, so no phantom empty line is printed after the last one, and
    // std::string removes the fixed line-length limit that could leave the
    // stream stuck in a failed state.
    std::string line;
    while (std::getline(in, line))
    {
        line.append(kTailPadding, '\0');
        TranslateUTF8ToGB(&line[0], line.size());
        // Print as a C string: the converted text ends at the first NUL.
        std::cout << line.c_str() << '\n';
    }

    // The stream closes itself when `in` goes out of scope.
    return 0;
}
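// ifstream打印汉字乱码问题 UTF-8转GB2312 (Garbled Chinese output when printing text read with ifstream: converting UTF-8 to GB2312)
// 最新推荐文章于 2024-07-12 00:30:37 发布 (Latest recommended article published 2024-07-12 00:30:37)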