mfc 中文乱码转换为正常中文_中文编码转换

最新推荐文章于 2024-04-28 15:15:36 发布

冯爽妹

最新推荐文章于 2024-04-28 15:15:36 发布

阅读量1.6k

点赞数

文章标签： mfc 中文乱码转换为正常中文

本文链接：https://blog.csdn.net/weixin_28921621/article/details/112833489

版权

// Coder.cpp: implementation of the Coder class.

#include "stdafx.h"

#include "Coder.h"

#include "Encoding.h"

#ifdef _DEBUG

#undef THIS_FILE

static char THIS_FILE[]=__FILE__;

#define new DEBUG_NEW

#endif

// Construction/Destruction

// Definitions of the file-header (byte-order mark) constants.
// GetCodeType compares the first bytes of a file against these, and the
// file converters write them at the start of UTF-16 output files.

/*static*/ const byte Coder::UNICODEBOM[2]={0xFF,0xFE};     // header matched for TextCode UNICODE

/*static*/ const byte Coder::UNICODEBEBOM[2]={0xFE,0xFF};   // header matched for TextCode UNICODEBIGENDIAN

/*static*/ const byte Coder::UTF8BOM[3]={0xEF,0xBB,0xBF};   // header matched for TextCode UTF8

// Constructor: sets the default number of bytes converted per pass to 2 MiB.
Coder::Coder()
{
    PREDEFINEDSIZE = 2 * 1024 * 1024;
}

// Destructor: performs no explicit cleanup.
Coder::~Coder() {}

//繁体中文BIG5 转换成简体中文 GB2312

char* Coder::BIG5ToGB2312(const char* szBIG5Str)

{

CString msg;

LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC);

wchar_t* szUnicodeBuff =MByteToWChar(CP_BIG5,szBIG5Str);

char* szGB2312Buff =WCharToMByte(CP_GB2312,szUnicodeBuff);

int nLength = LCMapString(lcid,LCMAP_SIMPLIFIED_CHINESE, szGB2312Buff,-1,NULL,0);

char* pBuffer = new char[nLength + 1];

if(!pBuffer)

return NULL;

memset(pBuffer,0,sizeof(char)*(nLength+1));

LCMapString(0x0804,LCMAP_SIMPLIFIED_CHINESE,szGB2312Buff,-1,pBuffer,nLength);

delete[] szUnicodeBuff;

delete[] szGB2312Buff;

return pBuffer;

}

// GB2312 转 GBK

char* Coder::GB2312ToGBK(const char *szGB2312Str)

{

int nStrLen = strlen(szGB2312Str);

if(!nStrLen)

return NULL;

LCID wLCID = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);

int nReturn = LCMapString(wLCID, LCMAP_TRADITIONAL_CHINESE, szGB2312Str, nStrLen, NULL, 0);

if(!nReturn)

return NULL;

char *pcBuf = new char[nReturn + 1];

if(!pcBuf)

return NULL;

memset(pcBuf,0,sizeof(char)*(nReturn + 1));

wLCID = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);

LCMapString(wLCID, LCMAP_TRADITIONAL_CHINESE, szGB2312Str, nReturn, pcBuf, nReturn);

return pcBuf;

}

// GBK 转换成 GB2312

char* Coder::GBKToGB2312(const char *szGBKStr)

{

int nStrLen = strlen(szGBKStr);

if(!nStrLen)

return NULL;

LCID wLCID = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_BIG5);

int nReturn = LCMapString(wLCID, LCMAP_SIMPLIFIED_CHINESE, szGBKStr, nStrLen, NULL, 0);

if(!nReturn)

return NULL;

char *pcBuf = new char[nReturn + 1];

memset(pcBuf,0,sizeof(char)*(nReturn + 1));

wLCID = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_BIG5);

LCMapString(wLCID, LCMAP_SIMPLIFIED_CHINESE, szGBKStr, nReturn, pcBuf, nReturn);

return pcBuf;

}

// GBK (simplified/traditional mix) -> Traditional Chinese BIG5.
// Done in two hops: GBK -> GB2312, then GB2312 -> BIG5.
// The returned buffer comes from GB2312ToBIG5 and is owned by the caller.
char* Coder::GBKToBIG5(const char *szGBKStr)
{
    char* const pGB2312 = GBKToGB2312(szGBKStr);
    char* const pBig5 = GB2312ToBIG5(pGB2312);

    // The intermediate GB2312 copy is no longer needed.
    delete[] pGB2312;

    return pBig5;
}

// Traditional Chinese BIG5 -> GBK (simplified/traditional mix).
// Done in two hops: BIG5 -> GB2312, then GB2312 -> GBK.
//
// Returns the buffer produced by GB2312ToGBK (owned by the caller, release
// with delete[]), or NULL if either hop yields no result.
char* Coder::BIG5ToGBK(const char *szBIG5Str)
{
    char *pTemp = BIG5ToGB2312(szBIG5Str);

    // BIG5ToGB2312 can return NULL; GB2312ToGBK calls strlen on its argument,
    // so a NULL must not be forwarded to it.
    if(!pTemp)
        return NULL;

    char *pBuffer = GB2312ToGBK(pTemp);

    delete[] pTemp;

    return pBuffer;
}

//简体中文 GB2312 转换成繁体中文BIG5

char* Coder::GB2312ToBIG5(const char* szGB2312Str)

{

LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC);

int nLength = LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGB2312Str,-1,NULL,0);

char* pBuffer=new char[nLength+1];

if(!pBuffer)

return NULL;

LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGB2312Str,-1,pBuffer,nLength);

pBuffer[nLength]=0;

wchar_t* pUnicodeBuff = MByteToWChar(CP_GB2312,pBuffer);

char* pBIG5Buff = WCharToMByte(CP_BIG5,pUnicodeBuff);

delete[] pBuffer;

delete[] pUnicodeBuff;

return pBIG5Buff;

}

// Detect the encoding of a file.
//
// UTF-8 / UTF-16 files are recognised by their header bytes (UTF8BOM,
// UNICODEBOM, UNICODEBEBOM). Otherwise up to 30 two-byte samples are taken,
// skipping 100 bytes after each one so the samples spread over the file, and
// each sample is classified by byte ranges as GB2312, BIG5 or GBK. The result
// is the classification of the LAST sample that matched any range (samples
// are not tallied). This detection of the Chinese encodings is a heuristic
// and can be wrong.
//
// Returns GB2312 when the file cannot be opened or when no sample matches.
TextCode Coder::GetCodeType(CString filepath)
{
    CFile file;

    if(!file.Open(filepath,CFile::modeRead))
        return GB2312;

    // Zero-initialised so that a file shorter than 3 bytes never leaves
    // indeterminate bytes in the comparisons below.
    byte buf[3] = {0, 0, 0};
    const UINT nHeader = file.Read(buf,3);

    if(nHeader >= 3 && buf[0]==UTF8BOM[0] && buf[1]==UTF8BOM[1] && buf[2]==UTF8BOM[2])
        return UTF8;

    if(nHeader >= 2 && buf[0]==UNICODEBOM[0] && buf[1]==UNICODEBOM[1])
        return UNICODE;

    if(nHeader >= 2 && buf[0]==UNICODEBEBOM[0] && buf[1]==UNICODEBEBOM[1])
        return UNICODEBIGENDIAN;

    // No header found: sample from the start of the file again. Continuing
    // from offset 3 (where the header read stopped) would put every sample of
    // a purely double-byte text on an odd offset.
    file.Seek(0,CFile::begin);

    // Fallback when nothing matches; same value as the "cannot open" case.
    TextCode tctemp = GB2312;

    for(int remaining = 30; remaining > 0; --remaining)
    {
        // A short read at end of file would leave a stale second byte.
        if(file.Read(buf,2) != 2)
            break;

        if( (buf[0]>=176 && buf[0]<=247) && (buf[1]>=160 && buf[1]<=254) )
            tctemp=GB2312;
        else if( (buf[0]>=129 && buf[0]<=255) && ( (buf[1]>=64 && buf[1]<=126) || (buf[1]>=161 && buf[1]<=254) ) )
            tctemp=BIG5;
        else if( (buf[0]>=129 && buf[0]<=254) && (buf[1]>=64 && buf[1]<=254) )
            tctemp=GBK;

        // Skip ahead so the samples cover more of the file.
        file.Seek(100,CFile::current);
    }

    return tctemp;
}

// Convert a multi-byte file (GB2312, GBK, BIG5 or UTF-8) into a UTF-16 file
// (TextCode UNICODE or UNICODEBIGENDIAN).
//
// filesourcepath  file to read
// filesavepath    file to create/overwrite
// tcTo            target encoding; must be UNICODE or UNICODEBIGENDIAN
// tcCur           source encoding; DefaultCodeType means "detect with
//                 GetCodeType". Detection is heuristic, so callers may
//                 override it.
//
// Returns FALSE when the encodings are not a valid combination, the source
// cannot be opened or is empty, the target cannot be created, the working
// buffer cannot be allocated, or a chunk fails to convert. Returns TRUE
// otherwise.
//
// The file is processed in chunks of at most PREDEFINEDSIZE bytes, and each
// chunk is treated as a NUL-terminated string.
// NOTE(review): a multi-byte character that straddles a chunk boundary is
// not reassembled; only files larger than PREDEFINEDSIZE are affected.
BOOL Coder::MBFileToUnicodeFile(CString filesourcepath, CString filesavepath,TextCode tcTo,TextCode tcCur)
{
    // Detection is heuristic, so a caller-supplied source encoding wins.
    const TextCode curtc = (tcCur!=DefaultCodeType) ? tcCur : GetCodeType(filesourcepath);

    // Source must be a multi-byte encoding, target one of the UTF-16 kinds.
    if(curtc>UTF8 || (tcTo!=UNICODE && tcTo!=UNICODEBIGENDIAN))
        return FALSE;

    // Code page used for the multi-byte -> wide conversion.
    UINT CodePage;
    switch(curtc)
    {
    case GB2312:
        CodePage=CP_GB2312;
        break;
    case GBK:
        CodePage=CP_GB2312;   // special case: text is first run through GBKToGB2312
        break;
    case BIG5:
        CodePage=CP_BIG5;
        break;
    case UTF8:
        CodePage=CP_UTF8;
        break;
    default:
        return FALSE;         // no code page known for this value
    }

    CFile filesource,filesave;
    DWORD filelength=0;

    // Fail if the source cannot be opened or is empty, or the target cannot
    // be created.
    if(!filesource.Open(filesourcepath,CFile::modeRead) || 0==(filelength=static_cast<DWORD>(filesource.GetLength())))
        return FALSE;

    if(!filesave.Open(filesavepath,CFile::modeCreate|CFile::modeWrite))
        return FALSE;

    // Working buffer: the whole file if it is small, otherwise one chunk.
    const DWORD chunkMax = static_cast<DWORD>(PREDEFINEDSIZE);
    const DWORD bufferlen = (filelength<chunkMax) ? filelength : chunkMax;

    char *pChSrc=new char[bufferlen+1];
    if(!pChSrc)
        return FALSE;

    // Skip the UTF-8 header only if it is really there: a caller may force
    // tcCur=UTF8 for a file that has none.
    if(UTF8==curtc)
    {
        byte header[3] = {0, 0, 0};
        if(filesource.Read(header,3)==3 &&
           header[0]==UTF8BOM[0] && header[1]==UTF8BOM[1] && header[2]==UTF8BOM[2])
            filelength-=3;
        else
            filesource.Seek(0,CFile::begin);
    }

    // Write the header of the target file.
    if(UNICODEBIGENDIAN==tcTo)
        filesave.Write(UNICODEBEBOM,2*sizeof(byte));
    else
        filesave.Write(UNICODEBOM,2*sizeof(byte));

    BOOL bOk=TRUE;

    // Read and convert chunk by chunk until the file is exhausted.
    while(filelength>0)
    {
        memset(pChSrc,0,sizeof(char)*(bufferlen+1));

        const DWORD len = (filelength>chunkMax) ? chunkMax : filelength;
        const DWORD readlen = filesource.Read(pChSrc,len);
        if(!readlen)
            break;

        // GBK is first mapped to GB2312. The mapped copy is kept in its own
        // pointer so that the read buffer (and its size) stays intact for the
        // next iteration.
        char *pMapped=NULL;
        const char *pInput=pChSrc;
        if(GBK==curtc)
        {
            pMapped=GBKToGB2312(pChSrc);
            pInput=pMapped;
        }

        wchar_t *pwChDes = pInput ? MByteToWChar(CodePage,pInput) : NULL;
        delete[] pMapped;

        if(!pwChDes)
        {
            bOk=FALSE;
            break;
        }

        if(UNICODEBIGENDIAN==tcTo)
            UnicodeEndianConvert(pwChDes);

        // Length written is in bytes, not characters.
        filesave.Write(pwChDes,static_cast<UINT>(wcslen(pwChDes)*sizeof(wchar_t)));
        filesave.Flush();

        delete[] pwChDes;

        filelength-=readlen;
    }

    delete[] pChSrc;

    return bOk;
}

// Convert a NUL-terminated multi-byte string in the given code page to a
// wide-character string using MultiByteToWideChar.
//
// Returns a new[]-allocated, NUL-terminated buffer owned by the caller
// (release with delete[]), or NULL when the input is NULL or the conversion
// fails.
wchar_t* Coder::MByteToWChar(UINT CodePage,LPCSTR lpcszSrcStr)
{
    if(!lpcszSrcStr)
        return NULL;

    // Sizing pass; with a source length of -1 the count includes the NUL.
    int len=MultiByteToWideChar(CodePage,0,lpcszSrcStr,-1,NULL,0);
    if(len<=0)
        return NULL;

    LPWSTR lpcwsStrDes=new wchar_t[len+1];
    if(!lpcwsStrDes)
        return NULL;

    memset(lpcwsStrDes,0,sizeof(wchar_t)*(len+1));

    len=MultiByteToWideChar(CodePage,0,lpcszSrcStr,-1,lpcwsStrDes,len);
    if(!len)
    {
        delete[] lpcwsStrDes;
        return NULL;
    }

    return lpcwsStrDes;
}

// Convert a NUL-terminated wide-character string to a multi-byte string in
// the given code page using WideCharToMultiByte.
//
// Returns a new[]-allocated, NUL-terminated buffer owned by the caller
// (release with delete[]), or NULL when the input is NULL or the conversion
// fails.
char* Coder::WCharToMByte(UINT CodePage,LPCWSTR lpcwszSrcStr)
{
    if(!lpcwszSrcStr)
        return NULL;

    // Sizing pass; with a source length of -1 the count includes the NUL.
    int len=WideCharToMultiByte(CodePage,0,lpcwszSrcStr,-1,NULL,0,NULL,NULL);
    if(len<=0)
        return NULL;

    char* lpszDesStr=new char[len+1];

    // Check the allocation before touching the buffer.
    if(!lpszDesStr)
        return NULL;

    memset(lpszDesStr,0,sizeof(char)*(len+1));

    len=WideCharToMultiByte(CodePage,0,lpcwszSrcStr,-1,lpszDesStr,len,NULL,NULL);
    if(!len)
    {
        delete[] lpszDesStr;
        return NULL;
    }

    return lpszDesStr;
}

// Switch a NUL-terminated wide string between "Unicode" and "Unicode big
// endian" byte order, in place, by swapping the high and low byte of every
// 16-bit unit up to (not including) the terminator.
// A NULL or empty string is left untouched.
void Coder::UnicodeEndianConvert(LPWSTR lpwszstr)
{
    if(!lpwszstr)
        return;

    // Measure first: the terminator position is fixed before any unit is
    // rewritten.
    const size_t len=wcslen(lpwszstr);

    for(size_t index=0; index<len; ++index)
    {
        const unsigned int unit = static_cast<unsigned int>(lpwszstr[index]);
        const unsigned int high = (unit & 0xFF00) >> 8;
        const unsigned int low  = unit & 0x00FF;

        lpwszstr[index] = static_cast<wchar_t>((low << 8) | high);
    }
}

// Convert a "Unicode" or "Unicode big endian" file into a multi-byte file
// (GB2312, GBK, BIG5 or UTF-8).
//
// filesourcepath  file to read; its encoding is detected with GetCodeType
// filesavepath    file to create/overwrite
// tcTo            target multi-byte encoding
//
// Returns FALSE when the encodings are not a valid combination, the source
// cannot be opened or is empty, the target cannot be created, the working
// buffer cannot be allocated, or a chunk fails to convert. Returns TRUE
// otherwise.
//
// The file is processed in chunks of at most PREDEFINEDSIZE bytes (rounded
// down to a whole number of 16-bit units), and each chunk is treated as a
// NUL-terminated string.
BOOL Coder::UnicodeFileToMBFile(CString filesourcepath, CString filesavepath,TextCode tcTo)
{
    const TextCode curtc=GetCodeType(filesourcepath);

    // Wrong combination of source/target types: conversion fails.
    if(curtc<=UTF8 || tcTo>UTF8 || curtc==tcTo)
        return FALSE;

    // Code page for the wide -> multi-byte step.
    UINT CodePage;
    switch(tcTo)
    {
    case GB2312:
        CodePage=CP_GB2312;
        break;
    case GBK:
        CodePage=CP_GB2312;   // special case: converted on from GB2312 below
        break;
    case BIG5:
        CodePage=CP_GB2312;   // special case: converted on from GB2312 below
        break;
    case UTF8:
        CodePage=CP_UTF8;
        break;
    default:
        return FALSE;         // no code page known for this value
    }

    CFile filesource,filesave;
    DWORD filelength=0;

    // Fail if the source cannot be opened or is empty, or the target cannot
    // be created.
    if(!filesource.Open(filesourcepath,CFile::modeRead) || 0==(filelength=static_cast<DWORD>(filesource.GetLength())))
        return FALSE;

    if(!filesave.Open(filesavepath,CFile::modeCreate|CFile::modeWrite))
        return FALSE;

    // Keep chunks on a 16-bit boundary so a code unit is never cut in half.
    DWORD chunkMax = static_cast<DWORD>(PREDEFINEDSIZE) & ~static_cast<DWORD>(1);
    if(chunkMax<sizeof(wchar_t))
        chunkMax=sizeof(wchar_t);

    const DWORD bufferlen = (filelength<chunkMax) ? filelength : chunkMax;

    // (bufferlen/2)+1 units is always at least bufferlen+1 bytes.
    wchar_t *pwChSrc=new wchar_t[(bufferlen/2)+1];
    if(!pwChSrc)
        return FALSE;

    // Skip the 2-byte file header and do not count it as content.
    filesource.Seek(sizeof(wchar_t),CFile::begin);
    filelength = (filelength>sizeof(wchar_t)) ? filelength-static_cast<DWORD>(sizeof(wchar_t)) : 0;

    BOOL bOk=TRUE;

    while(filelength>0)
    {
        memset(pwChSrc,0,sizeof(wchar_t)*((bufferlen/2)+1));

        const DWORD len = (filelength>chunkMax) ? chunkMax : filelength;
        const DWORD readlen = filesource.Read(pwChSrc,len);
        if(!readlen)
            break;

        if(UNICODEBIGENDIAN==curtc)
            UnicodeEndianConvert(pwChSrc);

        char *pChDes=WCharToMByte(CodePage,pwChSrc);

        // GBK cannot be produced directly and a direct BIG5 conversion gives
        // wrong results, so both go through GB2312 first. The GB2312
        // intermediate is released as soon as the final text exists.
        if(pChDes && (GBK==tcTo || BIG5==tcTo))
        {
            char *pFinal = (GBK==tcTo) ? GB2312ToGBK(pChDes) : GB2312ToBIG5(pChDes);
            delete[] pChDes;
            pChDes=pFinal;
        }

        if(!pChDes)
        {
            bOk=FALSE;
            break;
        }

        filesave.Write(pChDes,static_cast<UINT>(strlen(pChDes)));
        filesave.Flush();

        delete[] pChDes;

        filelength-=readlen;
    }

    delete[] pwChSrc;

    return bOk;
}

// Convert a multi-byte file into another multi-byte encoding (GB2312, GBK,
// BIG5, UTF-8). Each chunk is converted by MByteToMByte.
//
// filesourcepath  file to read
// filesavepath    file to create/overwrite
// tcTo            target multi-byte encoding
// tcCur           source encoding; DefaultCodeType means "detect with
//                 GetCodeType". Detection is heuristic, so callers may
//                 override it.
//
// Returns FALSE when the encodings are not a valid combination, the source
// cannot be opened or is empty, the target cannot be created, the working
// buffer cannot be allocated, or a chunk fails to convert. Returns TRUE
// otherwise.
//
// The file is processed in chunks of at most PREDEFINEDSIZE bytes, and each
// chunk is treated as a NUL-terminated string.
// NOTE(review): a multi-byte character that straddles a chunk boundary is
// not reassembled. Also, no UTF-8 header is written when tcTo is UTF8, while
// GetCodeType recognises UTF-8 only by that header - confirm this is wanted.
BOOL Coder::MBFileToMBFile(CString filesourcepath, CString filesavepath,TextCode tcTo,TextCode tcCur)
{
    // Detection is heuristic, so a caller-supplied source encoding wins.
    const TextCode curtc = (DefaultCodeType!=tcCur) ? tcCur : GetCodeType(filesourcepath);

    // Wrong combination of source/target types: conversion fails.
    if(curtc>UTF8 || tcTo>UTF8 || curtc==tcTo)
        return FALSE;

    CFile filesource,filesave;
    DWORD filelength=0;

    // Fail if the source cannot be opened or is empty, or the target cannot
    // be created.
    if(!filesource.Open(filesourcepath,CFile::modeRead) || 0==(filelength=static_cast<DWORD>(filesource.GetLength())))
        return FALSE;

    if(!filesave.Open(filesavepath,CFile::modeCreate|CFile::modeWrite))
        return FALSE;

    // Working buffer: the whole file if it is small, otherwise one chunk.
    const DWORD chunkMax = static_cast<DWORD>(PREDEFINEDSIZE);
    const DWORD bufferlen = (filelength<chunkMax) ? filelength : chunkMax;

    char *pChSrc=new char[bufferlen+1];
    if(!pChSrc)
        return FALSE;

    // Skip the UTF-8 header only if it is really there: a caller may force
    // tcCur=UTF8 for a file that has none.
    if(UTF8==curtc)
    {
        byte header[3] = {0, 0, 0};
        if(filesource.Read(header,3)==3 &&
           header[0]==UTF8BOM[0] && header[1]==UTF8BOM[1] && header[2]==UTF8BOM[2])
            filelength-=3;
        else
            filesource.Seek(0,CFile::begin);
    }

    const UINT CodePageCur=GetCodePage(curtc);
    const UINT CodePageTo=GetCodePage(tcTo);

    BOOL bOk=TRUE;

    while(filelength>0)
    {
        memset(pChSrc,0,sizeof(char)*(bufferlen+1));

        const DWORD len = (filelength>chunkMax) ? chunkMax : filelength;
        const DWORD readlen = filesource.Read(pChSrc,len);
        if(!readlen)
            break;

        char *pChDes=MByteToMByte(CodePageCur,CodePageTo,pChSrc);
        if(!pChDes)
        {
            bOk=FALSE;
            break;
        }

        filesave.Write(pChDes,static_cast<UINT>(strlen(pChDes)));

        // Release each converted chunk; only the read buffer is reused.
        delete[] pChDes;

        filelength-=readlen;
    }

    delete[] pChSrc;

    return bOk;
}

// Convert a file between "Unicode" and "Unicode big endian".
//
// filesourcepath  file to read; its encoding is detected with GetCodeType
// filesavepath    file to create/overwrite
// tcTo            target encoding; must be UNICODE or UNICODEBIGENDIAN and
//                 differ from the detected source encoding
//
// The whole file is read into memory, byte-swapped with UnicodeEndianConvert
// and written out after the target header. Content is treated as a
// NUL-terminated wide string.
//
// Returns FALSE on an invalid encoding combination, when either file cannot
// be opened, when the source is empty, or when the buffer cannot be
// allocated. Returns TRUE otherwise.
BOOL Coder::UnicodeEndianFileConvert(CString filesourcepath, CString filesavepath,TextCode tcTo)
{
    const TextCode curtc=GetCodeType(filesourcepath);

    if(curtc!=UNICODE && curtc!=UNICODEBIGENDIAN)
        return FALSE;

    // Only the two UTF-16 kinds are meaningful targets; anything else would
    // get a big-endian header written by the branch further down.
    if(tcTo!=UNICODE && tcTo!=UNICODEBIGENDIAN)
        return FALSE;

    if(curtc==tcTo)
        return FALSE;

    CFile filesource,filesave;

    if(!filesource.Open(filesourcepath,CFile::modeRead) || !filesave.Open(filesavepath,CFile::modeCreate|CFile::modeWrite))
        return FALSE;

    DWORD length=static_cast<DWORD>(filesource.GetLength());
    if(length<2*sizeof(byte))
        return FALSE;

    // Skip the source header. If it were read along with the content it
    // would be byte-swapped and written again after the explicit header
    // below, leaving two headers in the output.
    filesource.Seek(2*sizeof(byte),CFile::begin);
    length-=2*sizeof(byte);

    wchar_t *pwChDes=new wchar_t[(length/2)+1];
    if(!pwChDes)
        return FALSE;

    memset(pwChDes,0,sizeof(wchar_t)*((length/2)+1));

    filesource.Read(pwChDes,length);

    UnicodeEndianConvert(pwChDes);

    // Number of bytes to write.
    length=static_cast<DWORD>(wcslen(pwChDes)*sizeof(wchar_t));

    if(UNICODE==tcTo)
        filesave.Write(UNICODEBOM,2*sizeof(byte));
    else
        filesave.Write(UNICODEBEBOM,2*sizeof(byte));

    filesave.Write(pwChDes,length);
    filesave.Flush();

    delete[] pwChDes;

    return TRUE;
}

// Convert a file from one encoding to another.
// Six encodings, converted pairwise, give 30 possible conversions; this
// function picks the specialised converter for the requested pair.
//
// filesourcepath  file to read
// filesavepath    file to create/overwrite
// tcTo            target encoding
// tcCur           source encoding; DefaultCodeType means "detect with
//                 GetCodeType"
//
// Returns FALSE when source and target encodings are the same, otherwise the
// result of the converter that was called.
BOOL Coder::FileToOtherFile(CString filesourcepath, CString filesavepath, TextCode tcTo,TextCode tcCur)
{
    const TextCode curtc = (DefaultCodeType!=tcCur) ? tcCur : GetCodeType(filesourcepath);

    if(curtc==tcTo)
        return FALSE;

    // Classify both ends explicitly instead of relying on enum ordering.
    const bool bSrcUnicode = (UNICODE==curtc || UNICODEBIGENDIAN==curtc);
    const bool bDstUnicode = (UNICODE==tcTo || UNICODEBIGENDIAN==tcTo);

    // Between UNICODE and UNICODE big endian: 2 conversions.
    if(bSrcUnicode && bDstUnicode)
        return UnicodeEndianFileConvert(filesourcepath,filesavepath,tcTo);

    // Multi-byte file to UNICODE / UNICODE big endian: 8 conversions.
    if(bDstUnicode)
        return MBFileToUnicodeFile(filesourcepath,filesavepath,tcTo,curtc);

    // UNICODE / UNICODE big endian to multi-byte file: 8 conversions.
    if(bSrcUnicode)
        return UnicodeFileToMBFile(filesourcepath,filesavepath,tcTo);

    // Between multi-byte files: 12 conversions.
    return MBFileToMBFile(filesourcepath,filesavepath,tcTo,curtc);
}

// Return the display name of an encoding type.
// Values other than the six handled here yield an empty string.
CString Coder::CodeTypeToString(TextCode tc)
{
    CString strName;

    if(GB2312==tc)
        strName=_T("GB2312");
    else if(BIG5==tc)
        strName=_T("Big5");
    else if(GBK==tc)
        strName=_T("GBK");
    else if(UTF8==tc)
        strName=_T("UTF-8");
    else if(UNICODE==tc)
        strName=_T("Unicode");
    else if(UNICODEBIGENDIAN==tc)
        strName=_T("Unicode big endian");

    return strName;
}

// Convert a NUL-terminated multi-byte string from one code page to another.
//
// CodePageCur  code page of szSrcStr (CP_GB2312, CP_BIG5, CP_GBK or CP_UTF8)
// CodePageTo   code page wanted
// szSrcStr     text to convert
//
// Three routes are used:
//  - between the three Chinese code pages: the dedicated helper for the pair
//  - from UTF-8: straight to GB2312, and for GBK/BIG5 onward from GB2312
//  - to UTF-8: through wide characters (GBK is first mapped to GB2312)
//
// Returns a new[]-allocated buffer owned by the caller (release with
// delete[]), or NULL if any step fails or CodePageCur is not recognised.
char* Coder::MByteToMByte(UINT CodePageCur, UINT CodePageTo, const char* szSrcStr)
{
    char *pchDes=NULL;
    char *pchTemp=NULL;
    wchar_t *pwchtemp=NULL;

    if(CodePageCur!=CP_UTF8 && CodePageTo!=CP_UTF8)
    {
        // Conversion among the three Chinese encodings.
        switch(CodePageCur)
        {
        case CP_GB2312:
            pchDes = (CP_BIG5==CodePageTo) ? GB2312ToBIG5(szSrcStr) : GB2312ToGBK(szSrcStr);
            break;

        case CP_BIG5:
            pchDes = (CP_GB2312==CodePageTo) ? BIG5ToGB2312(szSrcStr) : BIG5ToGBK(szSrcStr);
            break;

        case CP_GBK:
            pchDes = (CP_GB2312==CodePageTo) ? GBKToGB2312(szSrcStr) : GBKToBIG5(szSrcStr);
            break;

        default:
            break;
        }
    }
    else if(CP_UTF8==CodePageCur)
    {
        // From UTF-8: GB2312 is produced directly; the other Chinese
        // encodings use GB2312 as the intermediate form.
        pwchtemp=MByteToWChar(CodePageCur,szSrcStr);

        if(pwchtemp)
        {
            if(CP_GB2312==CodePageTo || CP_UTF8==CodePageTo)
            {
                pchDes=WCharToMByte(CodePageTo,pwchtemp);
            }
            else
            {
                pchTemp=WCharToMByte(CP_GB2312,pwchtemp);

                // The GB2312 helpers must not be handed a NULL pointer.
                if(pchTemp)
                    pchDes = (CP_GBK==CodePageTo) ? GB2312ToGBK(pchTemp) : GB2312ToBIG5(pchTemp);
            }
        }
    }
    else
    {
        // From another multi-byte encoding to UTF-8.
        if(CP_GBK==CodePageCur)
        {
            pchTemp=GBKToGB2312(szSrcStr);

            if(pchTemp)
                pwchtemp=MByteToWChar(CP_GB2312,pchTemp);
        }
        else
        {
            pwchtemp=MByteToWChar(CodePageCur,szSrcStr);
        }

        if(pwchtemp)
            pchDes=WCharToMByte(CodePageTo,pwchtemp);
    }

    delete[] pchTemp;
    delete[] pwchtemp;

    return pchDes;
}

// Return the code page that corresponds to an encoding type.
//
// GB2312, BIG5, GBK and UTF8 map to CP_GB2312, CP_BIG5, CP_GBK and CP_UTF8.
// UNICODE, UNICODEBIGENDIAN and any other value have no entry here; CP_ACP
// is returned for them so the result is always a defined value.
UINT Coder::GetCodePage(TextCode tccur)
{
    UINT CodePage=CP_ACP;

    switch(tccur)
    {
    case GB2312:
        CodePage=CP_GB2312;
        break;

    case BIG5:
        CodePage=CP_BIG5;
        break;

    case GBK:
        CodePage=CP_GBK;
        break;

    case UTF8:
        CodePage=CP_UTF8;
        break;

    case UNICODEBIGENDIAN:
    case UNICODE:
    default:
        break;
    }

    return CodePage;
}

// Set the number of bytes converted per pass.
// A count of zero is ignored and the current value is kept.
void Coder::SetDefaultConvertSize(UINT nCount)
{
    if(0==nCount)
        return;

    PREDEFINEDSIZE=nCount;
}

冯爽妹

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
mfc 中文乱码转换为正常中文_中文编码转换

//Coder.cpp:implementationoftheCoderclass.////////////////////////////////////////////////////////////////////////#include"stdafx.h"#include"Coder.h"#include"Encoding.h"#ifdef_DEBUG#undefTH...
复制链接

扫一扫