VC中实现GB2312、BIG5、Unicode编码转换

转载:http://www.jb51.net/article/52477.htm

一、文件编码格式转换


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// Convert a GB2312-encoded file into a UTF-16 little-endian ("Unicode") file.
// All variables used here are declared outside this fragment.
if ((file_handle = fopen(filenam, "rb")) != NULL)
{
    // Read the GB2312 source file as raw bytes (at most POOL_BUFF_SIZE of them).
    numread = fread(str_buf_pool, sizeof(char), POOL_BUFF_SIZE, file_handle);
    fclose(file_handle);

    if (numread > 0)
    {
        // fread() does not NUL-terminate the buffer, so the exact byte count is
        // passed instead of -1; nLen is then the number of UTF-16 code units
        // produced, with no terminator included.
        // Code page 936 is used explicitly so the result does not depend on the
        // system ANSI code page (CP_ACP).
        nLen = MultiByteToWideChar(936, 0, str_buf_pool, (int)numread, NULL, 0);

        // NOTE(review): str_unicode_buf_pool must be able to hold nLen wchar_t
        // elements; its capacity is not visible in this fragment - confirm.
        if (nLen > 0 &&
            MultiByteToWideChar(936, 0, str_buf_pool, (int)numread,
                                (LPWSTR)str_unicode_buf_pool, nLen) > 0)
        {
            // Byte-order mark for UTF-16 little endian: 0xFF 0xFE.
            // (The UTF-16 big endian mark is 0xFE 0xFF.)
            unicode_little_file_header[0] = 0xFF;
            unicode_little_file_header[1] = 0xFE;

            // Write the target file: BOM first, then the converted text.
            if ((file_handle = fopen(filenewname, "wb+")) != NULL)
            {
                fwrite(unicode_little_file_header, sizeof(char), 2, file_handle);
                // Element size is one UTF-16 code unit, not the size of a pointer.
                numwrite = fwrite(str_unicode_buf_pool, sizeof(wchar_t), nLen, file_handle);
                fclose(file_handle);
            }
        }
    }
}

二、字符串编码格式转换


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
//GB2312 转换成 Unicode:
wchar_t * GB2312ToUnicode( const char * szGBString)
{
     UINT nCodePage = 936; //GB2312
     int nLength=MultiByteToWideChar(nCodePage,0,szGBString,-1,NULL,0);
     wchar_t * pBuffer = new wchar_t [nLength+1];
     MultiByteToWideChar(nCodePage,0,szGBString,-1,pBuffer,nLength);
     pBuffer[nLength]=0;
     return pBuffer;
}

//BIG5 转换成 Unicode:
wchar_t * BIG5ToUnicode( const char * szBIG5String)
{
     UINT nCodePage = 950; //BIG5
     int nLength=MultiByteToWideChar(nCodePage,0,szBIG5String,-1,NULL,0);
     wchar_t * pBuffer = new wchar_t [nLength+1];
     MultiByteToWideChar(nCodePage,0,szBIG5String,-1,pBuffer,nLength);
     pBuffer[nLength]=0;
     return pBuffer;
}
//Unicode 转换成 GB2312:
char * UnicodeToGB2312( const wchar_t * szUnicodeString)
{
     UINT nCodePage = 936; //GB2312
     int nLength=WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,NULL,0,NULL,NULL);
     char * pBuffer= new char [nLength+1];
     WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,pBuffer,nLength,NULL,NULL);
     pBuffer[nLength]=0;
     return pBuffer;
}
//Unicode 转换成 BIG5:
char * UnicodeToBIG5( const wchar_t * szUnicodeString)
{
     UINT nCodePage = 950; //BIG5
     int nLength=WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,NULL,0,NULL,NULL);
     char * pBuffer= new char [nLength+1];
     WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,pBuffer,nLength,NULL,NULL);
     pBuffer[nLength]=0;
     return pBuffer;
}
//繁体中文BIG5 转换成 简体中文 GB2312
char * BIG5ToGB2312( const char * szBIG5String)
{
     LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC);
     wchar_t * szUnicodeBuff = BIG5ToUnicode(szBIG5String);
     char * szGB2312Buff = UnicodeToGB2312(szUnicodeBuff);
     int nLength = LCMapString(lcid,LCMAP_SIMPLIFIED_CHINESE, szGB2312Buff,-1,NULL,0);
     char * pBuffer = new char [nLength + 1];
     LCMapString(0x0804,LCMAP_SIMPLIFIED_CHINESE,szGB2312Buff,-1,pBuffer,nLength);
     pBuffer[nLength] = 0;
     
     delete [] szUnicodeBuff;
     delete [] szGB2312Buff;
     return pBuffer;
}
//简体中文 GB2312 转换成 繁体中文BIG5
char * GB2312ToBIG5( const char * szGBString)
{
     LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC);
     int nLength = LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGBString,-1,NULL,0);
     char * pBuffer= new char [nLength+1];
     LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGBString,-1,pBuffer,nLength);
     pBuffer[nLength]=0;
     wchar_t * pUnicodeBuff = GB2312ToUnicode(pBuffer);
     char * pBIG5Buff = UnicodeToBIG5(pUnicodeBuff);
     delete [] pBuffer;
     delete [] pUnicodeBuff;
     return pBIG5Buff;
}

//GB2312到UTF-8的转换
static int GB2312ToUtf8(const char* gb2312, char* utf8)
{
int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, len);
len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, utf8, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}


//UTF-8到GB2312的转换
static int Utf8ToGB2312(const char* utf8,char* gb2312)
{
int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len);
len = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(CP_ACP, 0, wstr, -1, gb2312, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}


//GB2312到Unicode的转换
static int GB2312ToUnicode(const char* gb2312, char* unicode)
{
UINT nCodePage = 936; //GB2312
int len = MultiByteToWideChar(nCodePage, 0, gb2312, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(nCodePage, 0, gb2312, -1, wstr, len);
len = len*sizeof(wchar_t);
memcpy(unicode, wstr, len);
if(wstr) delete[] wstr;
return len;
}


//Unicode到GB2312的转换
static int UnicodeToGB2312(const char* unicode, int size, char*gb2312)
{
UINT nCodePage = 936; //GB2312
wchar_t* wstr = new wchar_t[size/2+1];
memcpy(wstr, unicode, size);
int len = WideCharToMultiByte(nCodePage, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(nCodePage, 0, wstr, -1, gb2312, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}


// Convert a NUL-terminated UTF-8 string to UTF-16, delivered as raw bytes.
//   utf8    : input text.
//   unicode : caller-supplied output buffer that receives the wchar_t data.
//             Its capacity is not passed in, so it cannot be checked here.
//             If NULL, nothing is written and the required size is returned.
// Returns the size of the result in BYTES (terminating NUL included), or 0 on
// failure.
static int Utf8ToUnicode(const char* utf8, char*unicode)
{
    if (utf8 == NULL) {
        return 0;
    }

    // With a source length of -1 the returned count includes the terminator.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (wideLen <= 0) {
        return 0;
    }

    int len = 0;
    wchar_t* wstr = new wchar_t[wideLen];
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, wideLen) > 0) {
        // The count is in wchar_t elements; scale it to bytes so the whole
        // string is copied, not just the first half.
        len = wideLen * (int)sizeof(wchar_t);
        if (unicode != NULL) {
            // memcpy rather than a pointer cast: the char buffer is not
            // guaranteed to be aligned for wchar_t.
            memcpy(unicode, wstr, len);
        }
    }
    delete[] wstr;
    return len;
}


// Convert UTF-16 text, supplied as raw bytes, to UTF-8.
//   unicode : input buffer holding wchar_t data.
//   size    : size of the input in BYTES; a trailing odd byte is ignored.
//   utf8    : caller-supplied output buffer. Its capacity is not passed in, so
//             it cannot be checked here. If NULL, nothing is written and the
//             required size is returned.
// Returns the number of bytes (terminating NUL included), or 0 on failure.
static int UnicodeToUtf8(const char* unicode, int size, char* utf8)
{
    if (unicode == NULL || size < 0) {
        return 0;
    }

    // Copy into a properly aligned wchar_t buffer and terminate it explicitly:
    // the conversion below uses a length of -1 and therefore scans for a NUL,
    // which the input is not guaranteed to contain.
    const int wideCount = size / (int)sizeof(wchar_t);
    wchar_t* wstr = new wchar_t[wideCount + 1];
    memcpy(wstr, unicode, wideCount * sizeof(wchar_t));
    wstr[wideCount] = 0;

    int len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
    if (len > 0 && utf8 != NULL) {
        len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, utf8, len, NULL, NULL);
    }
    delete[] wstr;
    return len;
}




三、API 函数:MultiByteToWideChar参数说明

第一个参数为代码页, 用 GetLocaleInfo 函数获取当前系统的代码页,936: 简体中文, 950: 繁体中文
第二个参数为选项,一般用 0 就可以了
第三个参数为 ANSI 字符串的地址, 这个字符串是第一个参数指定的语言的 ANSI 字符串 (AnsiString)
第四个参数为 ANSI 字符串的长度(以字节为单位);如果用 -1,就表示这是以 0 作为结束符的字符串,此时转换结果和返回的长度都包含结束符
第五个参数为转化生成的 unicode 字符串 (WideString) 缓冲区的地址
第六个参数为该缓冲区的容量,也就是能容纳多少个 UNICODE 字符;如果为 0,函数不会写入缓冲区(此时第五个参数可以为 NULL),而是返回所需的字符个数。

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值