VC中实现GB2312、BIG5、Unicode编码转换

最新推荐文章于 2023-12-18 11:43:52 发布

小郑2013

最新推荐文章于 2023-12-18 11:43:52 发布

阅读量3k

点赞数 1

分类专栏：编码详解

编码详解专栏收录该内容

4 篇文章 0 订阅

订阅专栏

转载：http://www.jb51.net/article/52477.htm

一、文件编码格式转换

 
        // Convert a GB2312-encoded file into a UTF-16 Little Endian ("Unicode") file.
        // This fragment relies on variables declared by the surrounding code:
        // file_handle, filenam, filenewname, str_buf_pool, str_unicode_buf_pool,
        // POOL_BUFF_SIZE, numread, numwrite, nLen and unicode_little_file_header.
        // NOTE(review): only the first POOL_BUFF_SIZE bytes of the source file are
        // converted; a larger file is truncated (possibly in the middle of a
        // double-byte character).
        if ((file_handle = fopen(filenam, "rb")) != NULL)
        {
            // Read the GB2312 source file as raw bytes.
            numread = fread(str_buf_pool, sizeof(char), POOL_BUFF_SIZE, file_handle);
            fclose(file_handle);

            // Convert the GB2312 bytes to UTF-16.
            // fread() does not NUL-terminate the buffer, so the exact byte count is
            // passed instead of -1; this also keeps a stray terminator out of the
            // output file. Code page 936 is used explicitly because CP_ACP only
            // means GB2312 on a Simplified Chinese system.
            // NOTE(review): assumes str_unicode_buf_pool can hold nLen wide
            // characters - confirm against its declaration.
            nLen = 0;
            if (numread > 0)
            {
                nLen = MultiByteToWideChar(936, 0, str_buf_pool, (int)numread, NULL, 0);
                if (nLen > 0)
                {
                    MultiByteToWideChar(936, 0, str_buf_pool, (int)numread,
                                        (LPWSTR)str_unicode_buf_pool, nLen);
                }
            }

            // Byte-order mark of a UTF-16 Little Endian file: 0xFF 0xFE.
            // (A UTF-16 Big Endian file starts with 0xFE 0xFF instead.)
            // The difference between the two byte orders is not covered here.
            unicode_little_file_header[0] = 0xFF;
            unicode_little_file_header[1] = 0xFE;

            // Store the target file: BOM first, then the converted text.
            if ((file_handle = fopen(filenewname, "wb+")) != NULL)
            {
                fwrite(unicode_little_file_header, sizeof(char), 2, file_handle);

                // The element size is one UTF-16 code unit (wchar_t), not the size
                // of a pointer (LPWSTR), which would write past the converted data.
                numwrite = fwrite(str_unicode_buf_pool, sizeof(wchar_t), nLen, file_handle);

                fclose(file_handle);
            }
        }

二、字符串编码格式转换

 
        //GB2312 转换成 Unicode： 
       
        wchar_t 
        * GB2312ToUnicode( 
        const 
        char 
        * szGBString) 
       
        { 
       
        UINT 
        nCodePage = 936; 
        //GB2312 
       
        int 
        nLength=MultiByteToWideChar(nCodePage,0,szGBString,-1,NULL,0); 
       
        wchar_t 
        * pBuffer = 
        new 
        wchar_t 
        [nLength+1]; 
       
        MultiByteToWideChar(nCodePage,0,szGBString,-1,pBuffer,nLength); 
       
        pBuffer[nLength]=0; 
       
        return 
        pBuffer; 
       
        } 
       
        //BIG5 转换成 Unicode： 
       
        wchar_t 
        * BIG5ToUnicode( 
        const 
        char 
        * szBIG5String) 
       
        { 
       
        UINT 
        nCodePage = 950; 
        //BIG5 
       
        int 
        nLength=MultiByteToWideChar(nCodePage,0,szBIG5String,-1,NULL,0); 
       
        wchar_t 
        * pBuffer = 
        new 
        wchar_t 
        [nLength+1]; 
       
        MultiByteToWideChar(nCodePage,0,szBIG5String,-1,pBuffer,nLength); 
       
        pBuffer[nLength]=0; 
       
        return 
        pBuffer; 
       
        } 
       
        //Unicode 转换成 GB2312： 
       
        char 
        * UnicodeToGB2312( 
        const 
        wchar_t 
        * szUnicodeString) 
       
        { 
       
        UINT 
        nCodePage = 936; 
        //GB2312 
       
        int 
        nLength=WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,NULL,0,NULL,NULL); 
       
        char 
        * pBuffer= 
        new 
        char 
        [nLength+1]; 
       
        WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,pBuffer,nLength,NULL,NULL); 
       
        pBuffer[nLength]=0; 
       
        return 
        pBuffer; 
       
        } 
       
        //Unicode 转换成 BIG5： 
       
        char 
        * UnicodeToBIG5( 
        const 
        wchar_t 
        * szUnicodeString) 
       
        { 
       
        UINT 
        nCodePage = 950; 
        //BIG5 
       
        int 
        nLength=WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,NULL,0,NULL,NULL); 
       
        char 
        * pBuffer= 
        new 
        char 
        [nLength+1]; 
       
        WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,pBuffer,nLength,NULL,NULL); 
       
        pBuffer[nLength]=0; 
       
        return 
        pBuffer; 
       
        } 
       
        //繁体中文BIG5 转换成 简体中文 GB2312 
       
        char 
        * BIG5ToGB2312( 
        const 
        char 
        * szBIG5String) 
       
        { 
       
        LCID 
        lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC); 
       
        wchar_t 
        * szUnicodeBuff = BIG5ToUnicode(szBIG5String); 
       
        char 
        * szGB2312Buff = UnicodeToGB2312(szUnicodeBuff); 
       
        int 
        nLength = LCMapString(lcid,LCMAP_SIMPLIFIED_CHINESE, szGB2312Buff,-1,NULL,0); 
       
        char 
        * pBuffer = 
        new 
        char 
        [nLength + 1]; 
       
        LCMapString(0x0804,LCMAP_SIMPLIFIED_CHINESE,szGB2312Buff,-1,pBuffer,nLength); 
       
        pBuffer[nLength] = 0; 
       
        delete 
        [] szUnicodeBuff; 
       
        delete 
        [] szGB2312Buff; 
       
        return 
        pBuffer; 
       
        } 
       
        //简体中文 GB2312 转换成 繁体中文BIG5 
       
        char 
        * GB2312ToBIG5( 
        const 
        char 
        * szGBString) 
       
        { 
       
        LCID 
        lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC); 
       
        int 
        nLength = LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGBString,-1,NULL,0); 
       
        char 
        * pBuffer= 
        new 
        char 
        [nLength+1]; 
       
        LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGBString,-1,pBuffer,nLength); 
       
        pBuffer[nLength]=0; 
       
        wchar_t 
        * pUnicodeBuff = GB2312ToUnicode(pBuffer); 
       
        char 
        * pBIG5Buff = UnicodeToBIG5(pUnicodeBuff); 
       
        delete 
        [] pBuffer; 
       
        delete 
        [] pUnicodeBuff; 
       
        return 
        pBIG5Buff; 
       
        }

//GB2312到UTF-8的转换
static int GB2312ToUtf8(const char* gb2312, char* utf8)
{
int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, len);
len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, utf8, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}


//UTF-8到GB2312的转换
static int Utf8ToGB2312(const char* utf8,char* gb2312)
{
int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len);
len = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(CP_ACP, 0, wstr, -1, gb2312, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}


//GB2312到Unicode的转换
static int GB2312ToUnicode(const char* gb2312, char* unicode)
{
UINT nCodePage = 936; //GB2312
int len = MultiByteToWideChar(nCodePage, 0, gb2312, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len+1];
memset(wstr, 0, len+1);
MultiByteToWideChar(nCodePage, 0, gb2312, -1, wstr, len);
len = len*sizeof(wchar_t);
memcpy(unicode, wstr, len);
if(wstr) delete[] wstr;
return len;
}


//Unicode到GB2312的转换
static int UnicodeToGB2312(const char* unicode, int size, char*gb2312)
{
UINT nCodePage = 936; //GB2312
wchar_t* wstr = new wchar_t[size/2+1];
memcpy(wstr, unicode, size);
int len = WideCharToMultiByte(nCodePage, 0, wstr, -1, NULL, 0, NULL, NULL);
WideCharToMultiByte(nCodePage, 0, wstr, -1, gb2312, len, NULL, NULL);
if(wstr) delete[] wstr;
return len;
}


// Convert a NUL-terminated UTF-8 string to UTF-16 and copy it into a byte buffer.
//   utf8:    source string.
//   unicode: destination byte buffer supplied by the caller; it must be large
//            enough for the result. Pass NULL to only query the required size.
// Returns the size of the result in BYTES (including the terminating wide NUL),
// or 0 if the conversion fails. The byte count is what UnicodeToUtf8 in this
// file expects as its size argument.
static int Utf8ToUnicode(const char* utf8, char*unicode)
{
    const int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (wlen <= 0)
        return 0;

    // Convert into a properly aligned temporary, then copy the raw bytes out.
    // wlen already counts the terminator, so no extra element or memset is needed.
    wchar_t* wstr = new wchar_t[wlen];
    int len = 0;
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, wlen) > 0)
    {
        // wlen is a count of wide characters; the copy and the return value
        // must be in bytes, otherwise only part of the text is copied.
        len = wlen * (int)sizeof(wchar_t);
        if (unicode != NULL)
            memcpy(unicode, wstr, len);
    }
    delete[] wstr;
    return len;
}


// Convert UTF-16 text held in a byte buffer to UTF-8.
//   unicode: source bytes (UTF-16 code units).
//   size:    size of the source in BYTES; a trailing partial code unit is ignored.
//   utf8:    destination buffer supplied by the caller; it must be large enough
//            for the result. Pass NULL to only query the required size.
// Conversion stops at the first wide NUL, or at the end of the source if it
// contains none. Returns the number of bytes produced (including the
// terminating NUL), or 0 on invalid arguments or conversion failure.
static int UnicodeToUtf8(const char* unicode, int size, char* utf8)
{
    if (unicode == NULL || size < 0)
        return 0;

    const int wcount = size / (int)sizeof(wchar_t);

    // Value-initialised so the copy is always NUL-terminated, even when the
    // source bytes do not include a terminator.
    wchar_t* wstr = new wchar_t[wcount + 1]();
    memcpy(wstr, unicode, wcount * sizeof(wchar_t));

    int len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
    if (len > 0 && utf8 != NULL)
        len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, utf8, len, NULL, NULL);

    delete[] wstr;
    return len;
}

三、API 函数：MultiByteToWideChar参数说明

第一个参数为代码页, 用 GetLocaleInfo 函数获取当前系统的代码页，936: 简体中文, 950: 繁体中文
第二个参数为选项，一般用 0 就可以了
第三个参数为 ANSI 字符串的地址, 这个字符串是第一个参数指定的语言的 ANSI 字符串 (AnsiString)
第四个参数为 ANSI 字符串的长度（以字节为单位），如果用 -1, 就表示是用 0 作为结束符的字符串，此时返回的长度包含结束符
第五个参数为转化生成的 unicode 字符串 (WideString) 缓冲区的地址；当第六个参数为 0 时该缓冲区不会被写入（可以传 NULL），函数只返回所需的字符数
第六个参数为转化生成的 unicode 字符串缓存的容量,也就是有多少个UNICODE字符；传 0 表示只计算转换所需的字符数。

小郑2013

关注

1
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
VC中实现GB2312、BIG5、Unicode编码转换

一、文件编码格式转换 //GB2312 编码文件转换成 Unicode：if((file_handle = fopen(filenam,"rb")) != NULL){ //从…
复制链接

扫一扫

专栏目录