解决中文字符乱码问题:如何将 GBK 编码转换为 UTF-8 编码

中文乱码的原因是:文本的编码格式是 GBK,而显示时却按 UTF-8 格式解析,所以需要先将其转换为 UTF-8 格式,才能正常显示出来。

1.gbk生成utf8程序需要经过两次转化。

  1.gbk->unicode

  2.unicode->utf8

2.gbkuni30_gen.h 中是 GBK 到 Unicode 的转换表数组:数组里只存放 Unicode 值,以 GBK 编码值作为数组下标(索引)。该文件可通过另一个程序生成(此文件可在我的下载资源中找到)。

 

源码如下:

#include <string.h>

#include "gbkuni30_gen.h"

int gbk_to_unicode(unsigned short int* unicode, const char* gbk, int len)

{

    int i,j;

    i = 0;

    unsigned char* gb_temp = (unsigned char *)gbk;

    for(j = 0; i < len; j++)

    {

        if (gb_temp[i] <= 0x80)

        {

            unicode[j] = gb_temp[i];

            i++;

        }

        else

        {

            unsigned short int temp;

            temp = (gb_temp[i] << 8) + gb_temp[i+1];

            unicode[j] = gbkuni30[temp];

            i += 2;

        }

    }

 

    return j;

}

/**
 * Encode one code point as a UTF-8 byte sequence (legacy 1-6 byte scheme).
 *
 *   U-00000000 - U-0000007F:  0xxxxxxx
 *   U-00000080 - U-000007FF:  110xxxxx 10xxxxxx
 *   U-00000800 - U-0000FFFF:  1110xxxx 10xxxxxx 10xxxxxx
 *   U-00010000 - U-001FFFFF:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-00200000 - U-03FFFFFF:  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-04000000 - U-7FFFFFFF:  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * @param unic     Code point to encode.
 * @param pOutput  Destination buffer; receives as many bytes as are returned
 *                 (at most 6). No terminator is appended.
 * @return Number of bytes written (1-6), or 0 when unic is above 0x7FFFFFFF,
 *         in which case nothing is written.
 */
int enc_unicode_to_utf8_one(unsigned long unic, unsigned char *pOutput)
{
    /* Marker bits of the first byte, indexed by the sequence length. */
    static const unsigned char lead_mark[7] = {
        0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
    };
    unsigned long rest = unic;
    int nbytes;
    int pos;

    if (unic < 0x00000080UL) {
        nbytes = 1;
    } else if (unic < 0x00000800UL) {
        nbytes = 2;
    } else if (unic < 0x00010000UL) {
        nbytes = 3;
    } else if (unic < 0x00200000UL) {
        nbytes = 4;
    } else if (unic < 0x04000000UL) {
        nbytes = 5;
    } else if (unic <= 0x7FFFFFFFUL) {
        nbytes = 6;
    } else {
        return 0;
    }

    /* Continuation bytes carry 6 payload bits each, filled from the tail. */
    for (pos = nbytes - 1; pos > 0; pos--) {
        pOutput[pos] = (unsigned char)((rest & 0x3F) | 0x80);
        rest >>= 6;
    }

    /* The bits left over are guaranteed by the range checks above to fit
     * into the payload area of the lead byte. */
    pOutput[0] = (unsigned char)(rest | lead_mark[nbytes]);

    return nbytes;
}

 

/**
 * Convert a NUL-terminated GBK string to a NUL-terminated UTF-8 string.
 *
 * The input is first decoded to 16-bit code units with gbk_to_unicode(), and
 * each unit is then encoded with enc_unicode_to_utf8_one().
 *
 * @param gbk   NUL-terminated input string. A null pointer yields an empty
 *              output string.
 * @param utf8  Destination buffer. Each input byte produces at most one
 *              16-bit unit and each unit at most 3 UTF-8 bytes, so
 *              3 * strlen(gbk) + 1 bytes is always sufficient. A null
 *              pointer makes the call a no-op.
 *
 * Note: the intermediate buffer is a stack VLA proportional to the input
 * length, so this is only suitable for reasonably short strings.
 */
void bgk_to_utf8( const char* gbk, unsigned char *utf8)
{
    if (!utf8)
    {
        return;
    }
    if (!gbk)
    {
        *utf8 = '\0';
        return;
    }

    int len = (int)strlen(gbk);

    /* +1 keeps the VLA size positive for an empty input string; a
     * zero-length VLA is undefined behaviour. */
    unsigned short unicode[len + 1];
    int unicode_len = gbk_to_unicode(unicode, gbk, len);
    unsigned char *out = utf8;
    int i;

    for (i = 0; i < unicode_len; i++)
    {
        /* The encoder returns the number of bytes it wrote (0 if none). */
        out += enc_unicode_to_utf8_one(unicode[i], out);
    }

    /* out already points just past the last encoded byte, so terminate
     * right here rather than one byte further on. */
    *out = '\0';
}

 

 

 

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值