中文乱码的原因是:文本的编码格式是GBK,而显示时按UTF-8格式解析,所以需要先将其转换为UTF-8格式,才能正常显示出来。
1.gbk生成utf8程序需要经过两次转化。
1.gbk->unicode
2.unicode->utf8
2.gbkuni30_gen.h是GBK到Unicode的转换表数组:数组中只保存Unicode值,以GBK编码作为数组下标(索引),该文件可通过另一个程序生成(此文件可在我的下载资源中找到)。
源码如下:
#include <string.h>

#include "gbkuni30_gen.h"
/*
 * Convert a GBK-encoded byte sequence into 16-bit Unicode code units.
 *
 * unicode: output array; must have room for at least `len` elements, since
 *          every input byte produces at most one output element.
 * gbk:     input bytes; `len` bytes are read, no NUL terminator is required.
 * len:     number of input bytes to convert.
 *
 * Returns the number of code units written to `unicode` (0 when either
 * pointer is null or `len` is not positive).
 *
 * Bytes <= 0x80 are copied through unchanged. Any larger byte is treated as
 * the first byte of a two-byte sequence, and (lead << 8) + trail is used as
 * the index into the gbkuni30 table.
 */
int gbk_to_unicode(unsigned short int* unicode, const char* gbk, int len)
{
    const unsigned char *src = (const unsigned char *)gbk;
    int in_pos = 0;
    int out_pos = 0;

    if (!unicode || !gbk) {
        return 0;
    }

    while (in_pos < len) {
        unsigned char lead = src[in_pos];

        if (lead <= 0x80) {
            /* Single-byte character: the value is used as-is. */
            unicode[out_pos++] = lead;
            in_pos += 1;
        } else if (in_pos + 1 < len) {
            /* NOTE(review): assumes gbkuni30 covers every 16-bit index -
             * confirm against the generated header. */
            unsigned short int index =
                (unsigned short int)((lead << 8) + src[in_pos + 1]);

            unicode[out_pos++] = gbkuni30[index];
            in_pos += 2;
        } else {
            /* Lead byte with no trail byte left inside `len`: do not read
             * past the input; emit U+FFFD (replacement character) instead. */
            unicode[out_pos++] = 0xFFFD;
            in_pos += 1;
        }
    }

    return out_pos;
}
/*
 * Encode a single code point as a UTF-8 style byte sequence.
 *
 * unic:    value to encode, 0 .. 0x7FFFFFFF.
 * pOutput: destination buffer; must have room for the bytes produced
 *          (up to 6). No terminator is written.
 *
 * Returns the number of bytes written (1..6), or 0 when `unic` is larger
 * than 0x7FFFFFFF, in which case nothing is written.
 *
 * Layout by range:
 *   U-00000000 - U-0000007F: 0xxxxxxx
 *   U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
 *   U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
 *   U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 */
int enc_unicode_to_utf8_one(unsigned long unic, unsigned char *pOutput)
{
    int byte_count;
    unsigned char lead_prefix;
    unsigned char lead_mask;
    int pos;

    if (unic <= 0x7FUL) {
        pOutput[0] = (unsigned char)(unic & 0x7F);
        return 1;
    }

    /* Pick the sequence length and the lead-byte prefix/payload mask. */
    if (unic <= 0x7FFUL) {
        byte_count = 2;
        lead_prefix = 0xC0;
        lead_mask = 0x1F;
    } else if (unic <= 0xFFFFUL) {
        byte_count = 3;
        lead_prefix = 0xE0;
        lead_mask = 0x0F;
    } else if (unic <= 0x1FFFFFUL) {
        byte_count = 4;
        lead_prefix = 0xF0;
        lead_mask = 0x07;
    } else if (unic <= 0x3FFFFFFUL) {
        byte_count = 5;
        lead_prefix = 0xF8;
        lead_mask = 0x03;
    } else if (unic <= 0x7FFFFFFFUL) {
        byte_count = 6;
        lead_prefix = 0xFC;
        lead_mask = 0x01;
    } else {
        return 0;
    }

    /* Fill continuation bytes from the end, consuming 6 payload bits each. */
    for (pos = byte_count - 1; pos > 0; pos--) {
        pOutput[pos] = (unsigned char)((unic & 0x3F) | 0x80);
        unic >>= 6;
    }

    /* Whatever bits remain belong in the lead byte. */
    pOutput[0] = (unsigned char)((unic & lead_mask) | lead_prefix);
    return byte_count;
}
/*
 * Convert a NUL-terminated GBK string to a NUL-terminated UTF-8 string.
 *
 * gbk:  input string; a null pointer is treated like an empty string.
 * utf8: output buffer supplied by the caller. Each 16-bit code unit from
 *       gbk_to_unicode expands to at most 3 bytes and there is at most one
 *       code unit per input byte, so 3 * strlen(gbk) + 1 bytes is a safe
 *       size. Nothing is done when `utf8` is null.
 *
 * The conversion goes GBK -> 16-bit Unicode (gbk_to_unicode) -> UTF-8
 * (enc_unicode_to_utf8_one).
 */
void bgk_to_utf8( const char* gbk, unsigned char *utf8)
{
    unsigned char *out = utf8;

    if (!utf8) {
        return;
    }

    /* Empty input: avoid a zero-length VLA and just emit the terminator. */
    if (!gbk || gbk[0] == '\0') {
        *out = '\0';
        return;
    }

    int len = (int)strlen(gbk);
    unsigned short unicode[len];   /* len >= 1 here */
    int unicode_len = gbk_to_unicode(unicode, gbk, len);

    for (int i = 0; i < unicode_len; i++) {
        out += enc_unicode_to_utf8_one(unicode[i], out);
    }

    /* Terminate directly after the last encoded byte; pre-incrementing here
     * would leave an uninitialised byte inside the string. */
    *out = '\0';
}