部分参考:
www.360doc.com/content/12/0926/12/1072296_238242301.shtml
https://blog.csdn.net/wyingquan/article/details/3882432
所需的码表文件 unicode_to_gb2312_table.bin,请到我的 CSDN 资源下载页中获取。
源码如下:(使用方法:将代码直接拷贝到一个 .c 文件中,用 VC 编译即可;运行时需要把 unicode_to_gb2312_table.bin 放在程序的当前工作目录下,因为代码按相对路径 ./unicode_to_gb2312_table.bin 打开它)
// utf8_to_gb2312_in_c.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include "malloc.h"
#include "string.h"
// Short aliases for the unsigned byte / unsigned short types used in this file.
typedef unsigned char u8;
typedef unsigned short u16;

// One entry of the unicode -> gb2312 lookup table.
typedef struct unicode_gb
{
    u16 unicode; // lookup key: the unicode value
    u16 gb;      // the gb2312 value stored for that key
} UNICODE_GB;
// Allocator aliases used for the code table buffer (currently plain malloc/free).
#define utf8_malloc malloc
#define utf8_free free
// Debug switch: a non-zero value maps APP_PRINT to printf; otherwise APP_PRINT
// expands to nothing and its arguments are discarded.
#define UTF8_DEBUG 1
#if defined(UTF8_DEBUG)&&UTF8_DEBUG
#define APP_PRINT printf
#else
#define APP_PRINT(fmt, ...)
#endif
// Global code table handle; it must be initialised (see Utf8ToGb2312_init) before use.
UNICODE_GB *code_table=NULL;
// Number of UNICODE_GB items held in code_table.
int code_table_size_in_item = 0;
// Count the consecutive 1 bits at the top of firstCh, starting from bit 7.
// The caller uses the count as the byte length of the UTF-8 sequence that
// starts with firstCh (0 means the top bit is clear, i.e. a single byte).
// Parameter: firstCh - the byte to inspect
// Return:    number of leading 1 bits (0..8)
int GetUtf8ByteNumForWord(u8 firstCh)
{
    int num;
    u8 mask;

    // Walk a single-bit mask down from 0x80 until it meets a 0 bit
    // (or falls off the low end when every bit is set).
    for (num = 0, mask = 0x80; (mask & firstCh) != 0; mask >>= 1)
    {
        num++;
    }
    APP_PRINT("\r\nthe num is: %d", num);
    return num;
}
// Look up the gb2312 code stored for a unicode value.
// The table is probed by repeated halving, so its items must be sorted by
// ascending .unicode for the lookup to find them.
// Parameters: unicodeKey      - unicode value to look for
//             code_table      - unicode -> gb2312 table
//             CODE_TABLE_SIZE - table size, counted in UNICODE_GB items
// Return:     0     - no gb2312 value found for unicodeKey
//             non-0 - the gb2312 value that was found
u16 SearchCodeTable(u16 unicodeKey, UNICODE_GB *code_table, int CODE_TABLE_SIZE)
{
    int lo;
    int hi;

    if (!code_table || CODE_TABLE_SIZE == 0)
    {
        return 0;
    }

    lo = 0;
    hi = CODE_TABLE_SIZE - 1;
    while (lo <= hi)
    {
        const int probe = (lo + hi) / 2;
        const u16 candidate = code_table[probe].unicode;

        if (candidate < unicodeKey)
        {
            // Key can only be in the upper half.
            lo = probe + 1;
        }
        else if (candidate > unicodeKey)
        {
            // Key can only be in the lower half.
            hi = probe - 1;
        }
        else
        {
            return code_table[probe].gb;
        }
    }
    return 0;
}
//utf8字符串转gb2312字符串
//参数: utf8- utf8字符串
//len- utf8字符串长度,按字节
//temp- 转化后的gb2312值
//返回值: 0- success, others-fail
int Utf8ToGb2312(const char* utf8, int len, char *temp)
{
APP_PRINT("\r\nutf8->unicode: \n");
APP_PRINT("utf8: [");
for (int k = 0; k < len; k++)
{
APP_PRINT("0x%02x ", utf8[k]);
}
APP_PRINT("]\n");
int byteCount = 0;
int i = 0;
int j = 0;
u16 unicodeKey = 0;
u16 gbKey = 0;
//循环解析
while (i < len)
{
switch (GetUtf8ByteNumForWord((u8)utf8[i]))
{
case 0:
temp[j] = utf8[i];
byteCount = 1;
break;
case 2:
temp[j] = utf8[i];
temp[j + 1] = utf8[i + 1];
byteCount = 2;
break;
case 3:
//这里就开始进行UTF8->Unicode
temp[j + 1] = ((utf8[i] & 0x0F) << 4) | ((utf8[i + 1] >> 2) & 0x0F);
temp[j] = ((utf8[i + 1] & 0x03) << 6) + (utf8[i + 2] & 0x3F);
//取得Unicode的值
memcpy(&unicodeKey, (temp + j), 2);
APP_PRINT("\r\nunicode key is: 0x%04X\n", unicodeKey);
//根据这个值查表取得对应的GB2312的值
gbKey = SearchCodeTable(unicodeKey, code_table, code_table_size_in_item);
APP_PRINT("\r\ngb2312 key is: 0x%04X\n", gbKey);
if (gbKey != 0)
{
//here change the byte
//不为0表示搜索到,将高低两个字节调换调成我要的形式
gbKey = (gbKey >> 8) | (gbKey << 8);
APP_PRINT("\r\nafter changing, gb2312 key is: 0x%04X\n", gbKey);
memcpy((temp + j), &gbKey, 2);
}
byteCount = 3;
break;
case 4:
byteCount = 4;
break;
case 5:
byteCount = 5;
break;
case 6:
byteCount = 6;
break;
default:
APP_PRINT("\r\nthe len is more than 6, error\n");
//break;
return -1;
}
i += byteCount;
if (byteCount == 1)
{
j++;
}
else
{
j += 2;
}
}
APP_PRINT("\r\ngb2312: [");
for (int k = 0; k < j; k++)
{
APP_PRINT("0x%02x ", temp[k]);
}
APP_PRINT("]\n");
return 0;
}
//初始化utf8转gb2312转换环境,主要是初始化码表handle和码表大小
//参数:无
//返回值:0- 成功, 其他值- 失败
static FILE *fp=NULL;
#define TABLE_FILE "./unicode_to_gb2312_table.bin"
int Utf8ToGb2312_init(void)
{
long file_size_in_byte;
int ret;
int len;
ret = 0;
fopen_s(&fp, TABLE_FILE, "rb+");
if (!fp)
{
APP_PRINT("\r\nUtf8ToGb2312_init open file fail");
return -1;
}
fseek(fp, 0, SEEK_END);
file_size_in_byte=ftell(fp);
rewind(fp);
if (!code_table)
{
code_table = (UNICODE_GB*)utf8_malloc(file_size_in_byte);
code_table_size_in_item = file_size_in_byte / sizeof(UNICODE_GB);
APP_PRINT("\r\nopen file ok, size_in_byte=%d, size_in_item=%d", file_size_in_byte, code_table_size_in_item);
len = fread(code_table, sizeof(code_table[0]), code_table_size_in_item, fp);
if (len != code_table_size_in_item)
{
APP_PRINT("\r\nfile read error, len ret=%d", len);
ret = -3;
}
}
else
{
APP_PRINT("\r\ncode table handle is exists error");
ret = -2;
}
fclose(fp);
return ret;
}
//反初始化utf8转gb2312转换环境
//参数: 无
//返回值: 0- 成功, 其他值-失败
int Utf8ToGb2312_deinit(void)
{
utf8_free(code_table);
code_table = NULL;
code_table_size_in_item = 0;
return 0;
}
// Demo entry point: load the code table, convert one 3-byte utf8 character
// (bytes e4 bd a0, which decode to unicode 0x4F60) and print the result.
// Waits for a key press before exiting.
// Return: 0 - table loaded and conversion succeeded, 1 - otherwise
int _tmain(int argc, _TCHAR* argv[])
{
    // A string literal avoids initialising char elements with out-of-range
    // int constants; the rest of the array is zero-filled either way.
    char utf8[100] = "\xe4\xbd\xa0";
    char gb2312[100];
    int ret;

    (void)argc;
    (void)argv;

    ret = Utf8ToGb2312_init();
    if (ret != 0)
    {
        // Without a table every lookup fails, so do not attempt the conversion.
        printf("\r\nutf8 to gb2312 init fail, ret=%d\r\n", ret);
    }
    else
    {
        // Utf8ToGb2312 does not terminate its output; zero-fill so the
        // result can be printed as a C string.
        memset(gb2312, 0, sizeof(gb2312));
        ret = Utf8ToGb2312(utf8, (int)strlen(utf8), gb2312);
        if (!ret)
        {
            printf("\r\nutf8 to gb2312 ok\r\n");
            // Never pass converted data as the format string itself.
            printf("%s", gb2312);
        }
        else
        {
            printf("\r\nutf8 to gb2312 fail\r\n");
        }
    }
    Utf8ToGb2312_deinit();
    getchar();
    return ret != 0 ? 1 : 0;
}