/* Character conversion functions: encoding.c */
/************************************************************************
*
* File name: encoding.c
*
* Description: Convert character encoding between GB2312
* and Unicode(Utf-8).
*
* Author: JiangPeifu
* Creation date: 2009-02-20
*
************************************************************************/
#include <stdio.h>
#include "encoding.h"
/*
 * Debug tracing switch: while ENCODING_DEBUG is non-zero, debug is an alias
 * for printf; otherwise debug expands to nothing.
 */
#define ENCODING_DEBUG 1
#if ENCODING_DEBUG
#define debug printf
#else
/*
 * NOTE(review): with an empty expansion, a call such as debug(a, b) is left
 * behind as the parenthesized comma expression (a, b), so the arguments are
 * still evaluated.
 */
#define debug
#endif //ENCODING_DEBUG
/*
 * Forward declarations for the conversion routines defined in this file.
 */
/* Look up the two Unicode bytes for one two-byte GB2312 character. */
const unsigned char *GBCodeToUnicode(unsigned char *gbCode);
/* Look up the two GB2312 bytes for one two-byte Unicode value. */
const unsigned char *UnicodeToGBCode(unsigned char *unicode);
/* Encode one two-byte Unicode value as three UTF-8 bytes. */
static void UnicodeToUtf8(char* utf8, char *unicode);
/* Decode one UTF-8 sequence into a Unicode value. */
static void Utf8ToUnicode(char* unicode, char *utf8);
/* Convert a GB2312 byte string into a UTF-8 string. */
void GB2312StrToUtf8(
char *utf8Str, /* Output Utf-8 chars */
char* gbStr, /* Input GB2312 chars */
int nBytes /* Size of input GB2312 chars, in bytes */
);
/* Convert a UTF-8 byte string into a GB2312 string. */
void Utf8StrToGB2312(
char *gbStr, /* Output GB2312 chars */
char* utf8Str, /* Input Utf-8 chars */
int nBytes /* Size of input Utf-8 chars, in bytes */
);
/************************************************************************
* Function: GBCodeToUnicode
* Convert one GB2312 character to one Unicode character
************************************************************************/
const unsigned char *GBCodeToUnicode(unsigned char *gbCode)
{
const unsigned char *mapped = 0;
unsigned int i = 0;
if ((*(gbCode + 1) >= 0xa1) && (*(gbCode + 1) <= 0xfe))
{
if ((*gbCode >= 0xa1) && (*gbCode <= 0xa9))
{
i = ((*gbCode - 0xa1) * 94 + (*(gbCode + 1) - 0xa1)) * 2;
mapped = &gb2uTable[i];
} /* end of if */
else
{
if ((*gbCode >= 0xb0) && (*gbCode <= 0xf7))
{
i = ((*gbCode - 0xb0 + 9) * 94 + (*(gbCode + 1) - 0xa1)) * 2;
mapped = &gb2uTable[i];
} /* end of if */
else
{
debug("ERROR: GB2312 convert to unicode!!!/n");
}
}
} /* end of if */
else
{
debug("ERROR: GB2312 convert to unicode!!!/n");
}
return mapped;
}
/************************************************************************
* Function: UnicodeToGBCode
* Look up the GB2312 bytes for a single Unicode character.
*
* unicode points at two bytes, high byte first. They are combined into a
* 16-bit value that selects a two-byte entry in u2gbTable, and the address
* of that entry is returned. No range check is made on the index.
************************************************************************/
const unsigned char *UnicodeToGBCode(unsigned char *unicode)
{
    const unsigned int high = unicode[0];
    const unsigned int low = unicode[1];
    const unsigned int offset = ((high << 8) | low) * 2;

    return &u2gbTable[offset];
}
/************************************************************************
* Function: UnicodeToUtf8
* Encode one 16-bit Unicode value as a three-byte UTF-8 sequence.
*
* utf8:    output, receives exactly 3 bytes (no terminator is written).
* unicode: input, two bytes with the high byte first.
*
* When unicode is NULL an error is logged and utf8 is left untouched.
*
* NOTE(review): the three-byte form is emitted unconditionally, so values
* below U+0800 come out in a non-shortest (overlong) form. The decoder in
* this file accepts any 0xEx-led three-byte sequence, but strict UTF-8
* consumers may reject such output.
************************************************************************/
static void UnicodeToUtf8(char* utf8, char *unicode)
{
    unsigned char high;
    unsigned char low;

    if (unicode == NULL)
    {
        debug("ERROR: Unicode convert to utf8, unicode=0\n");
        return;
    }

    /* Work on unsigned bytes so shifts and masks never see a sign bit. */
    high = (unsigned char)unicode[0];
    low = (unsigned char)unicode[1];

    /* 1110xxxx: top 4 bits of the high byte. */
    utf8[0] = (char)(0xE0 | (high >> 4));
    /* 10xxxxxx: low 4 bits of the high byte, then top 2 bits of the low byte. */
    utf8[1] = (char)(0x80 | ((high & 0x0F) << 2) | (low >> 6));
    /* 10xxxxxx: low 6 bits of the low byte. */
    utf8[2] = (char)(0x80 | (low & 0x3F));
}
/************************************************************************
* Function: GB2312StrToUtf8
* Convert a GB2312 byte string into a NUL-terminated UTF-8 string.
*
* utf8Str: output buffer. Each ASCII byte is copied unchanged and each
*          two-byte GB2312 character becomes three UTF-8 bytes, so the
*          caller must provide at least (nBytes * 3) / 2 + 2 bytes.
* gbStr:   input GB2312 bytes; need not be NUL-terminated.
* nBytes:  number of input bytes to convert.
*
* A byte pair that GBCodeToUnicode rejects, or a lead byte with no trail
* byte left inside nBytes, is replaced by a single '?'.
* If utf8Str is NULL nothing is done; if gbStr is NULL the output is the
* empty string.
************************************************************************/
void GB2312StrToUtf8(
char *utf8Str, /* Output Utf-8 chars */
char* gbStr, /* Input GB2312 chars */
int nBytes /* size of input GB2312 chars */
)
{
    char buf[3];
    int in = 0;
    int out = 0;
    int k;

    if (utf8Str == NULL)
    {
        return;
    }
    if (gbStr == NULL)
    {
        nBytes = 0;
    }

    while (in < nBytes)
    {
        const unsigned char lead = (unsigned char)gbStr[in];
        const unsigned char *unicode;

        /* Test the high bit explicitly: plain char may be unsigned. */
        if ((lead & 0x80) == 0)
        {
            utf8Str[out++] = gbStr[in++];
            debug("GB2312Str[%d]=%c\n", in-1, gbStr[in-1]);
            debug(" utf8Str[%d]=%c\n", out-1, utf8Str[out-1]);
            continue;
        }

        /* A lead byte needs a trail byte inside the input range. */
        if (in + 1 >= nBytes)
        {
            debug("ERROR: truncated GB2312 character at [%d]\n", in);
            utf8Str[out++] = '?';
            in += 1;
            continue;
        }

        unicode = GBCodeToUnicode((unsigned char *)(gbStr + in));
        if (unicode == NULL)
        {
            /* Unmappable pair: substitute and skip both bytes. */
            utf8Str[out++] = '?';
            in += 2;
            continue;
        }

        debug("unicode [0]=%x, [1]=%x\n", unicode[0], unicode[1]);
        /* UnicodeToUtf8 only reads its input; the cast drops const to match it. */
        UnicodeToUtf8(buf, (char *)unicode);
        for (k = 0; k < 3; k++)
        {
            utf8Str[out++] = buf[k];
            debug(" utf8Str[%d]=%x\n", out-1, (unsigned char)utf8Str[out-1]);
        }
        in += 2;
    }

    utf8Str[out] = '\0';
}
/************************************************************************
* Function: Utf8ToUnicode
* Decode one UTF-8 sequence into a two-byte Unicode value.
*
* unicode: output, always receives 2 bytes with the high byte first (the
*          order UnicodeToGBCode reads them in).
* utf8:    input, points at the first byte of the sequence.
*
* Handled forms:
*   - single byte (high bit clear): stored as 0x00, byte.
*   - three bytes (1110xxxx 10xxxxxx 10xxxxxx): the 16 payload bits.
* Anything else is logged and decoded as '?' (0x00, '?').
* The second and third bytes are only read when the preceding byte is
* valid for a three-byte sequence.
************************************************************************/
static void Utf8ToUnicode(char* unicode, char *utf8)
{
    const unsigned char b0 = (unsigned char)utf8[0];

    if ((b0 & 0x80) == 0)
    {
        /*
         * single-byte char
         */
        unicode[0] = 0;
        unicode[1] = (char)b0;
        return;
    }

    if ((b0 & 0xf0) == 0xe0)
    {
        /*
         * 3-byte char (chinese char)
         */
        const unsigned char b1 = (unsigned char)utf8[1];

        if ((b1 & 0xc0) == 0x80)
        {
            const unsigned char b2 = (unsigned char)utf8[2];

            if ((b2 & 0xc0) == 0x80)
            {
                /* High byte: 4 bits from b0, then the top 4 payload bits of b1. */
                unicode[0] = (char)(((b0 & 0x0f) << 4) | ((b1 & 0x3c) >> 2));
                /* Low byte: last 2 payload bits of b1, then 6 bits from b2. */
                unicode[1] = (char)(((b1 & 0x03) << 6) | (b2 & 0x3f));
                return;
            }
        }
    }

    debug("ERROR: utf-8 to unicode, nBytes !=3\n");
    unicode[0] = 0;
    unicode[1] = '?';
}
/************************************************************************
* Function: Utf8StrToGB2312
* Convert a UTF-8 byte string into a NUL-terminated GB2312 string.
*
* gbStr:   output buffer. Each ASCII byte is copied unchanged and each
*          three-byte UTF-8 sequence becomes two bytes, so nBytes + 1
*          bytes are always enough.
* utf8Str: input UTF-8 bytes; need not be NUL-terminated.
* nBytes:  number of input bytes to convert.
*
* Only single-byte and three-byte sequences are converted. Any other lead
* byte, a bad continuation byte, or a sequence cut off by nBytes produces
* one '?' and skips a single input byte so decoding can resynchronise.
* If gbStr is NULL nothing is done; if utf8Str is NULL the output is the
* empty string.
*
* NOTE(review): what UnicodeToGBCode yields for a code point that has no
* GB2312 equivalent depends on u2gbTable, which is not visible here.
************************************************************************/
void Utf8StrToGB2312(
char *gbStr, /* Output GB2312 chars */
char* utf8Str, /* Input Utf-8 chars */
int nBytes /* Size of input Utf-8 chars */
)
{
    char buf[2];
    int in = 0;
    int out = 0;

    if (gbStr == NULL)
    {
        return;
    }
    if (utf8Str == NULL)
    {
        nBytes = 0;
    }

    while (in < nBytes)
    {
        const unsigned char lead = (unsigned char)utf8Str[in];
        const unsigned char *gbCode;

        if ((lead & 0x80) == 0)
        {
            gbStr[out++] = utf8Str[in++];
            debug(" utf8Str[%d]=%c\n", in-1, utf8Str[in-1]);
            debug("GB2312Str[%d]=%c\n", out-1, gbStr[out-1]);
            continue;
        }

        /*
         * Validate before decoding; the length test comes first so the
         * continuation bytes are never read past nBytes.
         */
        if (((lead & 0xf0) != 0xe0)
            || (nBytes - in < 3)
            || (((unsigned char)utf8Str[in+1] & 0xc0) != 0x80)
            || (((unsigned char)utf8Str[in+2] & 0xc0) != 0x80))
        {
            debug("ERROR: utf-8 to GB2312, bad sequence at [%d]\n", in);
            gbStr[out++] = '?';
            in += 1;
            continue;
        }

        Utf8ToUnicode(buf, utf8Str + in);
        debug(" utf8Str[%d]=%x\n" ,in, (unsigned char)utf8Str[in]);
        debug(" utf8Str[%d]=%x\n" ,in+1, (unsigned char)utf8Str[in+1]);
        debug(" utf8Str[%d]=%x\n" ,in+2, (unsigned char)utf8Str[in+2]);
        debug("unicode [0]=%x, [1]=%x\n", (unsigned char)buf[0], (unsigned char)buf[1]);

        gbCode = UnicodeToGBCode((unsigned char *)buf);
        gbStr[out++] = (char)gbCode[0];
        debug("GB2312[%d]=%x\n", out-1, (unsigned char)gbStr[out-1]);
        gbStr[out++] = (char)gbCode[1];
        debug("GB2312[%d]=%x\n", out-1, (unsigned char)gbStr[out-1]);
        in += 3;
    }

    gbStr[out] = '\0';
}
/* At this point, everything is OK! */