// Source program:
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <windows.h>

#include <vector>
//
// Encode one UTF-16 code unit as a three-byte UTF-8 sequence.
//
// pOut : receives exactly 3 bytes; no terminator is written.
// wch  : the code unit to encode.
//
// The 1110xxxx 10xxxxxx 10xxxxxx form is always used, so the result is only
// well-formed UTF-8 for values in U+0800..U+FFFF that are not surrogates.
// Callers that may see smaller values must emit the 1- and 2-byte forms
// themselves.
//
void UnicodeToUTF_8(char *pOut, const WCHAR wch)
{
    // Work on the numeric value instead of peeking at the bytes of wch in
    // memory, so the result does not depend on the host byte order.
    const unsigned int code = static_cast<unsigned int>(wch) & 0xFFFFu;

    pOut[0] = static_cast<char>(0xE0u | (code >> 12));           // bits 15..12
    pOut[1] = static_cast<char>(0x80u | ((code >> 6) & 0x3Fu));  // bits 11..6
    pOut[2] = static_cast<char>(0x80u | (code & 0x3Fu));         // bits 5..0
}
//
// Decode one double-byte GB2312 character to a UTF-16 code unit.
//
// gbBuffer : points at the lead byte; exactly 2 bytes are read.
//
// Returns the decoded code unit, or U+FFFD (REPLACEMENT CHARACTER) when the
// two bytes cannot be converted into exactly one code unit.
//
inline WCHAR Gb2312ToUnicode(const char *gbBuffer)
{
    // Code page 936 is the Windows Simplified Chinese (GBK) code page, a
    // superset of GB2312. Naming it explicitly keeps the decoding correct on
    // systems whose ANSI code page (CP_ACP) is something else.
    const UINT kCodePageGbk = 936;
    const WCHAR kReplacementChar = 0xFFFD;

    WCHAR wch = kReplacementChar;
    const int converted =
        ::MultiByteToWideChar(kCodePageGbk, MB_PRECOMPOSED, gbBuffer, 2, &wch, 1);

    // On failure (e.g. the bytes decode to two characters and do not fit in
    // the one-element buffer) wch may hold a partial result; discard it.
    if (converted != 1)
        wch = kReplacementChar;
    return wch;
}
//
// Convert GB2312 text to UTF-8.
//
// strOut : destination buffer. No terminator is appended. Its size cannot be
//          checked here, so the caller must provide at least
//          3 * ((nLen + 1) / 2) bytes (every double-byte character can grow
//          to three bytes).
// pText  : GB2312 input.
// nLen   : number of input bytes; -1 means "pText is NUL-terminated".
//
// Returns the number of bytes written to strOut (0 if either pointer is null).
//
int GB2312ToUTF_8(char *strOut, const char *pText, int nLen = -1)
{
    if (!strOut || !pText)
        return 0;
    if (nLen == -1)
        nLen = static_cast<int>(strlen(pText));

    int in = 0;
    int out = 0;
    while (in < nLen)
    {
        // Compare as unsigned so the test does not depend on whether plain
        // char is signed on this compiler.
        const unsigned char lead = static_cast<unsigned char>(pText[in]);
        if (lead < 0x80)
        {
            // ASCII is identical in both encodings: copy it through.
            strOut[out++] = pText[in++];
            continue;
        }

        WCHAR wch = 0xFFFD;
        if (in + 1 < nLen)
        {
            wch = Gb2312ToUnicode(pText + in);
            in += 2;
        }
        else
        {
            // Lead byte with no trail byte left: do not read past the input,
            // emit U+FFFD for the truncated character instead.
            ++in;
        }

        // Pick the shortest UTF-8 form; a 3-byte sequence for a value below
        // U+0800 would be an overlong (invalid) encoding.
        if (wch < 0x80)
        {
            strOut[out++] = static_cast<char>(wch);
        }
        else if (wch < 0x800)
        {
            strOut[out++] = static_cast<char>(0xC0 | (wch >> 6));
            strOut[out++] = static_cast<char>(0x80 | (wch & 0x3F));
        }
        else
        {
            UnicodeToUTF_8(strOut + out, wch);
            out += 3;
        }
    }
    return out;
}
//
// Reads the GB2312-encoded file "gb2312.txt" and writes its content,
// converted to UTF-8, to "utf8.txt".
//
// A GB2312 input file can be created with an editor such as EditPlus, and the
// UTF-8 result can be checked the same way.
//
// The output file is only created once the input has been read and converted.
// Returns 0 on success and 1 on any I/O error.
//
int main()
{
    FILE *fpIn = fopen("gb2312.txt", "rb");
    if (!fpIn)
    {
        fprintf(stderr, "cannot open gb2312.txt for reading\n");
        return 1;
    }

    // Determine the length of the GB2312 file.
    long nFileLen = -1L;
    if (fseek(fpIn, 0L, SEEK_END) == 0)
        nFileLen = ftell(fpIn);
    // The converter takes and returns int lengths, and the output can be
    // larger than the input, so refuse files that would not fit.
    if (nFileLen < 0 || nFileLen > INT_MAX / 2 || fseek(fpIn, 0L, SEEK_SET) != 0)
    {
        fprintf(stderr, "cannot determine the size of gb2312.txt\n");
        fclose(fpIn);
        return 1;
    }

    // Buffers are sized from the file instead of being fixed arrays.
    // Input: one extra zero byte so a truncated trailing double-byte
    // character can never cause a read past the buffer.
    // Output: UTF-8 is at most 1.5x the GB2312 size; 2x plus slack is a
    // simple safe bound.
    std::vector<char> bufIn(static_cast<size_t>(nFileLen) + 1, 0);
    std::vector<char> bufOut(static_cast<size_t>(nFileLen) * 2 + 4, 0);

    const size_t nRead = fread(&bufIn[0], 1, static_cast<size_t>(nFileLen), fpIn);
    const bool readFailed = ferror(fpIn) != 0;
    fclose(fpIn);
    if (readFailed)
    {
        fprintf(stderr, "error while reading gb2312.txt\n");
        return 1;
    }

    // Convert.
    const int k = GB2312ToUTF_8(&bufOut[0], &bufIn[0], static_cast<int>(nRead));

    // Store the result as a UTF-8 file.
    FILE *fpOut = fopen("utf8.txt", "wb");
    if (!fpOut)
    {
        fprintf(stderr, "cannot open utf8.txt for writing\n");
        return 1;
    }
    bool ok = fwrite(&bufOut[0], 1, static_cast<size_t>(k), fpOut) == static_cast<size_t>(k);
    if (fclose(fpOut) != 0)
        ok = false;
    if (!ok)
    {
        fprintf(stderr, "error while writing utf8.txt\n");
        return 1;
    }
    return 0;
}