/**********************
转换0x4e00~0x9fa5
**********************/
#include<stdio.h>
int main()
{
long i =0;
unsigned char tmp[4] = {0};
FILE *fp = NULL;
fp = fopen("utf8.txt","w");
if (fp == NULL)
{
printf("open file err\n");
return (-1);
}
int count = 0;
for (i=0x4e00;i<=0x9fa5;i++)
{
enc_unicode_to_utf8(i,tmp);
//printf("%s\n",tmp);
fwrite(tmp,1,3,fp);
count++;
if (count%36 == 0)
{
fwrite("\n",1,1,fp);
}
}
fclose(fp);
return ( 0 );
}
int enc_unicode_to_utf8(unsigned long unic, unsigned char *pOutput)
{
if ( unic <= 0x0000007F )
{
// * U-00000000 - U-0000007F: 0xxxxxxx
*pOutput = (unic & 0x7F);
return 1;
}
else if ( unic >= 0x00000080 && unic <= 0x000007FF )
{
// * U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
*(pOutput+1) = (unic & 0x3F) | 0x80;
*pOutput = ((unic >> 6) & 0x1F) | 0xC0;
return 2;
}
else if ( unic >= 0x00000800 && unic <= 0x0000FFFF )
{
// * U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
*(pOutput+2) = (unic & 0x3F) | 0x80;
*(pOutput+1) = ((unic >> 6) & 0x3F) | 0x80;
*pOutput = ((unic >> 12) & 0x0F) | 0xE0;
return 3;
}
else if ( unic >= 0x00010000 && unic <= 0x001FFFFF )
{
// * U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*(pOutput+3) = (unic & 0x3F) | 0x80;
*(pOutput+2) = ((unic >> 6) & 0x3F) | 0x80;
*(pOutput+1) = ((unic >> 12) & 0x3F) | 0x80;
*pOutput = ((unic >> 18) & 0x07) | 0xF0;
return 4;
}
else if ( unic >= 0x00200000 && unic <= 0x03FFFFFF )
{
// * U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*(pOutput+4) = (unic & 0x3F) | 0x80;
*(pOutput+3) = ((unic >> 6) & 0x3F) | 0x80;
*(pOutput+2) = ((unic >> 12) & 0x3F) | 0x80;
*(pOutput+1) = ((unic >> 18) & 0x3F) | 0x80;
*pOutput = ((unic >> 24) & 0x03) | 0xF8;
return 5;
}
else if ( unic >= 0x04000000 && unic <= 0x7FFFFFFF )
{
// * U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*(pOutput+5) = (unic & 0x3F) | 0x80;
*(pOutput+4) = ((unic >> 6) & 0x3F) | 0x80;
*(pOutput+3) = ((unic >> 12) & 0x3F) | 0x80;
*(pOutput+2) = ((unic >> 18) & 0x3F) | 0x80;
*(pOutput+1) = ((unic >> 24) & 0x3F) | 0x80;
*pOutput = ((unic >> 30) & 0x01) | 0xFC;
return 6;
}
return 0;
}