utabgen.c 是本人在实际项目中为了方便、高效地对照码表而编写的小工具:它读取 Unicode 网站提供的“本国语言扩展 ASCII 码与 Unicode 码对照表”txt 文件,并生成固定格式的 C 数组源文件。
使用:
utabgen big5.txt big5.c
其中 big5.txt 为从 Unicode 网站获取的对照表 txt 文件,
big5.c 为输出文件,其格式与 Linux 内核 fs/nls 目录下的文件相对应。
具体 算法安排 :
1. 逐行读取 txt 文件,取出每一行的有效 token,忽略 # 之后的注释字符串。txt 文件每一行的第一列为本国语言扩展 ASCII 码,第二列为对应的 Unicode 码,两列之间以空格分隔,其后为空格或以 # 开头的注释。
2. 将每一行中的两个码存入 Unicode 码与 ASCII 码一一对应的结构体数组。
3. 判断扩展 ASCII 码是 1 字节还是 2 字节。
4. 按 Unicode 码从低到高排序,依次写入 U2C_LTAB、U2C_HTAB 文件;其中 U2C_LTAB 存放按低字节索引的数组,U2C_HTAB 存放相应的高字节索引。
5. 按 ASCII 码从低到高排序,依次写入 C2U_LTAB、C2U_HTAB 文件;其中 C2U_LTAB 存放按低字节索引的数组,C2U_HTAB 存放相应的高字节索引。
6. 将 C2U_LTAB、C2U_HTAB、U2C_LTAB、U2C_HTAB 依次写入最终输出的 C 文件,得到与 Linux fs/nls 目录下文件相对应的对照格式。
以下为程序源码:
/* main.c */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <linux/unistd.h>
#include <linux/fcntl.h>
#include<string.h>
/* ---- config ---- */
/* Size in bytes of the shared line/format scratch buffers. */
#define LINEBUF_SIZE 256
/* Intermediate files: main() writes each table here, then copies them into the output. */
#define C2U_HTAB "c2u_high.c"
#define C2U_LTAB "c2u_low.c"
#define U2C_HTAB "u2c_high.c"
#define U2C_LTAB "u2c_low.c"
/*
 * True when c is CR (0x0d) or LF (0x0a).  The argument is parenthesised so
 * that an expression argument keeps its meaning; note it is evaluated twice.
 */
#define islineseperator(c) (((c) == 0x0d) || ((c) == 0x0a))
/* True when c starts a comment in the input table. */
#define iscomment(c) ((c) == '#')
/* Column separator string (not referenced by the code in this file). */
#define TOKENSEP " "
/* ---- static data ---- */
/*
 * One row of the input table: a code-page character paired with the
 * Unicode value it maps to.
 */
struct map {
	/* Code-page character (first input column); sort key when type == 0. */
	unsigned int wchar;
	/* Unicode value (second input column); sort key for any other type. */
	unsigned int uchar;
};
/* Every mapping read from the input file; allocated in main(). */
static struct map *tab;
/* The 256 mappings that share the high byte currently being written. */
static struct map lowtab[256];
/* Scratch buffer holding one input line or one formatted output row. */
static char buffer[LINEBUF_SIZE];
/* Scratch buffer used while building the high-byte index table. */
static char buffer2[LINEBUF_SIZE];
/*
 * Read the next non-empty logical line from fd into buffer.
 *
 * Everything from '#' to the end of the line is dropped, leading spaces are
 * skipped, and lines that end up empty (blank or comment-only) are passed
 * over.  CR, LF and CR LF are all accepted as line terminators.  At most
 * size - 1 characters are stored and the result is always NUL-terminated;
 * a longer line is returned in pieces by successive calls.
 *
 * fd     - descriptor to read from; it should be seekable, because a byte
 *          read after a lone CR is pushed back with lseek()
 * buffer - destination, at least size bytes
 * size   - capacity of buffer, must be at least 2
 *
 * Returns the number of characters stored (> 0), or -1 at end of file, on a
 * read error, or when size is too small to hold any character.
 */
static int readline(int fd, char *buffer, int size)
{
	unsigned char c, c2;
	int len = 0;        /* characters stored so far */
	int in_comment = 0; /* set from '#' until the end of the line */

	if (buffer == NULL || size < 2)
		return -1;

	while (len < size - 1) {
		if (read(fd, &c, 1) <= 0) {
			if (len > 0)
				break; /* last line has no terminator: still return it */
			return -1;
		}

		if (c == '\r' || c == '\n') {
			in_comment = 0;
			if (len == 0)
				continue; /* blank or comment-only line */
			/* Swallow the LF of a CR LF pair; un-read anything else. */
			if (c == '\r' && read(fd, &c2, 1) == 1 && c2 != '\n')
				lseek(fd, -1, SEEK_CUR);
			break;
		}

		if (in_comment)
			continue;
		if (c == '#') {
			in_comment = 1;
			continue;
		}
		if (c == ' ' && len == 0)
			continue; /* skip leading spaces */

		buffer[len++] = (char)c;
	}

	buffer[len] = '\0';
	return len;
}
static int orderbytype(int type,struct map *tab, int size)
{
struct map tmp;
int i,j;
if(type==0)
{
for(i=0;i<size ;i++)
{
for(j=i+1;j<size ;j++)
{
if(tab[i].wchar >tab[j].wchar)
{
tmp = tab[i];
tab[i] = tab[j];
tab[j] = tmp;
}
}
}
}
else
{
for(i=0;i<size ;i++)
{
for(j=i+1;j<size ;j++)
{
if(tab[i].uchar >tab[j].uchar)
{
tmp = tab[i];
tab[i] = tab[j];
tab[j] = tmp;
}
}
}
}
}
static int writelowtab(int fd ,int type,struct map *tab,int high )
{
int i,j;
int len;
if(type==0)
{
len =sprintf(buffer,"/n/nstatic u_int16_t c2u_%02X [256] = {",high);
for(i=0;i<256 ;i++)
{
if((i%8)==0)
{
if(i>0)
len+=sprintf(buffer+len,"/* 0x%02X-0x%02X */",i-8,i-1);
write(fd, buffer,len);
if(i==128)
len=sprintf(buffer,"/n/n/t");
else
len=sprintf(buffer,"/n/t");
}
len += sprintf(buffer+len, "0x%04X,",tab[i].uchar);
}
len+=sprintf(buffer+len,"/* 0x%X-0x%X */",i-8,i-1);
write(fd, buffer,len);
}
else
{
len =sprintf(buffer,"/n/nstatic unsigned char u2c_%02X[512] = {",high);
for(i=0;i<256 ;i++)
{
if((i%4)==0)
{
if(i>0)
len+=sprintf(buffer+len,"/* 0x%02X-0x%02X */",i-4,i-1);
write(fd, buffer,len);
if(i==128)
len=sprintf(buffer,"/n/n/t");
else
len=sprintf(buffer,"/n/t");
}
len += sprintf(buffer+len, "0x%02X, ",tab[i].wchar>>8);
len += sprintf(buffer+len, "0x%02X, ",tab[i].wchar&0xff);
}
len+=sprintf(buffer+len,"/* 0x%02X-0x%02X */",i-4,i-1);
write(fd, buffer,len);
}
len = sprintf(buffer,"/n};");
write(fd, buffer,len);
}
static int writelowtab2(int fd ,int type,struct map *tab,int high )
{
int i,j;
int len;
if(type==0)
{
len =sprintf(buffer,"static u_int16_t *page_uni2charset[256] ={/n",high);
for(i=0;i<256 ;i++)
{
if((i%8)==0)
{
if(i>0)
len+=sprintf(buffer+len,"/* 0x%02X-0x%02X */",i-8,i-1);
write(fd, buffer,len);
if(i==128)
len=sprintf(buffer,"/n/n/t");
else
len=sprintf(buffer,"/n/t");
}
len += sprintf(buffer+len, "0x%04X,",tab[i].uchar);
}
len+=sprintf(buffer+len,"/* 0x%X-0x%X */",i-8,i-1);
write(fd, buffer,len);
}
else
{
len =sprintf(buffer,"static unsigned char *page_charset2uni[256] = {/n",high);
for(i=0;i<256 ;i++)
{
if((i%4)==0)
{
if(i>0)
len+=sprintf(buffer+len,"/* 0x%02X-0x%02X */",i-4,i-1);
write(fd, buffer,len);
if(i==128)
len=sprintf(buffer,"/n/n/t");
else
len=sprintf(buffer,"/n/t");
}
len += sprintf(buffer+len, "0x%02X, ",tab[i].wchar>>8);
len += sprintf(buffer+len, "0x%02X, ",tab[i].wchar&0xff);
}
len+=sprintf(buffer+len,"/* 0x%02X-0x%02X */",i-4,i-1);
write(fd, buffer,len);
}
len = sprintf(buffer,"/n};");
write(fd, buffer,len);
}
/*
 * State of the high-byte index writer, carried between calls to
 * writehightab() and writehighend().  File-scope statics start at zero.
 */
static int len;         /* bytes pending in buffer2; main() also reuses it */
static int savehigh;    /* last high byte already placed in the index */
static int headerprint; /* non-zero once the index array header was started */
/* Finish the row pending in buffer2, write it to fd and start a new one. */
static int utabgen_flush_index_row(int fd)
{
	int rc = 0;

	len += sprintf(buffer2 + len, "\n\t");
	if (write(fd, buffer2, len) != len)
		rc = -1;
	memset(buffer2, 0, LINEBUF_SIZE);
	len = 0;
	return rc;
}

/*
 * Append the entry for page `high` to the high-byte index array.
 *
 * fd   - descriptor of the index output file
 * type - 0: the array is "page_charset2uni" and entries are "c2u_XX";
 *        otherwise: the array is "page_uni2charset" and entries are "u2c_XX"
 * high - high byte of the page just written
 *
 * The first call after start-up (or after writehighend()) begins the array
 * definition.  Index slots between the previously written page and `high`
 * are filled with NULL.  Rows hold eight entries; a row is flushed to fd
 * whenever an index divisible by eight is reached, and the remainder stays
 * in buffer2 until the next flush or writehighend().
 *
 * Returns 0 on success, -1 if a write() fails.
 */
static int writehightab(int fd, int type, int high)
{
	int rc = 0;

	if (!headerprint) {
		memset(buffer2, 0, LINEBUF_SIZE);
		if (type == 0)
			len = sprintf(buffer2, "static u_int16_t *page_charset2uni[256] = {\n\t");
		else
			len = sprintf(buffer2, "static unsigned char *page_uni2charset[256] = {\n\t");
		headerprint = 1;
		savehigh = -1; /* so that page 0 is not treated as already written */
	}

	/* Pad the pages that have no mappings. */
	while (savehigh + 1 < high) {
		savehigh++;
		if (savehigh % 8 == 0 && utabgen_flush_index_row(fd) < 0)
			rc = -1;
		len += sprintf(buffer2 + len, "NULL, ");
	}

	if (high % 8 == 0 && utabgen_flush_index_row(fd) < 0)
		rc = -1;

	if (type == 0)
		len += sprintf(buffer2 + len, "c2u_%02X, ", (unsigned)high);
	else
		len += sprintf(buffer2 + len, "u2c_%02X, ", (unsigned)high);

	savehigh = high;
	return rc;
}
/*
 * Close the high-byte index array started by writehightab() and reset the
 * writer state so a new array can be started.
 *
 * fd - descriptor of the index output file
 *
 * Writes whatever entries are still pending in buffer2 followed by "};".
 * If no array was started since the last reset, nothing is written: buffer2
 * would only hold stale data and there is no definition to close.
 *
 * Returns 0 on success, -1 if the write() fails.
 */
static int writehighend(int fd)
{
	int rc = 0;

	if (headerprint) {
		len += sprintf(buffer2 + len, "};\n");
		if (write(fd, buffer2, len) != len)
			rc = -1;
	}

	len = 0;
	headerprint = 0;
	savehigh = 0;
	return rc;
}
/*
 * Append the whole contents of the file named src to fd, followed by a
 * newline and a tab.
 *
 * fd  - descriptor of the destination file, positioned where the copy
 *       should go
 * src - path of the file to copy; it is opened read-only
 *
 * A local buffer is used, so the file-scope scratch buffers are left alone.
 * Short writes are retried until every byte is written.
 *
 * Returns 0 on success, -1 if src cannot be opened or a read/write fails
 * (a message is printed in either case).
 */
static int copytodstfile (int fd,char* src)
{
	char chunk[4096];
	int srcfd;
	int nread;

	srcfd = open(src, O_RDONLY);
	if (srcfd < 0) {
		printf("open:%s fail!\n", src);
		return -1;
	}

	while ((nread = (int)read(srcfd, chunk, sizeof chunk)) > 0) {
		int done = 0;

		while (done < nread) {
			int n = (int)write(fd, chunk + done, (size_t)(nread - done));

			if (n <= 0) {
				printf("copy:%s fail!\n", src);
				close(srcfd);
				return -1;
			}
			done += n;
		}
	}
	close(srcfd);

	if (nread < 0) {
		printf("copy:%s fail!\n", src);
		return -1;
	}

	/* Separator between the pieces concatenated into the output file. */
	if (write(fd, "\n\t", 2) != 2) {
		printf("copy:%s fail!\n", src);
		return -1;
	}
	return 0;
}
/* Entries allocated for the mapping table up front; it doubles when full. */
#define UTABGEN_INITIAL_ENTRIES 256
/* Upper bound on the table size, to keep the doubling from overflowing. */
#define UTABGEN_MAX_ENTRIES (1 << 20)
/* Permission bits requested for every file the tool creates. */
#define UTABGEN_FILE_MODE 0644

/*
 * Read every "<code> <unicode>" line of srcfile into the global table.
 * Lines that do not contain two hexadecimal numbers are skipped.
 * Returns the number of entries stored, or -1 on failure (table released).
 */
static int utabgen_load_table(const char *srcfile)
{
	int capacity = UTABGEN_INITIAL_ENTRIES;
	int count = 0;
	int fd;

	fd = open(srcfile, O_RDONLY);
	if (fd < 0) {
		printf("open:%s fail in line %d!\n", srcfile, __LINE__);
		return -1;
	}

	tab = calloc((size_t)capacity, sizeof *tab);
	if (!tab) {
		printf("malloc table fail in line %d!\n", __LINE__);
		close(fd);
		return -1;
	}

	while (readline(fd, buffer, LINEBUF_SIZE) >= 0) {
		unsigned int code;
		unsigned int unicode;

		if (sscanf(buffer, "%x %x", &code, &unicode) != 2)
			continue; /* e.g. a code with no Unicode mapping */

		if (count == capacity) {
			struct map *grown = NULL;

			if (capacity < UTABGEN_MAX_ENTRIES)
				grown = realloc(tab, (size_t)capacity * 2 * sizeof *tab);
			if (!grown) {
				printf("malloc table fail in line %d!\n", __LINE__);
				close(fd);
				free(tab);
				tab = NULL;
				return -1;
			}
			tab = grown;
			capacity *= 2;
		}
		tab[count].wchar = code;
		tab[count].uchar = unicode;
		count++;
	}

	close(fd);
	return count;
}

/*
 * Split the sorted global table into pages keyed by the high byte of the
 * sort key and write each page (including the last one) to lowname, with
 * the matching index array in highname.  type selects the key exactly as in
 * orderbytype(): 0 = wchar, otherwise uchar.  Returns 0, or -1 if a file
 * cannot be created.
 */
static int utabgen_emit_pages(int type, int count, char *lowname, char *highname)
{
	int lowfd;
	int highfd;
	int page = -1; /* high byte of the page being collected, -1 = none yet */
	int i;

	lowfd = open(lowname, O_RDWR | O_CREAT | O_TRUNC, UTABGEN_FILE_MODE);
	if (lowfd < 0) {
		printf("open:%s fail in line %d!\n", lowname, __LINE__);
		return -1;
	}
	highfd = open(highname, O_RDWR | O_CREAT | O_TRUNC, UTABGEN_FILE_MODE);
	if (highfd < 0) {
		printf("open:%s fail in line %d!\n", highname, __LINE__);
		close(lowfd);
		return -1;
	}

	memset(lowtab, 0, sizeof lowtab);
	for (i = 0; i < count; i++) {
		unsigned int key = type ? tab[i].uchar : tab[i].wchar;
		int high = (int)((key >> 8) & 0xff);

		if (page >= 0 && high != page) {
			(void)writelowtab(lowfd, type, lowtab, page);
			(void)writehightab(highfd, type, page);
			memset(lowtab, 0, sizeof lowtab);
		}
		page = high;
		lowtab[key & 0xff] = tab[i];
	}

	/* The loop only writes a page when the next one starts: flush the last. */
	if (page >= 0) {
		(void)writelowtab(lowfd, type, lowtab, page);
		(void)writehightab(highfd, type, page);
	}
	(void)writehighend(highfd);

	close(lowfd);
	close(highfd);
	return 0;
}

/*
 * utabgen <srcfile> <dstfile>
 *
 * Reads a code-page/Unicode mapping table from srcfile, writes the
 * Unicode->charset and charset->Unicode tables into intermediate files in
 * the current directory, then concatenates them into dstfile.
 *
 * Returns 0 on success and -1 on a usage error; calls exit(1) on any other
 * failure.
 */
int main(int argc,char* argv[])
{
	char *srcfile;
	char *dstfile;
	int wcharsize = 1; /* bytes per code-page character: 1 or 2 */
	int tabsize;
	int fd;
	int i;
	struct tm *local;
	time_t t;

	/* Check argc before touching argv[1] and argv[2]. */
	if (argc < 3) {
		printf("two few args\n");
		printf("utabgen srcfilepath dstfilepath\n");
		return -1;
	}
	srcfile = argv[1];
	dstfile = argv[2];

	/* ---- read to ram ---- */
	printf("read file:%s to ram now\n", srcfile);
	tabsize = utabgen_load_table(srcfile);
	if (tabsize < 0)
		exit(1);
	if (tabsize == 0) {
		printf("no mapping found in %s!\n", srcfile);
		free(tab);
		exit(1);
	}

	/* More than 256 entries, or any code above 0xff, means two bytes. */
	if (tabsize > 0x100) {
		wcharsize = 2;
	} else {
		for (i = 0; i < tabsize; i++) {
			if (tab[i].wchar >= 0x100) {
				wcharsize = 2;
				break;
			}
		}
	}
	printf("ansichar is %d bytes!\n", wcharsize);

	/* ---- unicode char ---- */
	printf("gen unicode tab now\n");
	(void)orderbytype(1, tab, tabsize);
	if (utabgen_emit_pages(1, tabsize, U2C_LTAB, U2C_HTAB) < 0) {
		free(tab);
		exit(1);
	}

	/* ---- ansi char ---- */
	printf("gen ascll tab now\n");
	(void)orderbytype(0, tab, tabsize);
	if (wcharsize == 2) {
		if (utabgen_emit_pages(0, tabsize, C2U_LTAB, C2U_HTAB) < 0) {
			free(tab);
			exit(1);
		}
	} else {
		/* One-byte code page: a single 256-entry table, no index. */
		fd = open(C2U_LTAB, O_RDWR | O_CREAT | O_TRUNC, UTABGEN_FILE_MODE);
		if (fd < 0) {
			printf("open:%s fail!\n", C2U_LTAB);
			free(tab);
			exit(1);
		}
		memset(lowtab, 0, sizeof lowtab);
		for (i = 0; i < tabsize; i++)
			lowtab[tab[i].wchar & 0xff] = tab[i];
		(void)writelowtab2(fd, 0, lowtab, 0);
		close(fd);
	}

	/* ---- copy ---- */
	printf("copy all tab to dst file: %s now\n", dstfile);
	fd = open(dstfile, O_RDWR | O_CREAT | O_TRUNC, UTABGEN_FILE_MODE);
	if (fd < 0) {
		printf("open:%s fail!\n", dstfile);
		free(tab);
		exit(1);
	}

	t = time(NULL);
	local = localtime(&t);
	if (local) {
		/* tm_mon counts from 0, so add 1 for the calendar month. */
		len = snprintf(buffer, LINEBUF_SIZE,
			       "// generate in %d/%d/%d/ %d:%d:%d\n",
			       local->tm_year + 1900, local->tm_mon + 1, local->tm_mday,
			       local->tm_hour, local->tm_min, local->tm_sec);
		write(fd, buffer, len);
	}

	if (copytodstfile(fd, C2U_LTAB) < 0
	    || (wcharsize == 2 && copytodstfile(fd, C2U_HTAB) < 0)
	    || copytodstfile(fd, U2C_LTAB) < 0
	    || copytodstfile(fd, U2C_HTAB) < 0) {
		close(fd);
		free(tab);
		exit(1);
	}

	free(tab);
	close(fd);
	/* Generated source only needs to be readable, not executable/world-writable. */
	chmod(dstfile, UTABGEN_FILE_MODE);
	printf("all is OK!!!\n");
	return 0;
}