/*词法扫描器设计*/
/*2007-10-24 21:22调试通过*/
#include "iostream.h"
#include "stdlib.h"
#include "fstream.h"
#include "stdio.h"
#include "string.h"
/*the length of token if 9*/
#define Max_len_token 10
#define Max_len_symbol 3
#define keywordnumber 21
#define operatornumber 8
/*
以下为使用的语言子类
保留字
procedure ,var ,begin ,if ,then ,else ,end ,do ,while ,switch ,case ,for ,string ,false ,maxint ,true ,boolean ,char ,real ,integer ,new
运算符
+ - * / ( ) :=
对于其他暂时不做过多考虑
*/
/*Node of a singly linked list of tokens; also used as the element type of
  the reserved-word table syskw, where only str is filled in.*/
typedef struct token_{
char str[Max_len_token]; /*NUL-terminated token text*/
struct token_ *next; /*next node, or NULL at the end of the list*/
}mytoken;
/*Element type of the operator table sysop; init() fills in str only.*/
typedef struct symbol_{
char str [Max_len_symbol]; /*NUL-terminated operator text, e.g. ":=" or "+"*/
struct symbol_ *next ; /*link field; not assigned by the code visible in this file*/
}symbol;
/*Buffer for the input token that is currently being assembled and checked.*/
struct input{//the input under examination
char str [Max_len_token]; //array holding the characters of the token
int type; //token class: init() starts it at 7; movestr() treats 1 as identifier,
          //2 as reserved word, 4 as constant and 0 as invalid
};
/*function prototypes*/
void read_file(char * ); /*read the named file and scan its contents*/
void init(void); /*initialise the global tables, buffers and lists*/
int inputchar(char ); /*feed one character to the scanner (0 continue, 1 ok, 2 error)*/
int isoperator(char ); /*non-zero if the character is an operator character*/
void movestr (void ); /*move the buffered token from the character arrays into the lists*/
void iskeyword(char * ); /*set n_input->type to 2 for a reserved word, otherwise to 1*/
int exist(char *, mytoken * ); /*1 if the string is already stored in the list, else 0*/
void printall(void); /*print all collected results to the screen*/
/*lists*/
//Matching is case-insensitive, so two parallel sets of lists are kept:
//one used for comparison, the other storing the characters as they were entered.
mytoken *keywordlist, *idlist ,*constantlist; /*tokens stored as entered*/
mytoken *keywordcmp , *idcmp , *constantcmp; /*tokens stored in the form used for comparison*/
int operatorflag[operatornumber]; /*occurrence counter for each entry of sysop*/
mytoken syskw[keywordnumber]; /*reserved-word table, filled by init()*/
symbol sysop[operatornumber]; /*operator table, filled by init()*/
int delimiterflag ; /*number of ';' delimiters counted by movestr()*/
struct input *n_input =(struct input *)malloc (sizeof(struct input)); /*pending token, comparison form*/
struct input *ori_input =(struct input *)malloc (sizeof(struct input)); /*pending token, as entered*/
char operatorbuf[4]; /*characters of the operator currently being read*/
static int bufcount =0; /*number of characters stored in operatorbuf*/
static int ncount=0; /*number of characters stored in the pending token*/
static int rowcount =0; /*incremented by inputchar() for every newline seen*/
static int flag =0; /*not referenced by the code visible in this file*/
/*
 * Program entry point.
 *
 * Initialises the scanner, analyses the file named by the first command-line
 * argument (or "test.txt" when no argument is given) and prints the
 * collected results.
 *
 * Returns 0 to the operating system.
 */
int main(int argc,char *argv[])
{
    /* read_file() takes a non-const char *, so keep the default name in a
       writable array instead of passing a string literal. */
    char default_file[] = "test.txt";

    init();
    if (argc > 1)
        read_file(argv[1]);
    else
        read_file(default_file);
    printall();
    return 0;
}
/*
 * Open `filename` and feed it to the scanner one character at a time.
 *
 * The loop is a small state machine driven by the result of inputchar():
 *   0 - in the middle of a token: just feed the next character;
 *   1 - a token/operator was completed: store it with movestr(), then feed
 *       the next character;
 *   2 - error: skip characters until an operator character, a blank, a tab
 *       or a newline is seen, then resume scanning.
 *
 * Prints a message and returns if the file cannot be opened.  After the last
 * character, movestr() is called once more to store whatever is pending.
 */
void read_file(char * filename)
{
    /* The open-mode flags must be combined with bitwise OR; a logical OR
       collapses them to the value 1 and silently drops ios::nocreate. */
    fstream file(filename, ios::in | ios::nocreate);
    if (!file)
    {
        cout<<"open txt document error!"<<endl;
        return;
    }
    cout<<"对下列文件进行分析:"<< filename<<endl;

    int fileflag = 0;
    char ch;
    while (file.get(ch))
    {
        if (fileflag == 2) {
            /* Error recovery: drop characters until a separator shows up.
               ';' is covered by isoperator(). */
            if (isoperator(ch) || ch == 0x20 || ch == 0x9)
                fileflag = 0;
            else if (ch == 0x0a)
                fileflag = inputchar(ch); /* keeps the row counter up to date */
        }
        else {
            if (fileflag == 1)
                movestr();
            fileflag = inputchar(ch);
            if (fileflag == 2) {
                /* rowcount holds the number of newlines seen so far, hence
                   the +1 to report a 1-based line number. */
                cout<<"第"<< rowcount + 1 <<"行出错"<<endl;
            }
        }
    }
    movestr();
}
void init(void ){
n_input->type = 7;
delimiterflag = 0;
for (int i = 0;i < 10 ;i++){
n_input ->str [i] = 0 ;
ori_input ->str [i] = 0;
}
for( i = 0 ; i < 4 ; i++){
operatorbuf[i] = 0 ;
}
for ( i = 0 ;i < operatornumber ;i++){
operatorflag[i] = 0;
}
{/*set the pointer*/
constantlist =(mytoken *)malloc (sizeof (mytoken));
keywordcmp =(mytoken *)malloc (sizeof (mytoken));
keywordlist =(mytoken *)malloc (sizeof (mytoken));
idlist =(mytoken *)malloc (sizeof (mytoken));
idcmp =(mytoken *)malloc (sizeof (mytoken));
constantcmp =(mytoken *)malloc (sizeof (mytoken));
constantlist ->next =NULL;
constantcmp ->next =NULL;
keywordlist ->next =NULL;
keywordcmp ->next =NULL;
idlist ->next =NULL;
idcmp ->next =NULL;
}
{/*set the keyword*/
strcpy(syskw[0].str,"string");
strcpy(syskw[1].str,"procedure");
strcpy(syskw[2].str,"var");
strcpy(syskw[3].str,"begin");
strcpy(syskw[4].str,"if");
strcpy(syskw[5].str,"then");
strcpy(syskw[6].str,"else");
strcpy(syskw[7].str,"end");
strcpy(syskw[8].str,"do");
strcpy(syskw[9].str,"while");
strcpy(syskw[10].str,"switch");
strcpy(syskw[11].str,"case");
strcpy(syskw[12].str,"for");
strcpy(syskw[13].str,"new");
strcpy(syskw[14].str,"false");
strcpy(syskw[15].str,"maxint");
strcpy(syskw[16].str,"true");
strcpy(syskw[17].str,"boolean");
strcpy(syskw[18].str,"char");
strcpy(syskw[19].str,"real");
strcpy(syskw[20].str,"integer");
}
{/*set the operator*/
strcpy(sysop[0].str,":=");
strcpy(sysop[1].str,"+");
strcpy(sysop[2].str,"-");
strcpy(sysop[3].str,"*");
strcpy(sysop[4].str,"/");
strcpy(sysop[5].str,"(");
strcpy(sysop[6].str,")");
strcpy(sysop[7].str,"=");
}
};
/**************************
0 is continue
1 is ok
2 is error
***************************/
int inputchar (char c){
/*one is origin,the other is compare*/
char corigin ,ccompare;
if( (c == 0x20) || (c == 0x9)){
return 1;
}
if(c ==0x0a){//在这里不能设置ncount = 0
rowcount++;
return 1;
}
if (isoperator (c)){
if(bufcount >1)
return 2;
if( (bufcount == 1)&& (c != '='))
return 2;
if( (bufcount == 0 )&& (c == ':'))
{
operatorbuf[bufcount ++] = c;
return 0;
}
operatorbuf[bufcount ++] = c;
return 1;
}
bufcount =0;
if( (c > 0x40 )&& (c < 0x5b ))
ccompare = c + 0x20;
else
ccompare = c;
corigin = c;
if ((ccompare >0x60) && (ccompare < 0x7b)) {
if( n_input-> type == 4)
{
n_input ->type = 0;
return 2 ;
}
if( n_input-> type & 4 != 0 )
n_input-> type ^= 4;
n_input-> str[ncount] =ccompare ;
ori_input-> str[ncount ++] =corigin ;
return 0;
}
else if((ccompare > 0x2f) && (ccompare < 0x3a)){
if( n_input ->type & 2 != 0)
n_input-> type ^= 2;
if( n_input ->type & 1 != 0)
n_input-> type ^= 1;
n_input-> str[ncount] =ccompare ;
ori_input-> str[ncount ++] =corigin ;
return 0;
}
else if(ccompare == '_'){
if( n_input ->type & 4 != 0)
n_input-> type ^= 4;
if( n_input ->type & 2 != 0)
n_input-> type ^= 2;
n_input-> str[ncount] =ccompare ;
ori_input-> str[ncount ++] =corigin ;
return 0;
}
return 2;
};
/*
 * Report whether `c` is one of the characters the scanner treats as an
 * operator or delimiter: + - * / ( ) : = ;
 *
 * '(' and ')' are included because they are entries of the operator table
 * and of the documented language subset; without them the scanner would
 * reject every parenthesis as an illegal character.
 *
 * Returns 1 for an operator character, 0 otherwise.
 */
int isoperator (char c)
{
    switch (c) {
    case '*':
    case '-':
    case '+':
    case '/':
    case '(':
    case ')':
    case ':':
    case ';':
    case '=':
        return 1;
    default:
        return 0;
    }
}
void movestr (void ){
if(ncount != 0){
ncount = 0 ;
if(n_input ->type ==0)
return ;
if(n_input->type <4) //后加的
iskeyword(n_input->str );
static mytoken *p = NULL ,* q =NULL;
if(n_input-> type == 1){
p = idlist;
q = idcmp;
}
else if(n_input ->type == 2){
p = keywordlist;
q = keywordcmp;
}
else if(n_input ->type == 4){
p = constantlist;
q = constantcmp;
}
if(exist (n_input ->str ,q))
return ;
//添加数据到list中
while (p->next != NULL)
p = p->next ;
p->next = (mytoken *)malloc (sizeof (mytoken));
p = p->next ;
p->next =NULL;
strcpy(p->str ,ori_input ->str );
while(q->next != NULL)
q = q->next ;
q->next = (mytoken *)malloc (sizeof (mytoken));
q = q->next ;
q->next =NULL;
strcpy(q->str ,n_input ->str );
//重置
for (int i = 0;i < 10 ;i++){
n_input-> str [i] = 0 ;
ori_input->str [i] = 0 ;
}
n_input-> type = 7 ;
ncount = 0;
}
if (bufcount != 0){
if ( operatorbuf[0] == 0x3b)//此为 界限符 ;
delimiterflag ++;
else if (operatorbuf[1] == 0x3b)
delimiterflag ++;
else if(bufcount == 1){//此为单个运算符
for( int i = 1; i < operatornumber ;i++){
if(operatorbuf[0] == sysop[i].str [0]){
operatorflag [i]++;
break ;
}
}
}
else if (bufcount == 2){//此为 :=
if( (operatorbuf[0] == ':' ) && (operatorbuf [1] == '=')){
operatorflag[0]++;
}
}
bufcount = 0;
operatorbuf[0] = 0 ;
operatorbuf[1] = 0 ;
}
};
/*
 * Classify `key` against the reserved-word table syskw.
 * Sets n_input->type to 2 when `key` equals one of the table entries and to
 * 1 otherwise.
 */
void iskeyword(char *key)
{
    int matched = 0;
    int idx = 0;

    while (idx < keywordnumber && !matched) {
        if (strcmp(syskw[idx].str, key) == 0)
            matched = 1;
        idx++;
    }
    n_input->type = matched ? 2 : 1;
}
int exist(char *str, mytoken *list)
{
mytoken *p;
p=list->next ;
while(p !=NULL)
{
if(strcmp (p->str ,str) == 0)
{
return 1;
}
p = p->next ;
}
return 0;
};
void printall(void ){
mytoken *p;
cout<<"*******************"<<endl;
cout<<"key words are:"<<endl;
p = keywordlist->next ;
while(p !=NULL){
cout<<p ->str<<endl;
p = p->next;
}
cout<<"*******************"<<endl;
cout<<"ids are:"<<endl;
p = idlist->next ;
while(p !=NULL){
cout<<p ->str<<endl;
p = p->next;
}
cout<<"*******************"<<endl;
cout<<"operators are:"<<endl;
for (int i = 0; i < operatornumber ; i++){
if(operatorflag[i] != 0)
cout<<sysop[i] .str <<"/t出现了/t"<<operatorflag [i]<<"次"<<endl;
}
cout<<"*******************"<<endl;
cout<<"constants are:"<<endl;
p = constantlist ->next ;
while(p !=NULL){
cout<<p ->str<<endl;
p = p->next;
}
cout<<"*******************"<<endl;
cout<<"界限符 ; 出现了"<< delimiterflag<<"次"<<endl;
cout<<"*******************"<<endl;
};