PL0词法分析器(改进)
编译原理第一次实验-PL0词法分析器
一开始觉得毫无头绪,后来看到学长的博客就慢慢顿(学)悟(会)了。ps:ddl真的是第一生产力。不废话了,上代码。先放上参考的博客(作者是我的本校学长):https://blog.csdn.net/chen_dsir/article/details/70234123
我的改进代码如下
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<iostream>
#define MAX_SIZE 5000
using namespace std;
/* cout<<"单词识别结果:"<<endl;
cout<<"1-表示基本字"<<endl;
cout<<"2-表示标识符"<<endl;
cout<<"3-表示数字"<<endl;
cout<<"4-表示运算符"<<endl;
cout<<"5-表示界符"<<endl;
cout<<"6-表示非法"<<endl;
*/
// One scanned token: its category code together with its text.
struct Word
{
    // Category code assigned by main(): 1 = reserved word, 2 = identifier,
    // 3 = number, 4 = operator, 5 = delimiter, 6 = illegal.
    int kind;
    // NUL-terminated token text; holds at most 19 characters plus the terminator.
    char value[20];
};
int main()
{
char ch,word_t[20]=""; //ch 存放暂时输入的字符 word_t存放字符串
int i=0,j=0,k=0,key=0,choice;
char keyword[45][20]= {"studentid", //0
"wanhaibo", //1
"name", //2
"begin", //3
"call", //4
"const", //5
"do", //6
"end", //7
"if", //8
"odd", //9
"procedure",//10
"read", //11
"then", //12
"var", //13
"while", //14
"write", //15
/*以上为基本字*/
",", //16
";", //17
".", //18
"(", //19
")", //20
"[", //21
"]", //22
"{", //23
"}", //24
/*以上为界符*/
"+", //25
"-", //26
"*", //27
"/", //28
"=", //29
"<", //30
">", //31
"<=", //32
">=", //33
":=", //34
/*以上为运算符*/
"0", //35
"1", //36
"2", //37
"3", //38
"4", //39
"5", //40
"6", //41
"7", //42
"8", //43
"9"}; //44
FILE *fp;
Word word[MAX_SIZE];
cout<<"请输入选择"<<endl;
cout<<"1.手动输入程序保存"<<endl;
cout<<"2.打开保存好的文件进行词法分析"<<endl;
cin>>choice;
switch(choice)
{
case 1:
fp=fopen("txt01.txt","wt"); //写,如果没有就创建
if(!fp)
{
cout<<"打开文件失败"<<endl;
exit(1);
}
cout<<"请输入程序,以#标志程序结束"<<endl;
ch=getchar();
while(ch!='#')
{
fputc(ch,fp);
ch=getchar();
} //将输入的信息保存到txt01.txt中
fclose(fp);
break;
case 2:
fp=fopen("txt01.txt","r"); //读
if(!fp)
{
cout<<"打开文件失败"<<endl;
exit(1);
}
cout<<"程序源码:"<<endl;
while((ch=fgetc(fp))!=EOF)
{
putchar(ch);
} //显示源文件内容
fclose(fp);
cout<<"词法分析结果:"<<endl;
fp=fopen("txt01.txt","r");
if(!fp)
{
cout<<"打开文件失败"<<endl;
exit(1);
}
while((ch=fgetc(fp))!=EOF)
{
if((ch>='a'&&ch<='z')||(ch>='A'&&ch<='Z')||(ch>='0'&&ch<='9'))
{
word_t[key++]=ch;
word_t[key]='\0';
continue;
} //进入条件: ch为字母||数字
else if(ch=='<'||ch=='>')
{
strcpy(word[i].value,word_t);
strcpy(word_t,""); //初始化
key=0; //初始化
i++; //下标++//abc>= 这样,要先把前边的东西保存进wrod
char t=ch;
ch=fgetc(fp);
if(ch=='=')
{
word_t[0]=t;
word_t[1]=ch;
word_t[2]='\0';
strcpy(word[i].value,word_t);
strcpy(word_t,""); //初始化
key=0; //初始化
i++; //下标++
} //将>= <=连成一个单词
else
fseek(fp,-1,SEEK_CUR); //偏移量为-1 (指针往前移)
}
else if(ch==':')
{
strcpy(word[i].value,word_t);
strcpy(word_t,""); //初始化
key=0; //初始化
i++; //下标++//abc>= 这样,要先把前边的东西保存进wrod
char t=ch;
ch=fgetc(fp);
if(ch=='=')
{
word_t[0]=t;
word_t[1]=ch;
word_t[2]='\0';
strcpy(word[i].value,word_t);
strcpy(word_t,""); //初始化
key=0; //初始化
i++; //下标++
} //将:=连成一个单词
else
{
word_t[0]=t;
word_t[1]='\0';
strcpy(word[i].value,word_t);
strcpy(word_t,""); //初始化
key=0; //初始化
i++; //下标++
}
// fseek(fp,-1,SEEK_CUR); //偏移量为-1 (指针往前移)
}
else
{
if(strcmp(word_t,"")!=0)
{
strcpy(word[i].value,word_t); //将单词复制到数组中
strcpy(word_t,""); //初始化
key=0; //初始化
i++;
} //缓冲区不空且ch不为字母或者数字
if(ch==10||ch==13||ch==32||ch==9||ch==11) //去空格回车跟制表符
{
continue;
} //ch不为字母或数字且去掉空的字符
else
{
word_t[0]=ch;
word_t[1]='\0';
strcpy(word[i].value,word_t);//将非字母数字符号拷贝到结构数组中
strcpy(word_t,"");
key=0;//回到临时数组的开始位置
i++;
}
}
}
break;
default:
cout<<"输入错误"<<endl;
}
for(j=0; j<i; j++)
for(k=0; k<=44; k++)
{
if(strcmp(word[j].value,keyword[k])==0)
{
if(k>=0&&k<=15)
word[j].kind=1; //基本字
else if(k>=16&&k<=24)
word[j].kind=5; //界符
else if(k>=25&&k<=34)
word[j].kind=4; //运算符
break;
}
else if(word[j].value[0]>='0'&&word[j].value[0]<='9')
word[j].kind=3; //数字
else if((word[j].value[0]>='a'&&word[j].value[0]<='z')||(word[j].value[0]>='A'&&word[j].value[0]<='Z'))
word[j].kind=2; //标识符
else
word[j].kind=6; //非法
}
cout<<"单词识别结果:"<<endl;
cout<<"1-表示基本字"<<endl;
cout<<"2-表示标识符"<<endl;
cout<<"3-表示数字"<<endl;
cout<<"4-表示运算符"<<endl;
cout<<"5-表示界符"<<endl;
cout<<endl;
for(j=0; j<i; j++)
cout<<word[j].kind<<" "<<word[j].value<<endl;
cout<<endl<<endl;
cout<<"基本字:"<<endl;
for(j=0; j<i; j++)
if(word[j].kind==1)
cout<<word[j].value<<" ";
cout<<endl<<endl;
cout<<"标识符:"<<endl;
for(j=0; j<i; j++)
if(word[j].kind==2)
cout<<word[j].value<<" ";
cout<<endl<<endl;
cout<<"数字:"<<endl;
for(j=0; j<i; j++)
if(word[j].kind==3)
cout<<word[j].value<<" ";
cout<<endl<<endl;
cout<<"运算符:"<<endl;
for(j=0; j<i; j++)
if(word[j].kind==4)
cout<<word[j].value<<" ";
cout<<endl<<endl;
cout<<"界符:"<<endl;
for(j=0; j<i; j++)
if(word[j].kind==5)
cout<<word[j].value<<" ";
cout<<endl<<endl;
cout<<"非法:"<<endl;
for(j=0; j<i; j++)
if(word[j].kind==6)
cout<<word[j].value<<" ";
cout<<endl<<endl;
return 0;
}
讲解
(在字典里加的studentid、name是老师为考核额外规定的基本字。)
在参考博客中,作者识别PL0数字的方式是直接拿单词和字典中的0-9做strcmp,这样会把123这样的数字串误判成标识符而不是数字(123和0-9中的任何一个字符串做strcmp都不等于0)。另外,把":="、">="、"<="连接成一个单词放进word[i].value的过程好像也是有bug的:直接将学长的代码拿来分析一段PL0程序,就能看到明显的错误。我增加了if的判断规则,用到的新技术就是fseek(FILE*,long,int)函数,用它把文件指针回退一个字符。
同时在用if的时候一定要逻辑清晰,要知道自己这个else代表了什么。比如判断一个var:abc,这里var的每个字符都会进入我的程序中的if((ch>='a'&&ch<='z')||(ch>='A'&&ch<='Z')||(ch>='0'&&ch<='9')),
但是:会进入else if(ch==':')。
这里一定要先把之前存在word_t缓冲数组中的数据保存进word,再另开一个新的缓冲,否则之前的数据就会丢失。