编译原理：实验一练习

最新推荐文章于 2023-05-12 17:58:38 发布

XRT_knives

最新推荐文章于 2023-05-12 17:58:38 发布

阅读量259

点赞数 3

分类专栏：编译原理文章标签：编译器

本文链接：https://blog.csdn.net/XRT_knives/article/details/120788700

版权

编译原理专栏收录该内容

6 篇文章 0 订阅

订阅专栏

编译原理：实验一练习

词法分析器

编译原理实验报告

上面的第一个链接中的词法分析器有一点问题，修改后的代码如下：

 #include "string.h"
 #include "math.h"
 #include "stdio.h"

 char keywords[28][12]={
     "program","begin","end","var","while","do","repeat",
     "until","for","to","if","then","else",";", ":", "(", ")", ",",
     ":=", "+", "-", "*", "/", ">", ">=", "==", "<", "<="};
 int num_key=28;

 int aut[11][8]={
     {0, 0, 0, 0, 0, 0, 0, 0},
     {0, 2, 0, 0, 0, 8, 9,15},
     {0, 2, 3, 5,11, 0, 0,11},
     {0, 4, 0, 0, 0, 0, 0, 0},
     {0, 4, 0, 5,11, 0, 0,11},
     {0, 7, 0, 0, 6, 0, 0, 0},
     {0, 7, 0, 0, 0, 0, 0, 0},
     {0, 7, 0, 0,11, 0, 0,11},
     {0, 8, 0, 0, 0, 8, 0,12},
     {0, 0, 0, 0, 0, 0,10,14},
     {0, 0, 0, 0, 0, 0, 0,13}
 };
 char ID[50][12];
 float C[20];
 int num_ID=0,num_C=0;

 struct token{
     int code;
     int value;
 };                                    //Token结构

 struct token tok[100];                //Token数组
 int i_token=0,num_token=0;            //Token计数器和Token个数

 char strTOKEN[15];                    //当前单词
 int i_str;                            //当前单词指针

 int n,p,m,e,t;                        //尾数值，指数值，小数位数，指数符号，类型
 double num;//常数值
 char w[50];                           //源程序缓冲区
 int i;                                //源程序缓冲区指针,当前字符为w[i]

 struct map{                            //当前字符到状态转换矩阵列标记的映射
     char str[50];
     int col;
 };
 struct map col1[4]={{"0123456789",1},{".",2},{"Ee",3},{"+-",4}};        //数字
 struct map col2[2]={{"abcdefghijklmnopqrstuvwxyz",5},{"0123456789",1}}; //关键字或标识符
 struct map col3[1]={{";:(),+-*/=><",6}};                                //界符
 struct map *ptr;
 int num_map;

 void act(int s);
 int find(int s,char ch);
 int InsertConst(double num);
 int Reserve(char *str);
 int InsertID(char *str);

 int main(int argc, char* argv[]){
     FILE *fp;
     int s;//当前状态
     fp=fopen("testcase.txt","r");
     while (fgets(w,50,fp)!=NULL){
         //读取文件中的50个字符

         i=0;
         do{
             //w[50] = begin /n end
             while (w[i]==' '){//滤空格
                 i++;
             }if (w[i]>='a' && w[i]<='z'){//读取的首个字符是字母
                 ptr=col2;
                 num_map=2;
             }else if (w[i]>='0' && w[i]<='9'){//读取的首个字符是数字
                 ptr=col1;
                 num_map=4;
             }else if (strchr(col3[0].str,w[i])==NULL){
                 printf("非法字符%c\n",w[i]);
                 i++;
                 continue;
             }else{
                 ptr=col3;
                 num_map=1;
             }
             i--;
             s=1;//开始处理一个单词
             //当s不为0时执行循环,s代表的是有限状态自动机中的状态集

             while (s!=0){
                 act(s);//s=1时,strTOKEN[0]='\0'
                 if (s>=11 && s<=14){
                     break;
                 }
                 /*
                  getchar()
                  i= (-1)=>{0}
         */
                 i++;
                 /*
                  s=1 w[0]='b'
                 */
                 s=find(s,w[i]);
             }

             //如果s是等于0跳出的循环
             if (s==0){
                 strTOKEN[i_str]='\0';//代表字符串的结尾
                 printf("词法错误：%s\n",strTOKEN);
             }
         }while (w[i]!=10);
     }


     printf("关键字表：");//输出结果
     printf("\n");
     for (i=0;i<28;i++){
         printf("%s ",keywords[i]);
     }

     printf("\n");

     printf("Token序列 :");
     printf("\n");
     for (i=0;i<num_token;i++){
         printf("(%d,%d)",tok[i].code,tok[i].value);
     }

     printf("\n");


     printf("符号表：");
     printf("\n");
     for (i=0;i<num_ID;i++){
         printf("%s ",ID[i]);
     }
     printf("\n");



     printf("常数表：");
     printf("\n");
     for (i=0;i<num_C;i++){
         printf("%.4f ",C[i]);
     }
     printf("\n");


     fclose(fp);
     return 0;
 }

 void act(int s){
     int code;
     switch (s){
         case 1:
             n=0;m=0;p=0;t=0;e=1;num=0;i_str=0;
             strTOKEN[i_str]='\0';                   //其它变量初始化
             break;
             /*
              n,p,m,e,t;
              尾数值，指数值，小数位数，指数符号，类型
              */
         case 2:
             n=10*n+w[i]-48;
             break;

         case 3:
             t=1;
             break;

         case 4:
             n=10*n+w[i]-48;
             m++;
             break;

         case 5:
             t=1;
             break;

         case 6:
             if (w[i]=='-'){
                e=-1;
             }
             break;

         case 7:
             p=10*p+w[i]-48;
             break;

         case 8:
             strTOKEN[i_str++]=w[i];  //将ch中的符号拼接到strTOKEN的尾部；
             break;

         case 9:
             strTOKEN[i_str++]=w[i];  //将ch中的符号拼接到strTOKEN的尾部；
             break;

         case 10:
             strTOKEN[i_str++]=w[i]; //将ch中的符号拼接到strTOKEN的尾部；
             break;

         case 11:
             num=n*pow(10,e*p-m);           //计算常数值
             tok[i_token].code=2;
             tok[i_token++].value=InsertConst(num);  //生成常数Token
             num_token++;
             break;

         //case12是当你的首个输入字符是字母,而后的情况中遇到了输入非字母和数字会执行的情况
         case 12:
             strTOKEN[i_str]='\0';
             code=Reserve(strTOKEN);//查界符表
             if (code){//如果匹配到界符表中的元素,生成关键字Token
                 tok[i_token].code=code;//将之前减去的3加回来
                 tok[i_token++].value=0;
             }else{
                 tok[i_token].code=1;
                 tok[i_token++].value=InsertID(strTOKEN);
             }//生成标识符Token
             num_token++;//token的个数
             break;

         case 13:
             strTOKEN[i_str]='\0';
             code=Reserve(strTOKEN);//查界符表
             if (code){
                 tok[i_token].code=code;
                 tok[i_token++].value=0;
                  }//生成界符Token
             else{
                 strTOKEN[strlen(strTOKEN)-1]='\0';//单界符
                 i--;
                 code=Reserve(strTOKEN);//查界符表
                 tok[i_token].code=code;
                 tok[i_token++].value=0;//生成界符Token
             }
             num_token++;
             break;

         case 14:
             strTOKEN[i_str]='\0';
             code=Reserve(strTOKEN);//查界符表
             tok[i_token].code=code;
             tok[i_token++].value=0;//生成界符Token
             num_token++;
             break;
     }
 }
 //find方法:输入一个int和一个char
 //s=1,ch='b'
 //s代表的是当前状态集,ch代表的是当前读取到的字符
 //find方法,接收一个状态集和一个字符集,返回下一步的状态集
 int find(int s,char ch){
     int i;
     int col=7;//默认了i的值是-1,如果之后查找不到的话直接认作是其他符号输入，再进行判断是不是界符
     struct map *p = ptr;
     for (i=0;i<num_map;i++){
         /*
          ch='b' 返回'b'在str数组中的位置,即判断是否存在这个元素
         */
         if (strchr((p+i)->str,ch)){
             col=(p+i)->col;//如果是'd',则col=5
             break;
         }
     }
     //返回当前状态和字符所对应的有限状态自动机中的位置
     return aut[s][col];
 }
 //插入数字
 int InsertConst(double num){
     int i;
     for (i=0;i<num_C;i++){
         if (num==C[i]){
             return i+1;
         }
     }
     C[i]=num;
     num_C++;
     return i+1;
 }
 /*
  Reserve方法,读入一个字符
  判断是不是界符,如果是界符,则返回符合界符数组的下标+3,否则返回0
  */
 int Reserve(char *str){
     int i;
     for (i=0;i<num_key;i++){
         //判断str的元素是不是keywords里的
         if (!strcmp(keywords[i],str)){
             return (i+1);
         }
     }
     return 0;
 }
 /*
  InsertId方法,输入一个字符
  返回当前的字符串在ID字符串数组中的下标
  */
 int InsertID(char *str){
     //查看在ID数组中有没有和输入字符串相同的元素
     //第一个字符串没有,i=num_ID=0
     int i;
     for (i=0;i<num_ID;i++){
         if (!strcmp(ID[i],str)){
             return i+1;
         }
     }
     //将当前输入的str copy到ID[i]中
     strcpy(ID[i],str);
     num_ID++;
     return i+1;
 }