实验一、词法分析实验

一、 实验目的

通过设计一个词法分析程序,对词法进行分析,加强对词法的理解,掌握对程序设计语言的分解和理解。

二、 实验内容和要求

在原程序中输入源代码

  • 对字符串表示的源程序  
  • 从左到右进行扫描和分解
  • 根据词法规则
  • 识别出一个一个具有独立意义的单词符号
  • 以供语法分析之用
  • 发现词法错误,则返回出错信息

在源程序中,自动识别单词,把单词分为五种,并输出对应的单词种别码。

  1. 识别关键字:main if int for while do return break continue,该类的单词码为1.
  2. 识别标识符:表示各种名字,如变量名、数组名、函数名等,如char ch, int syn, token,sum,该类的单词码为2.
  3. 运算符:+、-、*、/、=、>、<、>=、<=、!=,该类的单词码为4.
  4. 分隔符:,、;、{、}、(、),该类的单词码为5.
  5. 常数,例:123,该类的单词码为3.

各种单词符号对应的种别码。

 

输出形式:

  • 二元式

– (单词种别,单词自身的值)

  • 单词种别,表明单词的种类,语法分析需要的重要信息

– 整数码

  • 关键字、运算符、界符:一符一码
  • 标识符:10, 常数:11
  • 单词自身的值

– 标识符token、常数sum

– 关键字、运算符、界符token

三、 实验方法、步骤及结果测试

#include <stdio.h>
 #include <ctype.h>
 #include <malloc.h>
 #include <stdlib.h> 
 #include <string.h>
 #define NULL 0   
 /* Input stream being tokenized; main opens it and the *process helpers read from it. */
FILE *fp;
/* Current lookahead character held by main between helper calls. */
char cbuffer;

/* Keyword table (word type 1). */
char *key[32] = {
    "if", "else", "for", "break", "continue", "int", "float", "double",
    "auto", "case", "char", "const", "default", "do", "enum", "long",
    "extern", "goto", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"
};
/* Delimiter table (word type 2). */
char *border[8] = {",", ";", "{", "}", "(", ")", "[", "]"};
/* Arithmetic operator table (word type 3). */
char *arithmetic[4] = {"+", "-", "*", "/"};
/* Relational/assignment operator table (word type 4). */
char *relation[6] = {"<", "<=", "=", ">", ">=", "<>"};
/* Constants (word type 5) and identifiers (word type 6) recorded so far by search(). */
char *consts[20];
char *label[20];
int constnum = 0, labelnum = 0;

/* Element count of a real array (must not be used on a pointer). */
#define TABLE_LEN(t) ((int)(sizeof(t) / sizeof((t)[0])))

/* Returns the 1-based position of `word` in a fixed table, or 0 if absent. */
static int find_fixed(char *table[], int count, const char *word)
{
    for (int idx = 0; idx < count; idx++) {
        if (strcmp(table[idx], word) == 0) {
            return idx + 1;
        }
    }
    return 0;
}

/*
 * Returns the 1-based position of `word` in a growable table, appending a
 * private copy when it is not there yet.  Returns 0 when the table is full
 * or the copy cannot be allocated, so the table is never written out of bounds.
 */
static int intern_word(char *table[], int capacity, int *count, const char *word)
{
    for (int idx = 0; idx < *count; idx++) {
        if (table[idx] != NULL && strcmp(table[idx], word) == 0) {
            return idx + 1;
        }
    }
    if (*count >= capacity) {
        return 0;
    }

    /* Size the copy from the string itself; sizeof on the parameter would
     * only give the size of a pointer. */
    size_t bytes = strlen(word) + 1;
    char *copy = malloc(bytes);
    if (copy == NULL) {
        return 0;
    }
    memcpy(copy, word, bytes);
    table[*count] = copy;
    *count += 1;
    return *count;
}

/*
 * Looks up searchchar in the table selected by wordtype.
 *
 *   1 keywords, 2 delimiters, 3 arithmetic operators, 4 relational operators:
 *     returns the 1-based table position, or 0 when the word is not present.
 *   5 constants, 6 identifiers:
 *     returns the 1-based position of the word, recording it first if it is
 *     new; returns 0 if the table is full or memory is exhausted.
 *   any other wordtype: returns 0.
 */
int search(char searchchar[], int wordtype)
{
    switch (wordtype) {
    case 1:
        return find_fixed(key, TABLE_LEN(key), searchchar);
    case 2:
        return find_fixed(border, TABLE_LEN(border), searchchar);
    case 3:
        return find_fixed(arithmetic, TABLE_LEN(arithmetic), searchchar);
    case 4:
        return find_fixed(relation, TABLE_LEN(relation), searchchar);
    case 5:
        return intern_word(consts, TABLE_LEN(consts), &constnum, searchchar);
    case 6:
        return intern_word(label, TABLE_LEN(label), &labelnum, searchchar);
    default:
        return 0;
    }
}
 /*
  * Scans one identifier or keyword whose first character is `buffer`.
  *
  * Reads letters and digits from fp until a different character (or end of
  * input) is met, prints "(1,  "word")" when the word is in the keyword table
  * and "(2,  "word")" otherwise (recording it through search(..., 6)), and
  * returns the first character that does not belong to the word.
  *
  * Words longer than the local buffer are consumed completely but only their
  * leading part is kept, so an over-long identifier can no longer overrun
  * the stack buffer.
  */
char alphaprocess(char buffer)
{
    char alphatp[128];
    size_t len = 0;
    /* <ctype.h> functions need an unsigned char value or EOF. */
    int c = (unsigned char)buffer;

    while (c != EOF && isalnum(c)) {
        if (len < sizeof alphatp - 1) {
            alphatp[len++] = (char)c;
        }
        c = fgetc(fp);
    }
    alphatp[len] = '\0';

    if (search(alphatp, 1)) {
        printf("(1,  \"%s\")\n", alphatp);
    } else {
        search(alphatp, 6);
        printf("(2,  \"%s\")\n", alphatp);
    }
    return (char)c;
}
 /*
  * Scans one unsigned integer constant whose first character is `buffer`.
  *
  * Reads decimal digits from fp until a different character (or end of
  * input) is met, records the constant through search(..., 5), prints
  * "(3,  "digits")" and returns the first character after the constant.
  *
  * Constants longer than the local buffer are consumed completely but only
  * their leading digits are kept, so a long digit run can no longer overrun
  * the stack buffer.
  */
char digitprocess(char buffer)
{
    char digittp[128];
    size_t len = 0;
    /* <ctype.h> functions need an unsigned char value or EOF. */
    int c = (unsigned char)buffer;

    while (c != EOF && isdigit(c)) {
        if (len < sizeof digittp - 1) {
            digittp[len++] = (char)c;
        }
        c = fgetc(fp);
    }
    digittp[len] = '\0';

    search(digittp, 5);
    printf("(3,  \"%s\")\n", digittp);
    return (char)c;
}
  /*
   * Handles a character that starts neither a word nor a number.
   *
   * Tried in order:
   *   - arithmetic operator (search type 3): prints (4,  "op");
   *   - relational operator (search type 4): reads one more character and
   *     prints the two-character operator if the pair is in the table,
   *     otherwise the one-character operator;
   *   - ':' followed by '=': prints ":= (2,2)";
   *   - delimiter (search type 2): prints (5,  "d");
   *   - anything else that is not whitespace: prints an error line.
   *
   * Returns the next character that has not been consumed yet.
   */
char otherprocess(char buffer)
{
    char othertp[3];

    othertp[0] = buffer;
    othertp[1] = '\0';

    if (search(othertp, 3)) {
        printf("(4,  \"%s\")\n", othertp);
        return (char)fgetc(fp);
    }

    if (search(othertp, 4)) {
        char next = (char)fgetc(fp);

        othertp[1] = next;
        othertp[2] = '\0';
        if (search(othertp, 4)) {
            printf("(4,  \"%s\")\n", othertp);
            /* Both characters are consumed: fetch a fresh lookahead so the
             * second one is not scanned again as a separate operator. */
            return (char)fgetc(fp);
        }
        othertp[1] = '\0';
        printf("(4,  \"%s\")\n", othertp);
        return next;
    }

    if (buffer == ':') {
        char next = (char)fgetc(fp);

        if (next == '=') {
            printf(":= (2,2)\n");
            return (char)fgetc(fp);
        }
        /* A lone ':' is not in any table; report it and keep the character
         * after it for the caller instead of dropping it. */
        printf("%c error,not a word\n", buffer);
        return next;
    }

    if (search(othertp, 2)) {
        printf("(5,  \"%s\")\n", othertp);
        return (char)fgetc(fp);
    }

    /* Whitespace of any kind separates tokens and is skipped silently. */
    if (!isspace((unsigned char)buffer)) {
        printf("%c error,not a word\n", buffer);
    }
    return (char)fgetc(fp);
}
  /*
   * Entry point: tokenizes the file "example.c" in the current directory.
   *
   * Dispatches each lookahead character to alphaprocess, digitprocess or
   * otherprocess, each of which prints its token and returns the next
   * unconsumed character.  Prints "over" when the input is exhausted and
   * waits for a key press before exiting.
   */
int main(void)
{
    /* Clear exactly the slots each table has. */
    for (size_t slot = 0; slot < sizeof label / sizeof label[0]; slot++) {
        label[slot] = NULL;
    }
    for (size_t slot = 0; slot < sizeof consts / sizeof consts[0]; slot++) {
        consts[slot] = NULL;
    }

    fp = fopen("example.c", "r");
    if (fp == NULL) {
        printf("error");
        return EXIT_FAILURE;
    }

    cbuffer = (char)fgetc(fp);
    /*
     * The helpers always leave the result of their last fgetc() in cbuffer,
     * so the stream's own end/error indicators tell whether cbuffer holds a
     * real character.  Comparing a char against EOF is unreliable: it never
     * matches where char is unsigned and matches a 0xFF byte where it is signed.
     */
    while (!feof(fp) && !ferror(fp)) {
        if (isalpha((unsigned char)cbuffer)) {
            cbuffer = alphaprocess(cbuffer);
        } else if (isdigit((unsigned char)cbuffer)) {
            cbuffer = digitprocess(cbuffer);
        } else {
            cbuffer = otherprocess(cbuffer);
        }
    }

    fclose(fp);
    printf("over\n");
    getchar();
    return 0;
}
/* Sample input — presumably the contents of the "example.c" file that the
 * lexer above opens; TODO confirm.
 * NOTE(review): "#inlclude" is misspelled and `b` is used without being
 * declared, so this text is not valid C as written. */
#inlclude<stdio.h>
 int main()
 { 
     int d,a,c; a=10;
     c=a+b; 
     printf("%d%d",a,b);; 
     return 0;
 }

四.运行结果及分析

 

转载于:https://www.cnblogs.com/sewhen/p/5985505.html

设计思想 (1)程序主体结构部分: 说明部分 %% 规则部分 %% 辅助程序部分 (2)主体结构的说明 在这里说明部分告诉我们使用的LETTER,DIGIT, IDENT(标识符,通常定义为字母开头的字母数字串)和STR(字符串常量,通常定义为双引号括起来的一串字符)是什么意思.这部分也可以包含一些初始化代.例如用#include来使用标准的头文件和前向说明(forward ,references).这些代应该再标记"%{"和"%}"之间;规则部分>可以包括任何你想用来分析的代;我们这里包括了忽略所有注释中字符的功能,传送ID名称和字符串常量内容到主调函数和main函数的功能. (3)实现原理 程序中先判断这个句语句中每个单元为关键字、常数、运算符、界符,对与不同的单词符号给出不同编形式的编,用以区分之。 PL/0语言的EBNF表示 <常量定义>::=<标识符>=<无符号整数>; <标识符>::=<字母>={<字母>|<数字>}; <加法运算符>::=+|- <乘法运算符>::=*|/ <关系运算符>::==|#|<|<=|>|>= <字母>::=a|b|…|X|Y|Z <数字>::=0|1|2|…|8|9 三:设计过程 1. 关键字:void,main,if,then,break,int,Char,float,include,for,while,printfscanf 并为小写。 2."+”;”-”;”*”;”/”;”:=“;”:”;”<“;”<=“;”>“;”>=“;”<>“;”=“;”(“;”)”;”;”;”#”为运算符。 3. 其他标记 如字符串,表示以字母开头的标识符。 4. 空格符跳过。 5. 各符号对应种别 关键字分别对应1-13 运算符分别对应401-418,501-513。 字符串对应100 常量对应200 结束符# 四:举例说明 目标:实现对常量的判别 代: digit [0-9] letter [A-Za-z] other_char [!-@\[-~] id ({letter}|[_])({letter}|{digit}|[_])* string {({letter}|{digit}|{other_char})+} int_num {digit}+ %% [ |\t|\n]+ "auto"|"double"|"int"|"struct"|"break"|"else"|"long"|"switch"|"case"|"enum"|"register"|"typedef"|"char"|"extern"|"return"|"union"|"const"|"float"|"short"|"unsigned"|"continue"|"for"|"signed"|"void"|"default"|"goto"|"sizeof"|"do"|"if"|"static"|"while"|"main" {Upper(yytext,yyleng);printf("%s,NULL\n",yytext);} \"([!-~])*\" {printf("CONST_string,%s\n",yytext);} -?{int_num}[.]{int_num}?([E][+|-]?{int_num})? 
{printf("CONST_real,%s\n",yytext);} "0x"?{int_num} {printf("CONST_int,%s\n",yytext);} ","|";"|"("|")"|"{"|"}"|"["|"]"|"->"|"."|"!"|"~"|"++"|"--"|"*"|"&"|"sizeof"|"/"|"%"|"+"|"-"|">"|"<"|">="|"<="|"=="|"!="|"&"|"^"|"|"|"&"|"||"|"+="|"-="|"*="|"/="|"%="|">>="|"<<="|"&="|"^="|"|="|"=" {printf("%s,NULL\n",yytext);} {id} {printf("ID,%s\n",yytext);} {digit}({letter})+ {printf("error1:%s\n",yytext);} %% #include <ctype.h> Upper(char *s,int l) { int i; for(i=0;i<l;i++) { s[i]=toupper(s[i]); } } yywrap() { return 1; } 五:DFA 六:数据测试 七:心得体会 其实匹配并不困难,主要是C++知识要求相对较高,只要把握住指针就好了。 附源程序: #include<iostream.h> #include<stdio.h> #include<stdlib.h> #include<string.h> int i,j,k,flag,number,status; /*status which is use to judge the string is keywords or not!*/ char ch; char words[10] = {" "}; char program[500]; int Scan(char program[]) { char *keywords[13] = {"void","main","if","then","break","int", "char","float","include","for","while","printf", "scanf"}; number = 0; status = 0; j = 0; ch = program[i++]; /* To handle the lettle space ands tab*/ /*handle letters*/ if ((ch >= 'a') && (ch <= 'z' )) { while ((ch >= 'a') && (ch <= 'z' )) { words[j++]=ch; ch=program[i++]; } i--; words[j++] = '\0'; for (k = 0; k < 13; k++) if (strcmp (words,keywords[k]) == 0) switch(k) { case 0:{ flag = 1; status = 1; break; } case 1:{ flag = 2; status = 1; break; } case 2:{ flag = 3; status = 1; break; } case 3:{ flag = 4; status = 1; break; } case 4:{ flag = 5; status = 1; break; } case 5:{ flag = 6; status = 1; break; } case 6:{ flag = 7; status = 1; break; } case 7:{ flag = 8; status = 1; break; } case 8:{ flag = 9; status = 1; break; } case 9:{ flag = 10; status = 1; break; } case 10:{ flag = 11; status = 1; break; } case 11:{ flag = 12; status = 1; break; } case 12:{ flag = 13; status = 1; break; } } if (status == 0) { flag = 100; } } /*handle digits*/ else if ((ch >= '0') && (ch <= '9')) { number = 0; while ((ch >= '0' ) && (ch <= '9' )) { number = number*10+(ch-'0'); ch = program[i++]; } flag = 200; 
i--; } /*opereation and edge handle*/ else switch (ch) { case '=':{ if (ch == '=') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 401; } else { i--; flag = 402; } break; } case'>':{ if (ch == '>') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 403; } else { i--; flag = 404; } break; } case'<':{ if (ch == '<') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 405; } else { i--; flag = 406; } break; } case'!':{ if (ch == '!') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 407; } else { i--; flag = 408; } break; } case'+':{ if (ch == '+') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 409; } else if (ch == '+') { words[j++] = ch; words[j] = '\0'; flag = 410; } else { i--; flag = 411; } break; } case'-':{ if (ch == '-') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 412; } else if( ch == '-') { words[j++] = ch; words[j] = '\0'; flag = 413; } else { i--; flag = 414; } break; } case'*':{ if (ch == '*') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 415; } else { i--; flag = 416; } break; } case'/':{ if (ch == '/') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 417; } else { i--; flag = 418; } break; } case';':{ words[j] = ch; words[j+1] = '\0'; flag = 501; break; } case'(':{ words[j] = ch; words[j+1] = '\0'; flag = 502; break; } case')':{ words[j] = ch; words[j+1] = '\0'; flag = 503; break; } case'[':{ words[j] = ch; words[j+1] = '\0'; flag = 504; break; } case']':{ words[j] = ch; words[j+1] = '\0'; flag = 505; break; } case'{':{ words[j] = ch; words[j+1] = '\0'; flag = 
506; break; } case'}':{ words[j] = ch; words[j+1] = '\0'; flag = 507; break; } case':':{ words[j] = ch; words[j+1] = '\0'; flag = 508; break; } case'"':{ words[j] = ch; words[j+1] = '\0'; flag = 509; break; } case'%':{ if (ch == '%') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 510; } else { i--; flag = 511; } break; } case',':{ words[j] = ch; words[j+1] = '\0'; flag = 512; break; } case'#':{ words[j] = ch; words[j+1] = '\0'; flag = 513; break; } case'@':{ words[j] = '#'; flag = 0; break; } default:{ flag = -1; break; } } return flag; } main() { i=0; printf("please input a program end with @"); do { ch = getchar(); program[i++] = ch; }while(ch != '@'); i = 0; do{ flag = Scan(program); if (flag == 200) { printf("(%2d,%4d)",flag,number); } else if (flag == -1) { printf("(%d,error)",flag); } else { printf("(%2d,%4s)",flag,words); } }while (flag != 0); system("pause"); }
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值