Pascal语言子集词法分析器

有空再来写注释^o^

测试用Pascal代码片段
begin
 ab2a:=9;
 if x>=0 then x:=x+1;
 while a=0 do
  b:=a*x/33455;
 end
#

---------------------------------------------------------------------
测试结果
syn     |value
________|________
1       |begin
10      |ab2a
18      |:=
11      |9
26      |;
2       |if
10      |x
24      |>=
11      |0
3       |then
10      |x
18      |:=
10      |x
14      |+
11      |1
26      |;
4       |while
10      |a
25      |=
11      |0
5       |do
10      |b
18      |:=
10      |a
16      |*
10      |x
17      |/
11      |33455
26      |;
6       |end
0       |#
Press any key to continue

分析器的C代码------------------------------------------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Size in bytes of the text buffer inside a token value (value_type.s). */
#define MAX_WD_LEN 255
/* NOTE(review): not referenced anywhere in the visible code. */
#define MAX_INT 32767
/* Size in bytes of the buffer that holds the program text being scanned. */
#define MAX_SRC_LEN 1000
/* Capacity of the token array word_stack. */
#define MAX_WD_CNT 100
/* Number of entries of the keyword table that scan() compares against. */
#define KWD_CNT 6
/************************************************/

/*
 * Payload of a token. Which member is meaningful depends on the token's
 * syn code: integer literals use d, single-character tokens use c, and
 * identifiers, keywords and two-character operators use s.
 */
union value_type {
    int  d;                  /* value of an integer literal */
    char c;                  /* a single-character operator or delimiter */
    char s[MAX_WD_LEN];      /* NUL-terminated token text */
};

/* One scanned token: its category code together with its payload. */
typedef struct {
    int              syn;    /* token category code */
    union value_type value;  /* payload; the valid member depends on syn */
} word_type;

/************************************************/
/* Reserved words; scan() gives a keyword the syn code (index + 1). */
char *keywords[20] = {
    "begin", "if", "then", "while", "do", "end"
};

/* Program text to be scanned. */
char source[MAX_SRC_LEN];

/* Tokens in the order in which they were recognised. */
word_type word_stack[MAX_WD_CNT];

int line = 1;   /* current line number, reported by tell_err() */
int wtop = 0;   /* number of tokens stored in word_stack */
int ip = 0;     /* index of the next unread character in source */

/************************************************/
void p_word_stack(){
       int i;word_type w;
       printf("syn/t|value/n");
       printf("________|________/n");
       for(i=0;i<wtop;i++){
               w=word_stack[i];
               if( (w.syn>=1 && w.syn<=10) || w.syn==18 || w.syn==21 || w.syn==22
|| w.syn==24)
                       printf("%d/t|%s/n",w.syn,w.value.s);
               else if(w.syn==11)
                       printf("%d/t|%d/n",w.syn,w.value.d);
               else
                       printf("%d/t|%c/n",w.syn,w.value.c);
       }
       return ;
}
void tell_err(){
       printf("error in line %d/n",line);
       exit(1);
       return ;
}
void scan(){
       word_type w;
       char c;
       int j=0;
       if(isdigit(c=source[ip])){
               w.syn=11;   /* dd*  */
               w.value.d=c-'0';
               while(isdigit(c=source[++ip]))
                       w.value.d=w.value.d*10+c-'0';
      if(!isalpha(c))
      word_stack[wtop++]=w;
      else
      tell_err();
               return;
       }
       if(isalpha(c=source[ip])){
               w.syn=10;    /*  (ll|d) */
               w.value.s[0]=c;
               while(isalpha(c=source[++ip]) || isdigit(c))
                       w.value.s[++j]=c;
               w.value.s[j+1]='/0';
               for(j=0;j<KWD_CNT;j++){
                       if(strcmp(keywords[j],w.value.s)==0)
                               w.syn=j+1;
               }
               word_stack[wtop++]=w;
               return ;
       }
       switch(c=source[ip]){
               case '+' :
                       w.syn=14;  /* '+' */
                       w.value.c='+';
                       word_stack[wtop++]=w;
                       ip++;
                       break;
               case '-' :
                       w.syn=15; /* '-' */
                       w.value.c='-';
                       word_stack[wtop++]=w;
                       ip++;
                       break;
               case '*' :
                       w.syn=16;  /* '*' */
                       w.value.c='*';
                       word_stack[wtop++]=w;
                       ip++;
                       break;
               case '/' :
                       w.syn=17;
                       w.value.c='/';
                       word_stack[wtop++]=w;
                       ip++;
                       break;
               case ':' :
                       w.syn=19;
                       w.value.c=':';
                       if( (c=source[++ip]) !='='){
                               word_stack[wtop++]=w;

                       }
                       else if(c=='='){
                               strcpy(w.value.s,":=");
                               w.syn=18;
                               word_stack[wtop++]=w;
                               ip++;
                       }
                       break;
               case '<' :
                       w.syn=20;
                       w.value.c='<';
                       if( (c=source[++ip]) !='>' && c!='='){
                               word_stack[wtop++]=w;
                       }
                       else if(c=='>'){
                               w.syn=21;
                               strcpy(w.value.s,"<>");
                               word_stack[wtop++]=w;
                               ip++;
                       }
                       else if(c=='='){
                               w.syn=22;
                               strcpy(w.value.s,"<=");
                               word_stack[wtop++]=w;
                               ip++;
                       }
                       break;
               case  '>' :
                       w.syn=23;
                       w.value.c='>';
                       if( (c=source[++ip]) !='='){
                               word_stack[wtop++]=w;
                       }
                       else if(c=='='){
                               w.syn=24;
                               strcpy(w.value.s,">=");
                               word_stack[wtop++]=w;
                               ip++;
                       }
                       break;
               case '=' :
                       w.syn=25;
                       w.value.c='=';
                       word_stack[wtop++]=w;
                       ip++;
                       break;
               case ';' :
                       w.syn=26;
                       w.value.c=';';
                       word_stack[wtop++]=w;
                       ip++;
                       break;
               case '(' :
                       w.syn=27;
                       w.value.c='(';
                       word_stack[wtop++]=w;
                       ip++;
                       break;
               case ')' :
                       w.syn=28;
                       w.value.c=')';
                       word_stack[wtop++]=w;
                       ip++;
                       break;
               case ' ' :
                       while(source[++ip]==' ');
                       break;
               case '/n' :
                       line++;
                       while(source[++ip]=='/n')line++;
                       break;

               case '/t' :
                       while(source[++ip]=='/t');
                       break;
               case '/r' :
                       while(source[++ip]=='/r');
                       break;
               default:
                       tell_err();
       }
       return;
}
int main(){
       FILE* fp;
       int i=0;
       word_type w;
       fp=fopen("input.txt","r");
       while(!feof(fp))
               source[i++]=getc(fp);
       fclose(fp);
       while(source[ip]!='#')
               scan(ip);
       w.syn=0;
       w.value.c='#';
       word_stack[wtop++]=w;
       p_word_stack();
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
用C语言编写一个只包含部分保留字和单词符号(见语言子集L)的PASCAL语言的扫描器(词法分析器)。 1、该词法分析器的任务: (1)滤掉源程序中的无用成分,如空格; (2)输出分类记号供语法分析器使用,并产生两个表格:常数表和标识符表,表的数据结构和包含内容可参看书中第八章符号表; (3)识别非法输入,并将其标记为“出错记号”。 2、该词法分析器的功能: 以在下面段落中定义的PASCAL语言子集的源程序作为词法分析程序的输入数据。词法分析器打开该文件后,自文件头开始扫描源程序字符,发现符合“单词”定义的源程序字符串时,将它翻译成固定长度的单词内部表示,并查填适当的信息表,一旦发现不符合“单词”定义的源程序字符串时,给出错误提示。经过词法分析后,源程序字符串(源程序的外部表示)被翻译成具有等长信息的单词串(源程序的内部表示),并产生两个表格:常数表和标识符表,它们分别包含了源程序中的所有常数和所有标识符。 3、PASCAL语言子集L----保留字不区分大小写 L={ VAR,BEGIN,ELSE,END,IF,THEN,REAL,INTEGER,=,:=,-,+,*,; ,. }∪{常数}∪{标识符} 4、实验要求: (1)词法分析器构造:正规式设计、DFA设计、代码编写; (2)进行词法分析器测试测试例程(一小段程序)、测试结果与测试结果分析。 (3)实验报告格式自拟;实验报告中要对实验要求中正规式设计、DFA设计、代码编写、测试例程(3中定义的PASCAL语言子集的源程序段)、测试结果与测试结果分析逐项给予说明。 例子: 本例中单词符号(记号)的种类: 1、保留字; 2、分隔符; 3、运算符; 4、等符 5、常数; 6、标识符 (单词符号的分类可以自己规定,只是要在实验报告中给出说明) 测试例程PASCAL程序清单如下: BEGIN IF I=1 THEN ^ A:=I+1 ELSE *& IF I=2 THEN A:=I+11; END. # 运行词法分析程序后,显示如下结果如下: BEGIN (1,1) IF (1,4) I (6,0) = (4,2) 1 (5,0) THEN (1,5) ^ error, not a word A (6,1) := (2,2) I (6,0) + (3,0) 1 (5,0) ELSE (1,2) * (3,2) & error, not a word IF (1,4) I (6,0) = (4,2) 2 (5,1) THEN (1,5) A (6,1) := (2,2) I (6,0) + (3,0) 11 (5,2) ; (2,1) END (1,3) . (2,3) # error, not a word over 常数表CT:1,2 标识符表LT:I,A

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值