PL/0语言词法分析
一、 实验目的
通过完成词法分析程序,了解词法分析的过程。编制一个读单词程序,对PL/0语言进行词法分析,把输入的字符串形式的源程序分割成一个个单词符号,即基本保留字、标识符、常数、运算符、界符五大类。
二、 实验环境
操作系统:Windows XP
编写环境:Visual C++、C-Free、Turbo C
编写语言:C语言
分析语言:PL/0
三、 实验内容
对PL/0语言进行词法分析,把输入的字符串形式的源程序分割成一个个单词符号,其词法描述如下:
(1) 关键字:begin,call,const,do,end,if,odd,procedure,read,then,var,while,write
(2) 标识符:用来表示各种名字,必须以字母开头,由字母和数字组成,长度小于10位
(3) 数字:以0-9组成小于14位的数字
(4) 运算符:+,-,*,/,:=,<,<=,>,>=
(5) 界符:,,.,;,#
表1 各种单词符号对应类型表
单词符号 | 类型 |
+ | plus |
- | minus |
* | times |
/ | slash |
( | lparen |
) | rparen |
= | eql |
, | comma |
. | period |
# | neq |
; | semicolon |
begin | beginsym |
call | callsym |
const | constsym |
do | dosym |
end | endsym |
if | ifsym |
odd | oddsym |
procedure | procsym |
read | readsym |
then | thensym |
var | varsym |
while | whilesym |
write | writesym |
GETSYM函数功能:
(1) 滤空格 空格在词法分析时是一种不可缺少的界符,而在语法分析时则是无用的,所以必须过滤
(2) 识别保留字 主程序定义了一个以字符为元素的一维数组WORD,称保留字表。对字母开头的字母、数字字符串要查此表。若查着则识别为保留字,将对应的类别放在SYM中。如IF的对应值IFSYM,THEN的对应值为THENSYM。若查不着,则认为是用户定义的标识符
(3) 识别标识符 对用户定义的标识符将IDENT放在SYM中,标识符本身的值放在ID中
(4) 拼数 当扫描到数字串时,将字符串形式的十进制数转换为二进制数,然后把数的类别NUMBER放在SYM中,数值本身的值放在NUM中
(5) 拼合复合词 对两个字符组成的算符,如:>=、:=、<=等单词,识别后将类别送SYM中
(6) 输出源程序 为边读入字符边输出(可输出在文件中)
四、 实验结果
要分析的内容如下:
- const a=36;
- var c,d;
- procedure p;
- begin
- var g;
- g:=1234567890000000;
- write(g);
- end;
- begin
- read(c,d);
- if c<=d then c:=a;
- write(c,d);
- call p;
- end.
pl0.h 头文件
/*
 * pl0.h - shared constants, token kinds and scanner state for the PL/0 lexer.
 *
 * The objects below are defined here (not declared extern), so this header is
 * meant to be included by a single translation unit.
 */
#ifndef PL0_H
#define PL0_H

#include <stdio.h> /* FILE */

#define norw 13   /* number of reserved words */
#define nmax 14   /* maximum number of digits in a number literal */
#define al 10     /* maximum identifier length kept by the scanner */
#define cxmax 200 /* maximum number of virtual-machine instructions */

/* Token kinds stored in `sym` by getsym(). */
enum symbol {
    nul, ident, number, plus, minus, times, slash, oddsym, eql, neq,
    lss, leq, gtr, geq, lparen, rparen, comma, semicolon, period, becomes,
    beginsym, endsym, ifsym, thensym, whilesym, writesym, readsym, dosym,
    callsym, constsym, varsym, procsym
};

FILE* fa1;               /* output file that receives the echoed source and error banners */
char ch;                 /* most recent character delivered by getch() */
enum symbol sym;         /* kind of the most recent token */
char id[al+1];           /* text of the most recent identifier / reserved word */
int num;                 /* value of the most recent number literal */
int cc, ll;              /* getch() state: read position in `line` and its filled length */
int cx;                  /* virtual-machine code index, range 0 .. cxmax-1 */
char line[81];           /* buffer holding the current input line */
char a[al+1];            /* scratch buffer used while collecting an identifier */
char word[norw][al];     /* reserved-word spellings (begin, end, if, ...) */
enum symbol wsym[norw];  /* token kind for each entry of `word` (begin -> beginsym) */
enum symbol ssym[256];   /* token kind for each single-character symbol */
FILE* fin;               /* input source file */
FILE* fout;
char fname[al];          /* file name typed by the user */
int err;                 /* number of errors reported so far */

/*
 * Fetch the next character; makes the ENCLOSING function return -1 when
 * getch() reports end of input.  Wrapped in do/while(0) so that `getchdo;`
 * behaves as a single statement inside if/else.
 */
#define getchdo do { if (-1 == getch()) return -1; } while (0)

void error(int n);

#endif /* PL0_H */
主函数
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include "pl0.h"
- void error(int n)
- {
- char space[81];
- memset(space, 32, 81);
- space[cc-1] = 0;
- printf("****出现错误\n");
- fprintf(fa1, "****出现错误\n");
- err++;
- }
- int getch()
- {
- if(cc == ll)
- {
- if(feof(fin))
- {
- printf("读完了");
- return -1;
- }
- ll = 0;
- cc = 0;
- //printf("%d ", cx);
- //fprintf(fa1, "%d", cx);
- ch = ' ';
- while(ch != 10)
- {
- if(fscanf(fin, "%c", &ch) == EOF)
- {
- line[ll] = 0;
- break;
- }
- printf("%c", ch);
- fprintf(fa1, "%c", ch);
- line[ll] = ch;
- ll++;
- }
- printf("\n");
- //fprintf(fa1, "\n");
- }
- ch = line[cc];
- cc++;
- return 0;
- }
- int getsym()
- {
- int i, j, k;
- while(ch == ' ' || ch == 10 || ch == 9)
- {
- getchdo;
- }
- if(ch >= 'a' && ch <= 'z')
- {
- k = 0;
- do
- {
- if(k < al)
- {
- a[k] = ch;
- k++;
- }
- getchdo;
- }while(ch >= 'a' && ch <= 'z' || ch >= '0' && ch <= '9');
- a[k] = 0;
- strcpy(id, a);
- i = 0;
- j = norw-1;
- do
- {
- k = (i+j)/2;
- if(strcmp(id, word[k]) <= 0)
- j = k-1;
- if(strcmp(id, word[k]) >= 0)
- i = k+1;
- }while(i <= j);
- if(i-1 > j)
- {
- sym = wsym[k];
- }
- else
- {
- sym = ident;
- }
- }
- else
- {
- if(ch >= '0' && ch <= '9')
- {
- k = 0;
- num = 0;
- sym = number;
- do
- {
- num = num * 10 + ch - '0';
- k++;
- getchdo;
- }while(ch >= '0' && ch <= '9');
- k--;
- if(k > nmax)
- {
- error(30);
- }
- }
- else
- {
- if(ch == ':')
- {
- getchdo;
- if(ch == '=')
- {
- sym = becomes;
- getchdo;
- }
- else
- {
- sym = nul;
- }
- }
- else
- {
- if(ch == '<')
- {
- getchdo;
- if(ch == '=')
- {
- sym = leq;
- getchdo;
- }
- else
- {
- sym = lss;
- }
- }
- else
- {
- if(ch == '>')
- {
- getchdo;
- if(ch == '=')
- {
- sym = geq;
- getchdo;
- }
- else
- {
- sym = gtr;
- }
- }
- else
- {
- sym = ssym[ch];
- if(sym != period)
- {
- getchdo;
- }
- else
- return -1;
- }
- }
- }
- }
- }
- return 0;
- }
- void init()
- {
- int i;
- for(int i = 0; i < 256; i++)
- ssym[i] = nul;
- ssym['+'] = plus;
- ssym['-'] = minus;
- ssym['*'] = times;
- ssym['/'] = slash;
- ssym['('] = lparen;
- ssym[')'] = rparen;
- ssym['='] = eql;
- ssym[','] = comma;
- ssym['.'] = period;
- ssym['#'] = neq;
- ssym[';'] = semicolon;
- strcpy(&(word[0][0]), "begin");
- strcpy(&(word[1][0]), "call");
- strcpy(&(word[2][0]), "const");
- strcpy(&(word[3][0]), "do");
- strcpy(&(word[4][0]), "end");
- strcpy(&(word[5][0]), "if");
- strcpy(&(word[6][0]), "odd");
- strcpy(&(word[7][0]), "procedure");
- strcpy(&(word[8][0]), "read");
- strcpy(&(word[9][0]), "then");
- strcpy(&(word[10][0]), "var");
- strcpy(&(word[11][0]), "while");
- strcpy(&(word[12][0]), "write");
- wsym[0] = beginsym;
- wsym[1] = callsym;
- wsym[2] = constsym;
- wsym[3] = dosym;
- wsym[4] = endsym;
- wsym[5] = ifsym;
- wsym[6] = oddsym;
- wsym[7] = procsym;
- wsym[8] = readsym;
- wsym[9] = thensym;
- wsym[10] = varsym;
- wsym[11] = whilesym;
- wsym[12] = writesym;
- }
- int main()
- {
- printf("请输入要分析的文件名\n");
- scanf("%s", fname);
- fin = fopen(fname, "r");
- if(fin)
- {
- printf("请输入要保存的文件名\n");
- scanf("%s", fname);
- fa1 = fopen(fname, "w");
- init();
- err = 0;
- cc = cx = ll = 0;
- ch = ' ';
- while(getsym() != -1)
- {
- }
- printf("分析完毕");
- }
- else
- {
- printf("找不到文件\n");
- }
- printf("\n");
- return 0;
- }