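// Target environment: VS2015 on Windows 10. Implements a lexical analyzer for the rules below.
// (1) The language is case-insensitive;
// (2) Letters are a-z and A-Z, digits are 0-9;
// (3) The grammar (given below) may be extended and modified;
// (4) '/* ... */' delimits a comment in the program.
// [Design requirements]
// (1) Give the category code of every token;
// (2) The lexer must be able to detect errors in the input string;
// (3) Lexical analysis is written as a separate pass; its result is an intermediate file made of a sequence of (code, lexeme) pairs;
// (4) Design two test cases (as complete as possible) and give the test results.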
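/*<identifier> → letter | <identifier> letter | <identifier> digit
<unsigned integer> → digit | <unsigned integer> digit
<single-character delimiter> → + | - | * | ; | ( | )
<double-character delimiter> → <greater>= | <less>= | <less>> | <colon>= | <slash>*
<less> → <
<equal> → =
<greater> → >
<colon> → :
<slash> → /
Reserved words of the language: begin end if then else for do while and or not
*/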
#include "stdafx.h"
#include <ctype.h>
#include <stdio.h>
#include <string.h>
// Token category codes
/*
Reserved words:
begin 1
end 2
if 3
then 4
else 5
for 6
do 7
while 8
and 9
or 10
not 11
identifier 12
number 13
<= 14
<> 15
< 16
:= 17
: 18
>= 19
> 20
+ 21
- 22
* 23
; 24
| 25
comment opener 26
comment closer 27
( 28
) 29
, 30
. 31
*/
/*
 * Reserved-word table. Rows are in category-code order: lookup() reports
 * the word stored in row i as code i + 1.
 * NOTE(review): the identifier is spelled "reverse"; presumably "reserved"
 * was intended. It is kept because lookup() refers to it by this name.
 */
char reverse[11][10] = {
    "begin",
    "end",
    "if",
    "then",
    "else",
    "for",
    "do",
    "while",
    "and",
    "or",
    "not"
};

/* Line number of the input currently being scanned; the first line is 1. */
int line = 1;
/*
 * Print one token to standard output as a "code,<lexeme>" line.
 *
 * a: category code of the token.
 * s: NUL-terminated lexeme text. It is only read, so it is taken as a
 *    pointer to const; this lets callers pass string literals such as "<="
 *    without discarding a qualifier.
 */
void out(int a, const char *s)
{
    printf("%d,<%s>\n", a, s);
}
/*
 * Look a lexeme up in the reserved-word table.
 *
 * token: NUL-terminated lexeme. The comparison is case-sensitive against
 *        the lower-case table; scanner() lower-cases identifiers before
 *        calling this function.
 *
 * Returns the category code of the reserved word (its table index + 1)
 * when token is exactly equal to a table entry, and 0 otherwise.
 *
 * The whole string must match: a lexeme that merely starts with a reserved
 * word ("iffy", "format", "ending") is an ordinary identifier, and every
 * table row is compared from its first character.
 */
int lookup(const char *token)
{
    size_t count = sizeof reverse / sizeof reverse[0];
    size_t i;

    for (i = 0; i < count; i++)
    {
        if (strcmp(reverse[i], token) == 0)
            return (int)i + 1;
    }
    return 0;
}
void scanner(FILE *fp)
{
char ch;
int i, c;
char TOKEN[20];
ch = fgetc(fp);
if (isalpha(ch))
{
ch = tolower(ch);
TOKEN[0] = ch;
ch = fgetc(fp);
ch = tolower(ch);
i = 1;
while (isalnum(ch))
{
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
ch = tolower(ch);
}
TOKEN[i] = '\0';
fseek(fp, -1, 1);
c = lookup(TOKEN); //查找保留字
if (c == 0)
out(12, TOKEN); //标识符
else
out(c, TOKEN); //保留字
}
else if (isdigit(ch)) //判断整数
{
TOKEN[0] = ch;
ch = fgetc(fp);
i = 1;
while (isdigit(ch))
{
TOKEN[i] = ch;
i++;
ch = fgetc(fp);
}
if (isalpha(ch)) //错误的标识符格式
printf("error in line %d,wrong format for it!\n",line);
TOKEN[i] = '\0';
fseek(fp, -1, 1);
out(13, TOKEN); //整型
}
else switch (ch)
{
case '<':
ch = fgetc(fp);
if (ch == '=')
out(14, "<=");
else
if (ch == '>')
out(15, "<>");
else
{
fseek(fp, -1, 1);
out(16, "<");
}
break;
case ':':
ch = fgetc(fp);
if (ch == '=')
out(17, ":=");
else
{
fseek(fp, -1, 1);
out(18, ":");
}
break;
case '>':
ch = fgetc(fp);
if (ch == '=')
out(19, ">=");
else
{
fseek(fp, -1, 1);
out(20, ">");
}
break;
case '+':
out(21, "+");
break;
case '-':
out(22, "-");
break;
case '*':
if (fgetc(fp) == '/')
{
out(27, "*/");
}
else
{
out(23, "*");
fseek(fp,-1,1);
}
break;
case ';':
out(24, ";");
break;
case '|':
out(25, "|");
break;
case '/':
ch = fgetc(fp);
if (ch == '*')
{
out(26, "/*");
printf("注释内容:");
char x=fgetc(fp),
y=fgetc(fp);
while (x != '*'|| y!='/')
{
printf("%c", x);
x = y;
y = fgetc(fp);
if (x == '*'&&y == '/')
{
printf("\n");
out(27, "*/");
}
}
}
else
fseek(fp,-1,1);
break;
case '(':
out(28, "(");
break;
case ')':
out(29, ")");
break;
case ',':
out(30, ",");
break;
case '.':
out(31, ".");
break;
default:
if (ch == ' ' || ch == '\n' || ch == '\t')
if (ch == '\n')
line++;
else;
else
printf("error in line %d,can't recognize this character! %c\n", line, ch);
break;
}
}
/*
 * Program entry point: echo the contents of "code.txt", then run the
 * scanner over the same file until end of file is reached.
 *
 * Returns 0 on success and 1 when the input file cannot be opened.
 */
int main()
{
    FILE *fp;
    int c; /* int, not char, so that EOF is distinct from every byte value */

    fp = fopen("code.txt", "r");
    if (fp == NULL)
    {
        perror("code.txt");
        return 1;
    }

    /* First pass: show the source program. */
    printf("\n源程序为:\n");
    while ((c = fgetc(fp)) != EOF)
    {
        putchar(c);
    }
    printf("\n\n");

    /* Second pass: reuse the same stream instead of opening the file again. */
    rewind(fp);
    while ((c = fgetc(fp)) != EOF)
    {
        /* The character was read only to test for end of file; hand it back
           so that scanner() sees it. */
        ungetc(c, fp);
        scanner(fp);
    }

    fclose(fp);
    return 0;
}