大家好,下面介绍的是我当时上编译原理所做的实验,主要内容就是先对源程序进行预处理,然后再对处理过的程序进行词法分析。用的是最基本的C语言写的,如有不足,欢迎大家批评指正!
一、实验目的
设计并实现一个包含预处理功能的词法分析程序,加深对编译中词法分析过程的理解。
二、 实验要求
1、实现预处理功能
源程序中可能包含有对程序执行无意义的符号,要求将其剔除。
首先编制一个源程序的输入过程,从键盘、文件或文本框输入若干行语句,依次存入输入缓冲区(字符型数据);然后编制一个预处理子程序,去掉输入串中的回车符、换行符和跳格符等编辑性文字;把多个空白符合并为一个;去掉注释。
2、实现词法分析功能
输入:所给文法的源程序字符串。
输出:二元组(syn,token或sum)构成的序列。其中,
syn为单词种别码。
Token为存放的单词自身字符串。
Sum为整型常量。
具体实现时,可以将单词的二元组用结构进行处理。
三、实验设计
1)首先编写一个预处理子程序,用于读取文件并且去除文件中的回车换行、将多个空格合并成一个空格
2)然后再编写一个分析子程序,用于分析经过处理后的程序,并且识别枚举类型和共用体类型,识别struct,enum,识别&&和||、++和--,==、识别!=,-=,+=、识别浮点数、识别指数、识别指针变量、识别字符串、去掉多行注释、识别错误信息,数字后面跟有字母,123fds(这个有个bug,识别不了16进制的数字,比如123efh,他其实是一个16进制数,但是也会当作错误信息)、对处理程序的大小没有限制
3)显示处理过的程序并保存在相应的文件中
四、代码实现及运行效果
运行效果
代码由三个文件构成,如下图所示
其中,Hong.h中主要是对一些常用的变量进行宏定义,fun.h中是对各种方法的实现,main.c则是对fun.h中实现的方法的简单的调用,一些需要注意的地方都在代码中以注释的形式展现,话不多说,上代码!
Hong.h
#include <stdio.h>
#define SizeRes 60 // number of entries in the ReservedWord table
#define Sizestr 20 // capacity of strToken, including the terminating '\0'
char ch; // most recently read character of the source program
char strToken[Sizestr]; // characters of the token currently being assembled
int GetBC(char str[], int i); // returns the index of the first non-space character of str at or after i
int IsLetter(void); // 1 if ch is an ASCII letter, otherwise 0
int IsDigit(void); // 1 if ch is a decimal digit, otherwise 0
char const *FindRes(char str[]); // type code of str in ReservedWord, or "ID" if str is not listed
int IsIdentifier(void); // 1 if ch may appear inside an identifier, otherwise 0
int IsFloat(void); // 1 if ch may appear inside a numeric literal, otherwise 0
char *IsIntorFloatorExponent(char string[]); // classifies a numeric literal as "int", "float" or "Exponent"
int IsError(void); // 1 if ch continues a malformed number such as 123abc, otherwise 0
FILE *InserRes(char const *type, char const *value, FILE *p); // writes one "type,<TAB>value" line to p and returns p
void Process(char F_open[], char Result_file[], char error_file[]); // lexical analysis of a preprocessed file
void Yu(char F_open[], char F_out[]); // preprocessing pass
// Table of reserved words, operators and delimiters. Each entry pairs the
// type code written to the result file with the exact spelling that a token
// is compared against. The table holds exactly SizeRes entries.
struct s1
{
    char const *type; // type code reported for the token
    char const *name; // spelling of the token
} ReservedWord[SizeRes] = {
    {"1", "main"}, {"2", "if"}, {"3", "then"}, {"4", "while"}, {"5", "do"},
    {"6", "static"}, {"7", "int"}, {"8", "double"}, {"9", "struct"}, {"10", "break"},
    {"11", "else"}, {"12", "long"}, {"13", "switch"}, {"14", "case"}, {"15", "typedef"},
    {"16", "char"}, {"17", "return"}, {"18", "const"}, {"19", "float"}, {"20", "short"},
    {"21", "continue"}, {"22", "for"}, {"23", "void"}, {"24", "default"}, {"25", "sizeof"},
    {"26", "enum"}, {"27", "union"}, // 1. enum and union types
    {"28", "+"}, {"29", "-"}, {"30", "*"}, {"31", "/"}, {"32", ":"}, {"33", ":="}, {"34", "<"},
    {"35", "<>"}, {"36", "<="}, {"37", ">"}, {"38", ">="}, {"39", "="}, {"40", ";"}, {"41", "("},
    {"42", ")"}, {"43", "||"}, {"44", "&&"}, {"45", "{"}, {"46", "}"}, // 2. logical or / logical and
    {"47", "++"}, {"48", "--"}, {"49", ","}, {"50", "=="}, {"51", "["}, {"52", "]"}, {"53", "#"},
    {"54", "include"}, {"55", "define"}, {"56", "&"}, {"57", "."}, {"58", "+="}, {"59", "-="},
    {"60", "!="} // 3. ++, -- and ==
};
fun.h
#include <string.h>
#include "Hong.h"
//确定是否为关键字,如果是关键字返回其类型
char const *FindRes(char str[])
{
for (int i = 0; i < SizeRes; i++)
{
if (strcmp(str, ReservedWord[i].name) == 0)
return ReservedWord[i].type;
}
return "ID";
}
// Returns 1 if the global ch is an ASCII letter (a-z or A-Z), otherwise 0.
int IsLetter()
{
    int lower = ('a' <= ch && ch <= 'z');
    int upper = ('A' <= ch && ch <= 'Z');
    return lower || upper;
}
// Returns 1 if the global ch may be part of an identifier
// (a letter, a digit or an underscore), otherwise 0.
int IsIdentifier()
{
    return (ch == '_' || IsLetter() || IsDigit()) ? 1 : 0;
}
// Returns 1 if the global ch may be part of a numeric literal: a digit,
// '.', 'e', 'E' or '-'. The '-' is accepted regardless of its position.
int IsFloat()
{
    switch (ch)
    {
    case '.':
    case 'e':
    case 'E':
    case '-':
        return 1;
    default:
        return IsDigit();
    }
}
// Classifies a numeric literal by the first marker character it contains:
// '.' gives "float", 'e' or 'E' gives "Exponent", and a string with neither
// gives "int". The returned pointer refers to a string literal.
char *IsIntorFloatorExponent(char string[])
{
    for (const char *p = string; *p != '\0'; ++p)
    {
        switch (*p)
        {
        case '.':
            return "float";
        case 'e':
        case 'E':
            return "Exponent";
        default:
            break;
        }
    }
    return "int";
}
// Returns 1 if the global ch is a decimal digit ('0'..'9'), otherwise 0.
int IsDigit()
{
    return ('0' <= ch && ch <= '9');
}
// Returns 1 if the global ch is a letter or a digit, otherwise 0.
// Process uses it to collect the rest of a malformed number such as 123abc.
int IsError()
{
    return IsLetter() ? 1 : IsDigit();
}
// Skips space characters in str starting at index i and returns the index
// of the first character that is not a space (possibly the terminating '\0').
int GetBC(char str[], int i)
{
    for (; str[i] == ' '; ++i)
    {
        // nothing to do, the loop header advances past the space
    }
    return i;
}
// Writes one result line to p in the form  type , TAB , value , newline
// and returns p so that the caller can keep using the same stream.
FILE *InserRes(char const *type, char const *value, FILE *p)
{
    fprintf(p, "%s,\t%s\n", type, value);
    return p;
}
//进行单词分割和区分
void Process(char F_open[], char Result_file[], char error_file[])
{
FILE *fp;
fp = fopen(F_open, "r");
FILE *res_file;
res_file = fopen(Result_file, "w");
FILE *err_file;
err_file = fopen(error_file, "w");
int size = 20;
char L1[size];
char temp[size];
int index = 0;
int WritedFlag = 0;
while (fgets(L1, size, fp) != NULL)
{
int i = 0;
while (L1[i] != '\0')
{
ch = L1[i];
if (IsDigit())
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0') //读到第一个数字,但是是最后一个字符的情况
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
while (IsFloat())
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0') //如果找到最后,数字被截断了,就把标志状态改一下
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
}
if (IsLetter())
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
while (IsError())
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
}
strToken[index] = '\0';
printf("Errors! ");
printf("%s\n", strToken);
fputs(strToken, err_file);
fputc('\n', err_file);
res_file = InserRes("error", strToken, res_file);
WritedFlag = 1;
}
else
{
strToken[index] = '\0';
printf("<%s\t,\t%s>\n", IsIntorFloatorExponent(strToken), strToken);
res_file = InserRes(IsIntorFloatorExponent(strToken), strToken, res_file); //向结果result文件中输出信息
WritedFlag = 1;
}
}
else if (IsLetter())
{
ch = L1[GetBC(L1, i)];
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
while (IsIdentifier())
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
}
strToken[index] = '\0';
}
else if (ch == '#')
{
strToken[index] = ch;
index++;
strToken[index] = '\0';
i++;
ch = L1[i];
}
else if (ch == '+')
{
strToken[index] = ch;
index++;
i++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
if (ch == '+')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
}
if (ch == '=')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
}
strToken[index] = '\0';
}
else if (ch == '-')
{
strToken[index] = ch;
index++;
i++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
if (ch == '-')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
}
if (ch == '=')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
}
strToken[index] = '\0';
}
else if (ch == '!')
{
strToken[index] = ch;
index++;
i++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
if (ch == '=')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
}
strToken[index] = '\0';
}
else if (ch == '*') //识别乘号和指针变量
{
if (L1[i - 1] == ' ' || L1[i - 1] == ';')
{
ch = L1[GetBC(L1, i)];
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
while (IsIdentifier())
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
}
strToken[index] = '\0';
}
else
{
strToken[index] = ch;
index++;
strToken[index] = '\0';
i++;
ch = L1[i];
}
}
else if (ch == '/')
{
strToken[index] = ch;
index++;
strToken[index] = '\0';
i++;
ch = L1[i];
}
else if (ch == ':')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
if (ch == '=')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
strToken[index] = '\0';
}
else
{
strToken[index] = '\0';
}
}
else if (ch == '<')
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
if (ch == '>')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
//strToken[index] = '\0';
}
else if (ch == '=')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
// strToken[index] = '\0';
}
strToken[index] = '\0';
}
else if (ch == '>')
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
if (ch == '=')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
}
strToken[index] = '\0';
}
else if (ch == '=')
{
strToken[index] = ch;
index++;
i++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
if (ch == '=')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
}
strToken[index] = '\0';
}
else if (ch == ';')
{
strToken[index] = ch;
index++;
strToken[index] = '\0';
i++;
ch = L1[i];
}
else if (ch == '(')
{
strToken[index] = ch;
index++;
strToken[index] = '\0';
i++;
ch = L1[i];
}
else if (ch == ')')
{
strToken[index] = ch;
index++;
strToken[index] = '\0';
i++;
ch = L1[i];
}
else if (ch == '{')
{
strToken[index] = ch;
index++;
strToken[index] = '\0';
i++;
ch = L1[i];
}
else if (ch == '}')
{
strToken[index] = ch;
index++;
strToken[index] = '\0';
i++;
ch = L1[i];
}
else if (ch == '|')
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
if (ch == '|')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
strToken[index] = '\0';
}
}
else if (ch == '"')
{
i++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
while (ch != '"')
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
}
strToken[index] = '\0';
res_file = InserRes("string", strToken, res_file);
WritedFlag = 1;
i++;
ch = L1[i];
}
else if (ch == '&')
{
strToken[index] = ch;
i++;
index++;
if (L1[i] == '\0')
{
fgets(temp, size, fp);
strcpy(L1, temp);
i = 0;
}
ch = L1[i];
if (ch == '&')
{
strToken[index] = ch;
i++;
index++;
ch = L1[i];
}
strToken[index] = '\0';
}
else
{
strToken[index] = ch;
index++;
strToken[index] = '\0';
i++;
ch = L1[i];
}
if ((!WritedFlag) && (strcmp(" ", strToken) != 0))
{
res_file = InserRes(FindRes(strToken), strToken, res_file);
printf("<%s\t,\t%s>\n", FindRes(strToken), strToken);
}
WritedFlag = 0; //找出一个字符串就要置0
index = 0;
memset(strToken, 0, sizeof strToken);
}
}
printf("finish!\n");
}
// Preprocessing pass: copies the source program in F_open to F_out while
//   - dropping newline, carriage-return and tab characters,
//   - collapsing each run of spaces into a single space,
//   - removing // line comments and /* ... */ block comments.
// The text that is kept is also echoed to stdout.
//
// F_out is created (or truncated) even when F_open cannot be opened; in that
// case, or when F_out cannot be opened, a message is printed and the function
// returns without copying anything.
// Comment markers and spaces inside string literals are not treated specially.
void Yu(char F_open[], char F_out[])
{
    FILE *fp = fopen(F_open, "r"); // source program to read
    FILE *op = fopen(F_out, "w");  // receives the preprocessed program
    if (fp == NULL || op == NULL)
    {
        printf("the file can not open\n");
        if (fp != NULL)
            fclose(fp);
        if (op != NULL)
            fclose(op);
        return;
    }

    // fgetc returns an int so that EOF can be told apart from every real character
    int c = fgetc(fp);
    while (c != EOF)
    {
        switch (c)
        {
        case '\n': // drop line feed
        case '\r': // drop carriage return
        case '\t': // drop tab
            c = fgetc(fp);
            break;
        case ' ': // keep the first space of a run, skip the rest
            printf("%c", c);
            fputc(c, op);
            do
            {
                c = fgetc(fp);
            } while (c == ' ');
            break;
        case '/': // possible start of a comment
            c = fgetc(fp);
            if (c == '/') // line comment: skip up to and including the newline
            {
                do
                {
                    c = fgetc(fp);
                } while (c != EOF && c != '\n');
                if (c != EOF)
                {
                    c = fgetc(fp);
                }
            }
            else if (c == '*') // block comment: skip up to and including "*/"
            {
                int last = 0;
                c = fgetc(fp);
                while (c != EOF && !(last == '*' && c == '/'))
                {
                    last = c;
                    c = fgetc(fp);
                }
                if (c != EOF) // an unterminated comment simply ends at end of file
                {
                    c = fgetc(fp);
                }
            }
            else // a plain '/'; c already holds the following character
            {
                printf("%c", '/');
                fputc('/', op);
            }
            break;
        default:
            printf("%c", c);
            fputc(c, op);
            c = fgetc(fp);
            break;
        }
    }

    fclose(fp);
    fclose(op);
    printf("\n\nYu finish!\n\n");
}
main.c
大家可以自行新建文本文件,只需要把对应的文件地址改一下即可,具体说明在注释中
#include "fun.h"
/*
1、新加的枚举类型和共用体类型,识别struct,enum
2、新加或运算和与运算,识别&&和||
3、++和--,==
4、识别!=,-=,+=
5、识别浮点数
6、识别指数
7、识别指针变量
8、识别字符串
9、去掉多行注释
10、识别错误信息,数字后面跟有字母,123fds(这个有个bug,识别不了16进制的数字,比如123EFH,他其实是一个16进制数,但是也会当作错误信息)
11、对处理程序的大小没有限制
*/
// Runs the two passes in order: Yu preprocesses the source program, then
// Process tokenises the preprocessed text. Adjust the paths below to point
// at your own files.
int main()
{
    char source[] = "E:\\test\\shiyan1\\test.txt";   // program to analyse
    char cleaned[] = "E:\\test\\shiyan1\\out.txt";   // preprocessed program
    char result[] = "E:\\test\\shiyan1\\result.txt"; // token list
    char errors[] = "E:\\test\\shiyan1\\errors.txt"; // malformed tokens

    Yu(source, cleaned);
    Process(cleaned, result, errors);
    return 0;
}
写在最后,如果大家觉得这篇文章对你有帮助的话,还请大家赞一下下啦 : )