目录
一、实验目的
理解词法分析在整个编译过程中的作用,掌握词法分析的基本原理及运行过程,并实现一个简单的词法分析器。
二、实验内容
根据给定的编程语言文法(见下表),编写一个简单的词法分析器,要求该词法分析器读入指定的待分析源代码后,以“(单词/符号编码 , 单词/符号)”的格式,依次输出该源代码中所有的单词与符号。
语言文法:单词与符号分为五类——保留字、标识符、常数、运算符、界符,分类及编码的具体详情见下列各表。
保留字:
单词/符号 | 编码 | 正规式 |
begin | beginsym | begin |
end | endsym | end |
input | inputsym | input |
output | outputsym | output |
if | ifsym | if |
then | thensym | then |
else | elsesym | else |
elif | elifsym | elif |
do | dosym | do |
while | whilesym | while |
break | breaksym | break |
var | varsym | var |
and | andsym | and |
or | orsym | or |
not | notsym | not |
true | truesym | true |
false | falsesym | false |
标识符:
单词/符号 | 编码 | 正规式 |
<标识符> | ident | (字母)(字母|数字)* |
常数:
单词/符号 | 编码 | 正规式 |
<常数> | digit | (数字)(数字)* |
运算符:
单词/符号 | 编码 | 正规式 |
+ | plus | + |
- | minus | - |
* | times | * |
/ | divide | / |
% | mod | % |
= | becomes | = |
== | eql | == |
!= | neq | != |
< | lss | < |
<= | leq | <= |
> | gtr | > |
>= | geq | >= |
界符:
单词/符号 | 编码 | 正规式 |
( | lparen | ( |
) | rparen | ) |
[ | lbracket | [ |
] | rbracket | ] |
{ | lcurbkt | { |
} | rcurbkt | } |
, | comma | , |
. | period | . |
输入样例:
begin
var a = 6
if a%2 != 0
then a=a+1
else a=a-1
end
输出样例:
(beginsym , begin)
(varsym , var)
(ident , a)
(becomes , =)
(digit , 6)
(ifsym , if)
(ident , a)
(mod , %)
(digit , 2)
(neq , !=)
(digit , 0)
(thensym , then)
(ident , a)
(becomes , =)
(ident , a)
(plus , +)
(digit , 1)
(elsesym , else)
(ident , a)
(becomes , =)
(ident , a)
(minus , -)
(digit , 1)
(endsym , end)
注:本次实验会进行大量的字符串处理及查表操作,建议各位同学使用C++、Java或Python语言完成作业,不建议使用C语言。
三、实验环境
Windows或Linux系统,gcc 7.3.0,g++ 7.3.0,Java JDK 1.8,Python 3.6(以上编译器/解释器均可使用更高版本)。
四、数据准备
data文件夹下的三个txt文件,为本次实验中需要进行词法分析的源程序代码数据。
五、词法分析器程序设计描述
六、词法分析器程序实现展示
关键模块:
- 初始化数据keyWord、keyCode、symbolCode
// Lookup tables used by the lexer: reserved words, their codes, and the
// codes of operators and delimiters. (Excerpt: only the table initialisation
// is shown here; the query methods appear in the full listing.)
class Classfier {
private:
    std::map<std::string, std::string> keyCode;     // reserved word -> code
    std::map<std::string, std::string> symbolCode;  // operator/delimiter -> code
    std::set<std::string> keyWord;                  // set of reserved words

public:
    // Fills the three tables from static data so that each word and symbol
    // is listed exactly once.
    Classfier() {
        // The code of every reserved word is the word followed by "sym".
        static const char* const kReserved[] = {
            "begin", "end",   "input", "output", "if",  "then",
            "else",  "elif",  "do",    "while",  "break", "var",
            "and",   "or",    "not",   "true",   "false"};
        const std::size_t reservedCount = sizeof(kReserved) / sizeof(kReserved[0]);
        for (std::size_t i = 0; i < reservedCount; ++i) {
            const std::string word(kReserved[i]);
            keyWord.insert(word);
            keyCode[word] = word + "sym";
        }

        // {symbol, code} pairs for operators and delimiters.
        static const char* const kSymbols[][2] = {
            {"+", "plus"},     {"-", "minus"},    {"*", "times"},
            {"/", "divide"},   {"%", "mod"},      {"=", "becomes"},
            {"==", "eql"},     {"!=", "neq"},     {"<", "lss"},
            {"<=", "leq"},     {">", "gtr"},      {">=", "geq"},
            {"(", "lparen"},   {")", "rparen"},   {"[", "lbracket"},
            {"]", "rbracket"}, {"{", "lcurbkt"},  {"}", "rcurbkt"},
            {",", "comma"},    {".", "period"}};
        const std::size_t symbolCount = sizeof(kSymbols) / sizeof(kSymbols[0]);
        for (std::size_t i = 0; i < symbolCount; ++i) {
            symbolCode[kSymbols[i][0]] = kSymbols[i][1];
        }
    }
};
- 判断字符是否为数字或字母函数、判断字符串是否为数字或关键字函数、输出函数
// Returns true when ch is an ASCII letter (A-Z or a-z).
bool isLetter(char ch) {
    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
}

// Returns true when ch is an ASCII decimal digit (0-9).
bool isNumber(char ch) {
    return ch >= '0' && ch <= '9';
}

// Writes one token to standard output as "(code , word)", one token per
// line. The spaces around the comma follow the required output format.
void printResult(const std::string& code, const std::string& word) {
    std::cout << "(" << code << " , " << word << ")\n";
}
代码:
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>

namespace {

// Codes for token classes that have no fixed spelling.
const char* const kIdentCode = "ident";
const char* const kDigitCode = "digit";
const char* const kErrorCode = "error";

}  // namespace

// Returns true when ch is an ASCII letter (A-Z or a-z).
bool isLetter(char ch) {
    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
}

// Returns true when ch is an ASCII decimal digit (0-9).
bool isNumber(char ch) {
    return ch >= '0' && ch <= '9';
}

// Returns true for characters that separate tokens inside a line. Tabs and
// the '\r' left over from CRLF files are skipped like spaces, not reported
// as errors.
bool isBlank(char ch) {
    return ch == ' ' || ch == '\t' || ch == '\r';
}

// Writes one token to standard output as "(code , word)", one token per
// line. The spaces around the comma follow the required output format.
void printResult(const std::string& code, const std::string& word) {
    std::cout << "(" << code << " , " << word << ")\n";
}

// Lookup tables for the lexer: reserved words, their codes, and the codes
// of operators and delimiters.
class Classfier {
private:
    std::map<std::string, std::string> keyCode;     // reserved word -> code
    std::map<std::string, std::string> symbolCode;  // operator/delimiter -> code
    std::set<std::string> keyWord;                  // set of reserved words

    // Returns the value stored under key, or an empty string when absent.
    // find() is used instead of operator[] so a miss never inserts an entry.
    static std::string lookup(const std::map<std::string, std::string>& table,
                              const std::string& key) {
        const std::map<std::string, std::string>::const_iterator it = table.find(key);
        return it == table.end() ? std::string() : it->second;
    }

public:
    // Fills the three tables from static data so that each word and symbol
    // is listed exactly once.
    Classfier() {
        // The code of every reserved word is the word followed by "sym".
        static const char* const kReserved[] = {
            "begin", "end",   "input", "output", "if",  "then",
            "else",  "elif",  "do",    "while",  "break", "var",
            "and",   "or",    "not",   "true",   "false"};
        const std::size_t reservedCount = sizeof(kReserved) / sizeof(kReserved[0]);
        for (std::size_t i = 0; i < reservedCount; ++i) {
            const std::string word(kReserved[i]);
            keyWord.insert(word);
            keyCode[word] = word + "sym";
        }

        // {symbol, code} pairs for operators and delimiters.
        static const char* const kSymbols[][2] = {
            {"+", "plus"},     {"-", "minus"},    {"*", "times"},
            {"/", "divide"},   {"%", "mod"},      {"=", "becomes"},
            {"==", "eql"},     {"!=", "neq"},     {"<", "lss"},
            {"<=", "leq"},     {">", "gtr"},      {">=", "geq"},
            {"(", "lparen"},   {")", "rparen"},   {"[", "lbracket"},
            {"]", "rbracket"}, {"{", "lcurbkt"},  {"}", "rcurbkt"},
            {",", "comma"},    {".", "period"}};
        const std::size_t symbolCount = sizeof(kSymbols) / sizeof(kSymbols[0]);
        for (std::size_t i = 0; i < symbolCount; ++i) {
            symbolCode[kSymbols[i][0]] = kSymbols[i][1];
        }
    }

    // Returns true when word is one of the reserved words.
    bool isKeyWord(const std::string& word) const {
        return keyWord.find(word) != keyWord.end();
    }

    // Returns true when every character of word is a decimal digit
    // (an empty word therefore also yields true).
    bool isDigit(const std::string& word) const {
        for (std::size_t i = 0; i < word.size(); ++i) {
            if (!isNumber(word[i])) {
                return false;
            }
        }
        return true;
    }

    // Returns the code of a reserved word, or "" when word is not reserved.
    std::string getKeyWordCode(const std::string& word) const {
        return lookup(keyCode, word);
    }

    // Returns the code of an operator/delimiter, or "" when it is unknown.
    std::string getSymbolCode(const std::string& symbol) const {
        return lookup(symbolCode, symbol);
    }
};

// Splits one source line into tokens and prints each of them.
void scanLine(const Classfier& clsfier, const std::string& line) {
    const std::size_t size = line.size();
    std::size_t i = 0;
    while (i < size) {
        const char ch = line[i];

        if (isBlank(ch)) {
            ++i;
            continue;
        }

        if (isLetter(ch)) {
            // Reserved word or identifier: (letter)(letter|digit)*
            std::string word;
            while (i < size && (isLetter(line[i]) || isNumber(line[i]))) {
                word.push_back(line[i++]);
            }
            if (clsfier.isKeyWord(word)) {
                printResult(clsfier.getKeyWordCode(word), word);
            } else {
                printResult(kIdentCode, word);
            }
            continue;
        }

        if (isNumber(ch)) {
            // Letters glued to the digits (e.g. "12ab") are consumed too, so
            // the malformed lexeme is reported once as a single error.
            std::string word;
            while (i < size && (isLetter(line[i]) || isNumber(line[i]))) {
                word.push_back(line[i++]);
            }
            printResult(clsfier.isDigit(word) ? kDigitCode : kErrorCode, word);
            continue;
        }

        // Operator or delimiter. '=', '<', '>' and '!' may be followed by
        // '=' to form a two-character operator.
        std::string word(1, ch);
        ++i;
        const bool mayTakeEquals = ch == '=' || ch == '<' || ch == '>' || ch == '!';
        if (mayTakeEquals && i < size && line[i] == '=') {
            word.push_back(line[i++]);
        }
        // Anything missing from the symbol table (including a lone '!') is
        // reported as an error.
        const std::string code = clsfier.getSymbolCode(word);
        printResult(code.empty() ? std::string(kErrorCode) : code, word);
    }
}

// Asks for a source file name, retrying until the file opens, then prints
// the token sequence of that file. Returns 1 when standard input ends
// before a usable file name was supplied.
int main() {
    std::string path;
    std::cout << "请输入要进行词法分析的文件名:";
    if (!(std::cin >> path)) {
        return 1;
    }

    std::ifstream file(path.c_str());
    while (!file.is_open()) {
        std::cout << "文件打开失败,请重新输入:";
        if (!(std::cin >> path)) {
            return 1;  // stdin exhausted: stop instead of looping forever
        }
        file.clear();  // drop the failbit left by the previous attempt
        file.open(path.c_str());
    }

    const Classfier clsfier;
    std::string line;
    while (std::getline(file, line)) {
        scanLine(clsfier, line);
    }
    return 0;
}
七、实验结果及分析
第一组实验:
运行结果:
第二组实验:
实验结果:
第三组实验:
输出结果:
八、实验心得体会
本次实验进行了大量的字符串处理及查表操作,需要思路清晰,并注意细节上的处理,最难的地方在于理清各部分之间的逻辑关系。通过设计、编写并调试词法分析程序,我把理论知识应用于实验中,理解了词法分析在整个编译过程中的作用,掌握了词法分析的基本原理及运行过程,也重新熟悉了C++的相关内容,这对以后的编程有很大的帮助。