实验目的
理解词法分析器的任务和工作原理;掌握词法分析器的构建过程,并能够针对给定语言的词法规则,使用某种高级编程语言实现其词法分析器。
实验内容
给定表1所示的一个简单语言的词法规则描述,其中,标识符是以字母开头、由字母和数字组成的任意符号串,常数为整数,即由数字组成的符号串。请完成以下任务:
- 画出识别该语言词法规则的状态转换图;
- 依据状态转换图,设计并编制词法分析程序,实现从输入源程序中,识别出各类单词,即关键字、标识符、常数、运算符、界符五大类,并输出各个单词的种别码和单词符号自身的值。
- 设计恰当的测试用例对各类单词的识别进行测试。
单词符号 | 种别码 | 单词符号 | 种别码 |
---|---|---|---|
void | 101 | >= | 207 |
main | 102 | < | 208 |
int | 103 | <= | 209 |
char | 104 | == | 210 |
if | 105 | <> | 211 |
else | 106 | ++ | 212 |
for | 107 | -- | 213 |
while | 108 | ( | 301 |
+ | 201 | ) | 302 |
- | 202 | { | 303 |
* | 203 | } | 304 |
/ | 204 | ; | 305 |
= | 205 | 标识符 | 400 |
> | 206 | 常数 | 500 |
程序源代码
#include<cstdio>
#include<cstring>
#include<iostream>
#include<string>
#include<utility>
using namespace std;
const int MAX = 8; // number of entries in the keyword table
char ch = 0; // character currently being examined by the lexer
std::string words; // text (lexeme) of the token currently being built
int token; // category code of the current token
// Line number of the input position. Starts at 1 so that a diagnostic about
// the first line of the source reports line 1 instead of line 0.
int row = 1;
// Keyword table: each reserved word paired with its category code.
std::pair<std::string, int> keywords[MAX] = { {"void", 101} , {"main", 102} , {"int", 103} , {"char", 104} , {"if", 105} , {"else", 106} , {"for", 107} , {"while", 108} };
// Display names of the five token classes, in the order
// keyword, identifier, constant, operator, delimiter.
std::string kinds[5] = { "关键字" , "标识符" , "常数" , "运算符" , "界符" };
// Returns true when `letter` is an ASCII alphabetic character (a-z or A-Z).
bool isLetter(char letter)
{
    const bool lowercase = ('a' <= letter) && (letter <= 'z');
    const bool uppercase = ('A' <= letter) && (letter <= 'Z');
    return lowercase || uppercase;
}
// Returns true when `digit` is one of the ASCII decimal digits '0'..'9'.
bool isDigit(char digit)
{
    const bool outsideRange = (digit < '0') || (digit > '9');
    return !outsideRange;
}
// Looks up the current lexeme (the global `words`) in the keyword table.
// Returns the matching keyword's category code, or -1 when `words` is not a
// keyword.
int isKeyword()
{
    for (const auto& entry : keywords)
    {
        if (entry.first == words)
        {
            return entry.second;
        }
    }
    return -1;
}
//词法分析
void lex_analyse(FILE*& fpin , FILE*& fpout)
{
//从文件中读取字符,直到文件结束
while ((ch = fgetc(fpin)) != EOF)
{
//跳过空白符
if (ch == ' ' || ch == '\t' || ch == '\n')
{
//如果是换行符,行号加1
if (ch == '\n') row ++ ;
}
//如果是字母,判断是否为关键字
else if (isLetter(ch))
{
//当前单词清空
words.clear();
//当前字符为字母或数字,加入当前单词,若不是,跳出循环,并回退一个字符
while (isLetter(ch) || isDigit(ch))
{
words.push_back(ch);
ch = fgetc(fpin);
}
fseek(fpin,-1L,SEEK_CUR);
//判断是否为关键字
token = isKeyword();
if ( token != -1 )
{
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[0].c_str(),words.c_str());
}
else
{
token = 400;
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[1].c_str(),words.c_str());
}
}
//如果是数字,判断是否为常数
else if ( isDigit(ch) )
{
//当前单词清空
words.clear();
token = 500;
//当前字符为数字,加入当前单词,若不是,跳出循环,并回退一个字符
while (isDigit(ch))
{
words.push_back(ch);
ch = fgetc(fpin);
}
fseek(fpin,-1L,SEEK_CUR);
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[2].c_str(),words.c_str());
}
//如果是运算符或界符
else switch (ch)
{
//运算符
case '+':
ch = fgetc(fpin);
if (ch == '+')
{
token = 212;
words = "++";
}
else
{
fseek(fpin,-1L,SEEK_CUR);
token = 201;
words = "+";
}
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[3].c_str(),words.c_str());
break;
case '-':
ch = fgetc(fpin);
if (ch == '-')
{
token = 213;
words = "--";
}
else
{
fseek(fpin,-1L,SEEK_CUR);
token = 202;
words = "-";
}
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[3].c_str(),words.c_str());
break;
case '*':
token = 203;
words = "*";
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[3].c_str(),words.c_str());
break;
case '/':
token = 204;
words = "/";
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[3].c_str(),words.c_str());
break;
case '=':
ch = fgetc(fpin);
if (ch == '=')
{
token = 210;
words = "==";
}
else
{
fseek(fpin,-1L,SEEK_CUR);
token = 205;
words = "=";
}
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[3].c_str(),words.c_str());
break;
case '>':
ch = fgetc(fpin);
if (ch == '=')
{
token = 207;
words = ">=";
}
else
{
fseek(fpin,-1L,SEEK_CUR);
token = 206;
words = ">";
}
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[3].c_str(),words.c_str());
break;
case '<':
ch = fgetc(fpin);
if (ch == '=')
{
token = 209;
words = "<=";
}
else if (ch == '>')
{
token = 211;
words = "<>";
}
else
{
fseek(fpin,-1L,SEEK_CUR);
token = 208;
words = "<";
}
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[3].c_str(),words.c_str());
break;
//界符
case '(':
token = 301;
words = ")";
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[4].c_str(),words.c_str());
break;
case ')':
token = 302;
words = ")";
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[4].c_str(),words.c_str());
break;
case '{':
token = 303;
words = "{";
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[4].c_str(),words.c_str());
break;
case '}':
token = 304;
words = "}";
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[4].c_str(),words.c_str());
break;
case ';':
token = 305;
words = ";";
fprintf(fpout,"%d\t%s\t%s\n",token,kinds[4].c_str(),words.c_str());
break;
default: fprintf(fpout,"在第%d行无法识别字符:%c\n",row,ch);
}
}
}
// Program entry point.
//
// Prompts for an input file name, runs the lexer over that file and writes
// the token listing to "<input name>.out.txt". Prints a failure message and
// stops if either file cannot be opened.
int main()
{
    std::string filename;
    std::cout << "请输入要读取得文件名: ";
    std::cin >> filename;

    // Open the input first so that no output file is created when the input
    // does not exist.
    FILE* fpin = fopen(filename.c_str(), "r");
    if (fpin == nullptr)
    {
        std::cout << "文件打开失败" << std::endl;
        return 0; // NOTE(review): exit status kept at 0 to match the existing behavior
    }

    FILE* fpout = fopen((filename + ".out.txt").c_str(), "w");
    if (fpout == nullptr)
    {
        // The lexer must not be handed a null output stream; release the
        // input that was already opened before giving up.
        fclose(fpin);
        std::cout << "文件打开失败" << std::endl;
        return 0;
    }

    std::cout << "文件打开成功" << std::endl;
    lex_analyse(fpin, fpout);
    fclose(fpin);
    fclose(fpout);
    return 0;
}