【问题描述】通过设计C语言常见单词的正规文法或正规式,而后得到NFA,再确定化得到DFA,根据DFA的转换矩阵或转换图,用C++语言实现词法分析器。
【输入形式】输入一段完整的c语言程序
【输出形式】各类单词的token字
【样例输入】int main(){
int a = 10;
double b = -20.9;
if(a<=b)
a+=b;
else a = 0;
return a;
}
【样例输出】
line1:(type, int)
line1:(keyword, main)
line1:(bracket, ()
line1:(bracket, ))
line1:(bracket, {)
line2:(type, int)
line2:(identify, a)
line2:(OPT, =)
line2:(integer, 10)
line2:(bracket, ;)
line3:(type, double)
line3:(identify, b)
line3:(OPT, =)
line3:(decimal, -20.9)
line3:(bracket, ;)
line4:(keyword, if)
line4:(bracket, ()
line4:(identify, a)
line4:(OPT, <=)
line4:(identify, b)
line4:(bracket, ))
line5:(identify, a)
line5:(OPT, +=)
line5:(identify, b)
line5:(bracket, ;)
line6:(keyword, else)
line6:(identify, a)
line6:(OPT, =)
line6:(integer, 0)
line6:(bracket, ;)
line7:(keyword, return)
line7:(identify, a)
line7:(bracket, ;)
line8:(bracket, })
【样例说明】需要识别的关键字包括void, int, main, double, return, float, if, else, do, while, for, scanf, printf, char, sqrt, abs, 运算符(算术、关系、逻辑、位);需要识别的其他单词有标识符, 整数(十进制形式、指数形式),实数(十进制形式、指数形式),字符串;过滤注释及空格。
【评分标准】根据设计文档的质量、lex文件的正确性,代码的正确性、代码的时间空间复杂度、识别单词的种类等综合评分
构造DFA:
看起来也不是DFA
直接上代码:
#include <string.h>

#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Words reported with the "keyword" token category.
std::vector<std::string> keyword = {
    "scanf", "printf", "if",   "else", "for",  "while",
    "return", "do",    "main", "abs",  "sqrt", "float"
};

// Words reported with the "type" token category.
std::vector<std::string> type = {
    "int", "void", "char", "double", "short"
};

// Single characters reported with the "bracket" token category. Besides real
// brackets this table also holds separators, the backslash and both quotes.
std::vector<char> bracket = {
    ',', '\\', ';', ':',
    '(', ')', '[', ']', '{', '}',
    '"', '\''
};
// One recognised token: where it was found, its category and its text.
struct Store
{
    int line{0};         // source line number recorded for the token
    std::string token;   // token category, e.g. "keyword" or "OPT"
    std::string word;    // the lexeme exactly as it was read
};

// Every token recorded so far, in the order it was pushed.
std::vector<Store> msg;
// Hand-written lexer. scan() reads characters with getchar() and records
// tokens in the global `msg` list; print() writes that list to std::cout.
class Compile
{
public:
    // Tokenises the input read through getchar().
    void scan();

    // Prints every recorded token, one per line.
    void print();

    // Records the current lexeme (`word`) as a token of category `s`,
    // then clears `word`.
    void Pushmsg(std::string s);

    // Scans the exponent part of a floating-point literal and records it.
    void JudgeFloat(std::string s);

private:
    int line{1};        // current line number, starts at 1
    char ch{' '};       // most recently read character (one-character lookahead)
    std::string word;   // lexeme being accumulated
    Store temp;         // scratch record filled by Pushmsg before it is stored
};
void Compile::Pushmsg(string s)
{
temp.line = line;
temp.token = s;
temp.word = word;
msg.push_back(temp);
word.clear();
}
void Compile::JudgeFloat(string s)
{
word += ch;
ch = getchar();
if (ch == '+' || ch == '-')
{
word += ch;
ch = getchar();
}
if (ch < '1' || ch > '9')
{
cout << "Error at Line " << line << ": Illegal floating point number \"" << word << "\".\n";
exit(-1);
}
else
{
word += ch;
while ((ch = getchar()) && (ch >= '0' && ch <= '9'))
{
word += ch;
}
Pushmsg("float");
}
}
void Compile::scan()
{
/*读取第一个有效字符*/
ch = getchar();
while (ch == ' ' || ch == '\n')
{
if (ch == '\n')
line++;
ch = getchar();
}
while (ch != '\0' && ch != EOF)
{
while (ch == ' ' || ch == '\n')
{
if (ch == '\n')
line++;
ch = getchar();
}
//判断是否整数、小数、浮点数
if (ch == '+' || ch == '-')
{
word += ch;
ch = getchar();
if (ch == '=')
{
word += ch;
Pushmsg("OPT");
ch = getchar();
}
else
if (ch >= '1' && ch <= '9')
{
word += ch;
while ((ch = getchar()) && (ch >= '0' && ch <= '9'))
{
word += ch;
}
Pushmsg("integer");
}
}
if (ch >= '1' && ch <= '9')
{
word += ch;
ch = getchar();
while (ch >= '0' && ch <= '9')
{
word += ch;
ch = getchar();
}
//判度小数、浮点数
if (ch == '.')
{
word += ch;
ch = getchar();
while (ch >= '0' && ch <= '9')
{
word += ch;
ch = getchar();
}
if (ch == 'e')
{
JudgeFloat(word);
}
else
Pushmsg("decimal");
}
else
Pushmsg("integer");
}
int flag = 0;
if (ch == '_' || isalpha(ch))
{
while (isalnum(ch)||ch == '_')
{
word += ch;
ch = getchar();
}
for (vector<string>::iterator it = keyword.begin(); it != keyword.end(); it++)
{
if (word == (*it))
{
Pushmsg("keyword");
flag = 1;
}
}
if (flag == 0)
{
for (vector<string>::iterator it = type.begin(); it != type.end(); it++)
{
if (word == (*it))
{
Pushmsg("type");
flag = 1;
}
}
if (flag == 0)
{
Pushmsg("identify");
}
}
}
if (ch == '/')
{
char temp = getchar();
if (temp == '/')
{
do {
ch = getchar();
} while (ch != '\n');
}
else
if (temp == '*')
{
do {
ch = getchar();
} while (ch != '/');
ch = getchar();
}
else
if (temp == '=')
{
word += "/=";
Pushmsg("OPT");
ch = getchar();
}
else
{
word += '/';
Pushmsg("OPT");
ch = getchar();
}
}
// 0 开头的情况
if (ch == '0')
{
word += ch; ch = getchar();
Pushmsg("integer");
}
//对typeidentify进行判断
char temp;
if (ch == '%' || ch == '&')
{
temp = getchar();
if (isalnum(temp))
{
word += ch;
word += temp;
Pushmsg("typeidentify");
ch = getchar();
}
}
int flag2 = 0;
//对bracket进行匹配
for (vector<char>::iterator it = bracket.begin(); it != bracket.end(); it++)
{
if (ch == (*it))
{
word += ch; ch = getchar();
Pushmsg("bracket");
break;
flag2 = 1;
}
}
//bracket没有匹配成功,对OPT进行判断
if (flag2 == 0)
{
switch (ch)
{
case '*':
case '=':
case '<':
case '>':
case '!':
temp = getchar();
if (temp == '=') {
word += ch;
word += temp;
ch = getchar();
Pushmsg("OPT");
break;
}
else {
word += ch; ch = getchar();
Pushmsg("OPT");
break;
}
break;
default:
break;
}
}
}
}
void Compile::print()
{
for (vector<Store>::iterator it = msg.begin(); it != msg.end(); it++)
cout << "line" << (*it).line << ":(" << (*it).token << ", " << (*it).word << ')' << endl;
}
int main()
{
Compile cp;
cp.scan();
cp.print();
}