被分析的源程序放在txt文件中,文件内容如下:
int main()
{
int _a123b=123.4+123.4E-3+123.+123.0E+5;
return 0;
}
本词法分析程序主要识别关键字、标识符和数值常量(包括整数常量、小数常量、指数常量)。识别指数常量时,参照陈意云教材上指数常量的自动机,使用了大量的goto语句来实现状态跳转。其它类型的单词暂时未识别,可以自己添加更多的分支结构来识别更多类型的单词。
#include <ctype.h>
#include <cstdio>
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
const int maxLineLength = 1000; // size of the buffer used to read the code txt file; each fgets call stores at most maxLineLength - 1 characters
const int maxTokenLength = 20; // size of the buffer that holds one token, including the terminating '\0'
/**
 * Lexical analyzer driver.
 *
 * Reads the file e:/code.txt (Windows path, see the fopen call) line by line
 * and prints every recognized token on its own line, prefixed with its
 * category: keyword, identifier, integer constant, decimal constant or
 * exponent constant. Numeric constants are recognized by a goto-based state
 * machine; the numbers in the comments (12..19) and the labels L16/L18/L19
 * are the automaton state numbers used by the original author. Characters
 * that do not start one of these tokens are skipped silently.
 *
 * A token longer than maxTokenLength - 1 characters is consumed completely,
 * but only its first maxTokenLength - 1 characters are stored and printed.
 *
 * Returns 0 on success, -1 if the input file cannot be opened.
 */
int main()
{
    const std::vector<std::string> keywords = { "main","return","int","float","double"/* …… more keywords */ };

    // <ctype.h> classifiers require a value representable as unsigned char
    // (or EOF); passing a negative plain char (any byte >= 0x80, e.g. text
    // that is not ASCII) is undefined behaviour, hence the casts.
    auto isLetter = [](char c) { return isalpha(static_cast<unsigned char>(c)) != 0; };
    auto isDigitChar = [](char c) { return isdigit(static_cast<unsigned char>(c)) != 0; };

    FILE* fp = NULL;
    // fopen_s only exists in the Microsoft CRT; fall back to fopen elsewhere.
#ifdef _MSC_VER
    fopen_s(&fp, "e:\\code.txt", "r");
#else
    fp = fopen("e:\\code.txt", "r");
#endif
    if (fp == NULL)
    {
        std::cout << "open file error!" << std::endl;
        return -1;
    }

    char line[maxLineLength];
    // Loop on the result of fgets itself: testing feof() before reading would
    // run the body once more after the last line and re-scan stale data.
    while (fgets(line, maxLineLength, fp) != NULL)
    {
        int i = 0;
        while (line[i] != '\0')
        {
            char token[maxTokenLength];
            int j = 0;

            // Consumes line[i]: appends it to the token if there is still room
            // (one slot is reserved for the terminating '\0') and advances i.
            auto take = [&]() {
                if (j < maxTokenLength - 1)
                {
                    token[j++] = line[i];
                }
                ++i;
            };

            // Identifiers and keywords: (alpha|_)(alpha|digit|_)*
            if (isLetter(line[i]) || line[i] == '_')
            {
                take(); // first character, kept separate to mirror the automaton
                while (isLetter(line[i]) || isDigitChar(line[i]) || line[i] == '_')
                {
                    take();
                }
                token[j] = '\0';

                // A word found in the keyword table is a keyword, anything else an identifier.
                const bool isKeyword =
                    std::find(keywords.begin(), keywords.end(), token) != keywords.end();
                if (isKeyword)
                {
                    std::cout << "关键字:" << token << '\n';
                }
                else
                {
                    std::cout << "标识符:" << token << '\n';
                }
            }
            // Numeric constants: integer, decimal and exponent constants.
            // State transitions of the automaton are implemented with goto.
            else if (isDigitChar(line[i])) // state 12
            {
                take();
                while (isDigitChar(line[i])) // state 13
                {
                    take();
                }
                if (line[i] == 'E')
                {
                    take();
                    goto L16;
                }
                else if (line[i] == '.') // state 14
                {
                    take();
                    if (isDigitChar(line[i]))
                    {
                        take();
                    }
                    else // a '.' must be followed by at least one digit
                    {
                        std::cout << "未知类型:";
                        goto L19;
                    }
                    while (isDigitChar(line[i]))
                    {
                        take();
                    }
                    if (line[i] == 'E')
                    {
                        take();
                        goto L16;
                    }
                    else
                    {
                        std::cout << "小数常量" << ":";
                        goto L19;
                    }
                }
                else // state 13 -> state 19
                {
                    std::cout << "整数常量:";
                    goto L19;
                }
            L16: // just after 'E': expect a digit or a sign
                if (isDigitChar(line[i]))
                {
                    take();
                    goto L18;
                }
                else if (line[i] == '+' || line[i] == '-')
                {
                    take();
                    if (isDigitChar(line[i])) // state 17: a sign must be followed by a digit
                    {
                        take();
                    }
                    else
                    {
                        std::cout << "未知类型:";
                        goto L19;
                    }
                    goto L18;
                }
                else
                {
                    std::cout << "未知类型:";
                    goto L19;
                }
            L18: // remaining exponent digits
                while (isDigitChar(line[i]))
                {
                    take();
                }
                std::cout << "指数常量:";
                // falls through to state 19
            L19: // accepting/final state: terminate and print the collected text
                token[j] = '\0';
                std::cout << token << '\n';
            }
            // more else if: recognize other kinds of tokens here
            else // any character that does not start one of the tokens above
            {
                i++;
            }
        }
    }

    fclose(fp); // fp is known to be non-null here
    return 0;
}
程序运行结果如下: