词法分析器的功能是输入源程序,输出单词符号。单词符号是一个程序语言的基本语法符号。程序语言的符号一般可以定义为以下几种:
- 关键字 是由程序语言定义的具有固定意义的标识符,也称这些标识符为保留字或基本字。例如:c++语言中的define、do、for、while、if、else等
- 标识符用来表示各种名字,如变量名、数组名、函数名等
- 常数 常数一般有整型、实型、布尔型、文字型等。
- 运算符 如:+、-、>、<、*、/等
- 界符 如:逗号、分号、括号等
词法分析器所输出的单词符号常常表示成二元式(单词种别,单词符号属性值)的形式。
下面是一个用 C++ 编写的词法分析器,开发环境为 VS2015:
#include "stdafx.h"
#include <iostream>
#include<string>
using namespace std;
// Number of entries in the reserved-word table `key` below. It must equal the
// array length: the previous value (22) exceeded the 19 stored words, so any
// loop bounded by MAX indexed past the end of the array.
#define MAX 19
// File-scope character variable, initialised to a blank.
char ch = ' ';
// Reserved-word table, stored in lowercase.
string key[MAX] = { "int","long","double","string","bool","char","final","if","else","switch","case",
"then","return","for","while","do","const","include","define" };
// Check whether `c` is a reserved word.
// Returns true when `c` compares equal to one of the entries of the `key`
// table, false otherwise. The comparison is exact (case-sensitive).
bool IsKey(const string& c)
{
    // Iterate over the array itself so the bound always matches its real
    // length; the old index loop ran to MAX, which was larger than the table
    // and read past its end for every non-keyword.
    for (const string& word : key) {
        if (word == c) return true;
    }
    return false;
}
// Check whether `c` is a letter: true for 'a'..'z' or 'A'..'Z', false otherwise.
bool IsLetter(char c)
{
    const bool isLower = ('a' <= c) && (c <= 'z');
    const bool isUpper = ('A' <= c) && (c <= 'Z');
    return isLower || isUpper;
}
// Check whether `c` is a decimal digit: true for '0'..'9', false otherwise.
bool IsDigit(char c)
{
    return !(c < '0' || c > '9');
}
// Scan the text.
// Reads characters from `fpin` until end of file and prints one line per
// recognised token to standard output, in the form "<lexeme>\t$<category>".
// Blanks, tabs and newlines are skipped. Identifiers are folded to lowercase
// before being looked up with IsKey. The stream is not closed here.
//
// The current character is kept in a local `int`; the file-scope `ch` is not
// used by this function.
void scan(FILE *fpin) {
    // fgetc returns int so that EOF can be told apart from every valid byte;
    // storing the result in a char loses that distinction.
    int cur;
    while ((cur = fgetc(fpin)) != EOF) {
        string arr = "";
        if (cur == ' ' || cur == '\t' || cur == '\n') {
            // whitespace: nothing to emit
        }
        else if (IsLetter(static_cast<char>(cur))) {
            // Identifier or keyword: letter followed by letters/digits.
            // EOF converts to a char that neither helper accepts, so the loop
            // also ends cleanly at end of file.
            while (IsLetter(static_cast<char>(cur)) || IsDigit(static_cast<char>(cur))) {
                if (cur >= 'A' && cur <= 'Z') cur = cur + 32;  // fold to lowercase
                arr += static_cast<char>(cur);
                cur = fgetc(fpin);
            }
            // Push the lookahead back. ungetc is a no-op for EOF, whereas the
            // previous fseek(-1) stepped back over the last real character at
            // end of file and made the scanner loop forever.
            ungetc(cur, fpin);
            if (IsKey(arr)) { cout << arr << "\t$关键字" << endl; }
            else cout << arr << "\t$普通标识符" << endl;
        }
        else if (IsDigit(static_cast<char>(cur))) {
            // Unsigned number: digits, where a '.' is absorbed only when a
            // digit follows it (every such '.' is absorbed, as before).
            bool danglingDot = false;
            for (;;) {
                if (IsDigit(static_cast<char>(cur))) {
                    arr += static_cast<char>(cur);
                    cur = fgetc(fpin);
                }
                else if (cur == '.') {
                    // Peek one character without discarding it: the old code
                    // called fgetc inside the loop condition and dropped the
                    // digit after the '.' (or the '.' itself).
                    const int next = fgetc(fpin);
                    if (IsDigit(static_cast<char>(next))) {
                        arr += '.';
                        cur = next;
                    }
                    else {
                        // '.' is not part of the number. Only one character
                        // of push-back is guaranteed, so push back `next` and
                        // emit the '.' token directly below.
                        danglingDot = true;
                        cur = next;
                        break;
                    }
                }
                else {
                    break;
                }
            }
            ungetc(cur, fpin);
            cout << arr << "\t$无符号实数" << endl;
            if (danglingDot) cout << '.' << "\t$界符" << endl;
        }
        else {
            const char sym = static_cast<char>(cur);
            switch (sym) {
            case '+':
            case '-':
            case '*':
            case '=':
            case '/':
                cout << sym << "\t$运算符" << endl;
                break;
            case '(':
            case ')':
            case '[':
            case ']':
            case ';':
            case '.':
            case ',':
            case '{':
            case '#':
            case '"':
            case '\'':
            case '}':
                cout << sym << "\t$界符" << endl;
                break;
            case ':': {
                const int next = fgetc(fpin);
                if (next == '=') cout << ":=" << "\t$运算符" << endl;
                else {
                    // A lone ':' is reported as ':' (it used to print "=").
                    cout << ":" << "\t$运算符" << endl;
                    ungetc(next, fpin);
                }
            } break;
            case '>': {
                const int next = fgetc(fpin);
                // else-if chain: previously ">=" fell through to the final
                // else and was also reported as ">".
                if (next == '=') cout << ">=" << "\t$运算符" << endl;
                else if (next == '>') cout << ">>" << "\t$输入控制符" << endl;
                else {
                    cout << ">" << "\t$运算符" << endl;
                    ungetc(next, fpin);
                }
            } break;
            case '<': {
                const int next = fgetc(fpin);
                if (next == '=') cout << "<=" << "\t$运算符" << endl;
                else if (next == '<') cout << "<<" << "\t$输出控制符" << endl;
                else if (next == '>') cout << "<>" << "\t$运算符" << endl;
                else {
                    cout << "<" << "\t$运算符" << endl;
                    ungetc(next, fpin);
                }
            } break;
            default:
                cout << sym << "\t$无法识别字符" << endl;
            }
        }
    }
}
// Program entry point.
// Repeatedly prompts for a file name until the file can be opened for
// reading, runs scan() on it, then closes it.
// Returns 0 after a successful scan, 1 if standard input ends or fails before
// a readable file name was supplied.
int main()
{
    // std::string instead of char[30]: `cin >> char[]` had no length limit and
    // overflowed the buffer for paths of 30 characters or more.
    string fn;
    FILE *fpin = nullptr;
    for (;;) {
        cout << "请输入文件名:" ;
        if (!(cin >> fn)) {
            // Input exhausted or failed: stop instead of re-prompting forever.
            return 1;
        }
        if (fopen_s(&fpin, fn.c_str(), "r") == 0 && fpin != nullptr) break;
        fpin = nullptr;
        cout << "文件路径错误!请输入源文件名(包括路径和后缀名):";
    }
    cout << "分析结果如下:" << endl;
    scan(fpin);
    fclose(fpin);
    return 0;
}
运行结果如下: