【实验目的】
(1)理解词法分析在编译程序中的作用 (2)加深对有穷自动机模型的理解 (3)掌握词法分析程序的实现方法和技术
【实验内容】
对一个简单语言的子集编制一个一遍扫描的词法分析程序。
【实验要求】
(1)待分析的简单语言的词法
1) 关键字 begin if then while do end
2) 运算符和界符 : := + - * / < <= <> > >= = ; ( ) #
3) 其他单词是标识符(ID)和整型常数(NUM),通过以下正规式定义: ID = letter (letter | digit)*  NUM = digit digit*
4) 空格由空白、制表符和换行符组成。空格一般用来分隔 ID、NUM、运算符、界符和关键字,在词法分析阶段通常被忽略。
(2)各种单词符号对应的种别编码
(3)词法分析程序的功能
输入:所给文法的源程序字符串
输出:二元组(syn,token 或 sum)构成的序列。
syn 为单词种别码;token 为存放的单词自身字符串;sum 为整型常数。
【实验代码】
头文件 test.hpp
#ifndef TEST_HPP
#define TEST_HPP
#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <algorithm>
#include <cctype>
// Lookup table from a token's spelling to its category code (syn).
// The two regex-like keys are placeholders for the identifier and integer
// token classes; they are looked up by name, never matched against input.
// NOTE(review): this object is defined (not merely declared) in a header, so
// including the header from more than one translation unit would yield
// duplicate definitions at link time.
std::unordered_map<std::string, int> TOKEN_MAPPING{
    // Keywords
    {"begin", 1},
    {"if", 2},
    {"then", 3},
    {"while", 4},
    {"do", 5},
    {"end", 6},

    // Token classes: identifier and integer constant
    {"letter(letter|digit)*", 10},
    {"digitdigit*", 11},

    // Arithmetic operators
    {"+", 13},
    {"-", 14},
    {"*", 15},
    {"/", 16},

    // Colon and assignment
    {":", 17},
    {":=", 18},

    // Relational operators
    {"<", 20},
    {"<>", 21},
    {"<=", 22},
    {">", 23},
    {">=", 24},
    {"=", 25},

    // Delimiters
    {";", 26},
    {"(", 27},
    {")", 28},

    // End-of-input marker
    {"#", 0},
};
// Function declarations. These character/token classifiers are defined
// after Scanner, so they are declared here first.
// Letter test for a single character.
bool isLetter(char c);
// Decimal-digit test for a single character.
bool isDigit(char c);
// Tests whether c is one of the single-character operator symbols.
bool isOperator(char c);
// Tests whether c is one of the delimiter symbols.
bool isDelimiter(char c);
// Tests whether c may appear inside an identifier (letter or digit).
bool isValidIdentifierChar(char c);
// Tests whether token is exactly the assignment operator ":=".
bool isAssignmentOperator(const std::string &token);
// Scanner implementation
/**
 * @brief Single-pass lexer: writes every token found in `input` to std::cout.
 *
 * Each recognised token is printed as "(syn, text) ", where syn is the
 * category code stored in TOKEN_MAPPING. After the last token the line
 * "Program ended." is printed.
 *
 * Recognised token kinds:
 *  - identifiers / keywords: a letter followed by letters or digits; keywords
 *    are identifiers whose spelling is a key of TOKEN_MAPPING;
 *  - integer constants: one or more digits (printed as text);
 *  - operators: + - * / < <= <> > >= = : :=
 *  - delimiters: ( ) ; #
 *
 * Whitespace is skipped. Any other character is skipped without output.
 *
 * @param input Source text to tokenise.
 */
void Scanner(std::string input)
{
    const std::size_t length = input.size();
    std::size_t pos = 0;

    while (pos < length)
    {
        const char c = input[pos];

        // <cctype> functions require a value representable as unsigned char.
        if (std::isspace(static_cast<unsigned char>(c)))
        {
            ++pos;
            continue;
        }

        if (isLetter(c)) // identifier or keyword
        {
            std::string token;
            while (pos < length && isValidIdentifierChar(input[pos]))
                token += input[pos++];

            const auto it = TOKEN_MAPPING.find(token);
            const int syn = (it != TOKEN_MAPPING.end())
                                ? it->second
                                : TOKEN_MAPPING.at("letter(letter|digit)*");
            std::cout << "(" << syn << ", " << token << ") ";
        }
        else if (isDigit(c)) // integer constant
        {
            std::string token;
            while (pos < length && isDigit(input[pos]))
                token += input[pos++];

            std::cout << "(" << TOKEN_MAPPING.at("digitdigit*") << ", " << token << ") ";
        }
        else if (isOperator(c) || c == ':' || c == '=') // one- or two-character operator
        {
            std::string token(1, c);

            // One character of lookahead decides between "<", "<=", "<>",
            // ">", ">=", ":" and ":=".
            if (pos + 1 < length)
            {
                const char next = input[pos + 1];
                const bool takesEquals = (c == ':' || c == '<' || c == '>') && next == '=';
                const bool isNotEqual = (c == '<' && next == '>');
                if (takesEquals || isNotEqual)
                    token += next;
            }
            pos += token.size();

            const auto it = TOKEN_MAPPING.find(token);
            if (it != TOKEN_MAPPING.end())
                std::cout << "(" << it->second << ", " << token << ") ";
        }
        else if (isDelimiter(c)) // single-character delimiter
        {
            const std::string token(1, c);
            ++pos;

            // find() instead of operator[] so a missing key is never inserted.
            const auto it = TOKEN_MAPPING.find(token);
            if (it != TOKEN_MAPPING.end())
                std::cout << "(" << it->second << ", " << token << ") ";
        }
        else
        {
            // Character outside the language's alphabet: skipped silently.
            ++pos;
        }
    }

    std::cout << "\nProgram ended." << std::endl;
}
// Helper function definitions
/**
 * @brief Reports whether c is an alphabetic character per std::isalpha.
 *
 * The argument is converted to unsigned char first: passing a negative char
 * (e.g. a byte of a multi-byte encoded character) directly to a <cctype>
 * function is undefined behaviour.
 *
 * @param c Character to classify.
 * @return true if std::isalpha accepts c, false otherwise.
 */
bool isLetter(char c)
{
    return std::isalpha(static_cast<unsigned char>(c)) != 0;
}
/**
 * @brief Reports whether c is a decimal digit per std::isdigit.
 *
 * The argument is converted to unsigned char first: passing a negative char
 * directly to a <cctype> function is undefined behaviour.
 *
 * @param c Character to classify.
 * @return true if std::isdigit accepts c, false otherwise.
 */
bool isDigit(char c)
{
    return std::isdigit(static_cast<unsigned char>(c)) != 0;
}
/**
 * @brief Reports whether c is a single-character operator symbol.
 *
 * Accepted characters: + - * / < > =
 * '=' is included because the token table assigns it its own category code;
 * without it a lone '=' in the input would not be classified as anything.
 * ':' is intentionally not covered here.
 *
 * @param c Character to classify.
 * @return true if c is one of the operator characters listed above.
 */
bool isOperator(char c)
{
    switch (c)
    {
    case '+':
    case '-':
    case '*':
    case '/':
    case '<':
    case '>':
    case '=':
        return true;
    default:
        return false;
    }
}
/**
 * @brief Reports whether c is a delimiter symbol.
 *
 * Accepted characters: ( ) ; #
 *
 * @param c Character to classify.
 * @return true if c is one of the delimiter characters listed above.
 */
bool isDelimiter(char c)
{
    switch (c)
    {
    case '(':
    case ')':
    case ';':
    case '#':
        return true;
    default:
        return false;
    }
}
/**
 * @brief Reports whether c may appear inside an identifier (letter or digit).
 *
 * std::isalnum is true exactly for characters accepted by std::isalpha or
 * std::isdigit. The argument is converted to unsigned char first because
 * passing a negative char to a <cctype> function is undefined behaviour.
 *
 * @param c Character to classify.
 * @return true if c is a letter or a decimal digit.
 */
bool isValidIdentifierChar(char c)
{
    return std::isalnum(static_cast<unsigned char>(c)) != 0;
}
/**
 * @brief Reports whether token is exactly the assignment operator ":=".
 *
 * @param token Token text to test.
 * @return true only when token consists of the two characters ':' and '='.
 */
bool isAssignmentOperator(const std::string &token)
{
    if (token.size() != 2)
        return false;
    return token[0] == ':' && token[1] == '=';
}
#endif // TEST_HPP
源文件.cpp
-
#include "test.hpp"
#include <iostream>
#include <string>

/**
 * @brief Prompts for a source string terminated by '#' and tokenises it.
 *
 * Lines are read from standard input until one ends with '#'; that line is
 * passed to Scanner and the program exits. A line that does not end with '#'
 * (including an empty line) is rejected with a message and the prompt is
 * shown again; it is not scanned. If standard input reaches end-of-file or
 * fails, the loop stops instead of prompting forever.
 *
 * @return 0 in all cases.
 */
int main()
{
    std::string prog;
    while (true)
    {
        std::cout << "Please input string: ";
        if (!std::getline(std::cin, prog))
            break; // EOF or stream error: no more input will ever arrive

        if (!prog.empty() && prog.back() == '#')
        {
            Scanner(prog);
            break;
        }

        std::cout << "\nInput string should end with '#'." << std::endl;
    }
    return 0;
}
// Sample input: begin x:=9;if x>0 then x:=2*x+1/3;end#