目录
词法语法分析处理程序
Token.h 用于词法分析
class Token {
private:
string filename;
map<string, string> Category_code = { { "identifier", "IDENFR" },
{ "else", "ELSETK" },
{ "-", "MINU" },
{ "=", "ASSIGN" },
{ "int_constant", "INTCON" },
{ "switch", "SWITCHTK" },
{ "*", "MULT" },
{ ";", "SEMICN" },
{ "char_constant", "CHARCON" },
{ "case", "CASETK" },
{ "/", "DIV" },
{ ",", "COMMA" },
{ "character_string", "STRCON" },
{ "default", "DEFAULTTK" },
{ "<", "LSS" },
{ "(", "LPARENT" },
{ "const", "CONSTTK" },
{ "while", "WHILETK" },
{ "<=", "LEQ" },
{ ")", "RPARENT" },
{ "int", "INTTK" },
{ "for", "FORTK" },
{ ">", "GRE" },
{ "[", "LBRACK" },
{ "char", "CHARTK" },
{ "scanf", "SCANFTK" },
{ ">=", "GEQ" },
{ "]", "RBRACK" },
{ "void", "VOIDTK" },
{ "printf", "PRINTFTK" },
{ "==", "EQL" },
{ "{", "LBRACE" },
{ "main", "MAINTK" },
{ "return", "RETURNTK" },
{ "!=", "NEQ" },
{ "}", "RBRACE" },
{ "if", "IFTK" },
{ "+", "PLUS" },
{ ":", "COLON" } };
public:
//存放键值对
vector<KV> KVS;
//行号
int lineindex = 0;
//错误处理
vector <Error> errors;
//构造函数
Token() { }
//构造函数
~Token() { }
//压入tokem
void push_KV(string category_code, string value, int lineindex)
{
KV temp = KV(category_code, value, lineindex);
KVS.push_back(temp);
}
bool isdigit(char x) //判断数字
{
return x >= '0' && x <= '9';
}
bool isletter(char x) //判断字母
{
return (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z');
}
string to_lower(string str) //大写转小写
{
int i = 0;
while (str[i]) {
if (str[i] > 'A' && str[i] < 'Z') {
str[i] += 32;
}
i++;
}
return str;
}
// 词法分析
string lexical_analysis(string filename)
{
//词法分析输出结果
string out = "";
// 类别码初始化
fstream fin;
fin.open(filename);
if (!fin.is_open()) {
cout << "Could not find the file\n";
cout << "Program terminating\n";
system("pause");
exit(EXIT_FAILURE);
}
string temp = "\0";
//每次读一行
while (getline(fin, temp)) {
//记录行号
lineindex++;
// curr指针,初始指向0
int curr = 0;
// FDA状态
int state = 0;
//临时拼接字符串
string temp_concat = "\0";
while (curr < temp.length() || temp_concat != "\0") {
switch (state) {
case 0:
//去除非常见字符
if (temp[curr] - '\0' <= 32) {
// do nothing
}
//如果是字母或者是下划线进入状态1,标识符或者是基本类型
else if (isletter(temp[curr]) || temp[curr] == '_') {
state = 1;
temp_concat += temp[curr];
}
//如果是数字那么进入状态2,整形常量
else if (isdigit(temp[curr])) {
state = 2;
temp_concat += temp[curr];
} else {
string curr_char = "\0";
curr_char += temp[curr];
switch (temp[curr]) {
case '+':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case '-':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case '*':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case '/':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case ';':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case ',':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case '(':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case ')':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case '[':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case ']':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case '{':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case '}':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case ':':
push_KV(Category_code[curr_char], string(1, temp[curr]),
lineindex);
break;
case '\'': // 表明字符常量
state = 3;
break;
case '"': // 表明字符串常量
state = 4;
break;
case '<': //<= ? <
state = 5;
break;
case '>': //>= ? >
state = 6;
break;
case '=': //== ? =
state = 7;
break;
case '!': //! ? !=
state = 8;
break;
default: //錯誤借口
state = -1;
break;
}
}
break;
//标识符或者基本类型
case 1:
//如果继续跟数字或字母继续拼接标识符或者基本类型
if (isalnum(temp[curr]) || temp[curr] == '_') {
temp_concat += temp[curr];
state = 1;
}
//否则标识符拼装完成
else {
//先去判断是不是基本类型,并清空拼接字符串,指针回退
if (Category_code.find(to_lower(temp_concat)) != Category_code.end()) {
push_KV(Category_code[to_lower(temp_concat)], temp_concat, lineindex);
temp_concat = "";
curr--;
state = 0;
}
// 否则就是标识符,刷新字符拼接,返回状态0
else {
push_KV(Category_code["identifier"], temp_concat, lineindex);
state = 0;
temp_concat = "";
curr--;
}
}
break;
//整型常量
case 2:
//如果还是数字,继续拼接整形常量
if (isdigit(temp[curr])) {
temp_concat += temp[curr];
state = 2;
}
//否则整形常量拼接完成,指针回退,返回状态0
else {
push_KV(Category_code["int_constant"], temp_concat, lineindex);
state = 0;
temp_concat = "";
curr--;
}
break;
// 字符常量
case 3:
//如果没有碰到',继续拼接字符常量
if (temp[curr] != '\'') {
//错误处理
if((temp[curr]-'\0'== 42)||(temp[curr]-'\0'== 43)||(temp[curr]-'\0'== 45)||(temp[curr]-'\0'== 47)||(temp[curr]-'\0'<=57&&temp[curr]-'\0'>=48)||(temp[curr]-'\0'<='z'&&temp[curr]-'\0'>='a')||(temp[curr]-'\0'<='Z'&&temp[curr]-'\0'>='A')||temp[curr]=='_')
{
}
else
{
Error error = Error("a",to_string(lineindex));
errors.push_back(error);
}
temp_concat += temp[curr];
state = 3;
}
//如果碰到',转回状态0,清空拼接字符串,但注意这里不需要回退指针
else {
push_KV(Category_code["char_constant"], temp_concat, lineindex);
state = 0;
temp_concat = "";
}
break;
// 字符串常量
case 4:
//如果没有碰到',继续拼接字符常量
if (temp[curr] != '"') {
//错误处理
if((temp[curr]-'\0'== 32)||(temp[curr]-'\0'== 33)||(temp[curr]-'\0'<=126&&temp[curr]-'\0'>=35))
{
}
else
{
Error error = Error("a",to_string(lineindex));
errors.push_back(error);
}
temp_concat += temp[curr];
state = 4;
}
//如果碰到',转回状态0,清空拼接字符串,但注意这里不需要回退指针
else {
push_KV(Category_code["character_string"], temp_concat,lineindex);
state = 0;
temp_concat = "";
}
break;
//<= ? <
case 5:
// <= 回退状态0
if (temp[curr] == '=') {
push_KV(Category_code["<="], "<=",lineindex);
state = 0;
}
//< 回退状态0,回退指针
else {
push_KV(Category_code["<"], "<",lineindex);
state = 0;
curr--;
}
break;
case 6:
// >= 回退状态0
if (temp[curr] == '=') {
push_KV(Category_code[">="],">=",lineindex);
state = 0;
}
//> 回退状态0,回退指针
else {
push_KV(Category_code[">"],">",lineindex);
state = 0;
curr--;
}
break;
case 7:
// == 回退状态0
if (temp[curr] == '=') {
push_KV(Category_code["=="],"==",lineindex);
state = 0;
}
//< 回退状态0,回退指针
else {
push_KV(Category_code["="],"=",lineindex);
state = 0;
curr--;
}
break;
case 8:
// != 回退状态0
if (temp[curr] == '=') {
push_KV(Category_code["!="],"!=",lineindex);
state = 0;
}
//! 报错
else {
state = -1;
}
break;
case -1:
cout << "wrong answer skip" << endl;
curr--;
break;
}
//移动指针
curr++;
}
}
out = "\0";
for (int i = 0; i < KVS.size(); i++) {
out += KVS[i].category_code + " " + KVS[i].value + "\n";
}
return out;
}
//读入验证文件,返回字符串
string lexical_analysis_Verification(string filename)
{
string vout = "";
fstream fin;
fin.open(filename);
if (!fin.is_open()) {
cout << "Could not find the file\n";
cout << "Program terminating\n";
system("pause");
exit(EXIT_FAILURE);
}
string tempout = "";
while (getline(fin, tempout)) {
vout += tempout + "\n";
}
return vout;
}
//输入需要验证的文件夹序号进行验证
void lexical_analysis_Verificate(int number)
{
string out = "";
for (int i = 1; i <= 10; i++) {
string filename1 = "lexical_analysis\\" + to_string(number) + "\\testfile" + to_string(i) + ".txt";
string filename2 = "lexical_analysis\\" + to_string(number) + "\\output" + to_string(i) + ".txt";
string testout = lexical_analysis(filename1);
string verificationout = lexical_analysis_Verification(filename2);
out += ((verificationout == testout) ? "right" : "wrong");
out += " ";
KVS.clear();
KVS.clear();
}
cout << out << endl;
}
};
Grammar.h 语法分析处理
#ifndef __GRAMMAR5_H__
#define __GRAMMAR5_H__
#include <KV.h>
#include <algorithm>
#include <ctype.h>
#include <fstream>
#include <iostream>
#include <map>
#include <string.h>
#include <utility>
#include <vector>
using namespace std;
class Grammar {
private:
public:
// Token records to be parsed; filled from the vector passed to the
// constructor and read through `index`.
vector<KV> KVS;
// Result of the syntax analysis.
vector<KV> outputKVS;
// Tells functions with a return value from functions without one.
// Key: value of the token preceding '(' in a function definition;
// mapped value: category code of the token before that one
// (i.e. identifier -> type specifier).
map<string, string> function_with_or_without_return_map;
// Cursor into KVS (position of the token currently examined).
int index = 0;
// Line number.
int lineindex = 0;
// Root node of the tree.
// NOTE(review): allocated with `new`; the destructor visible in this class
// has an empty body, so the node is not released there - confirm who owns it.
KV* root = new KV();
Grammar() { }
~Grammar() { }
Grammar(vector<KV>& x)
{
KVS.assign(x.begin(), x.end());
index = 0;
}
void display()
{
for (int i = 0; i < KVS.size(); i++) {
cout << KVS[i].category_code << " " << KVS[i].value << " " << endl;
}
}
//程序主入口 [<常量说明>][<变量说明>]{<有返回值函数定义>|<无返回值函数定义>}<主函数>
// Entry point of the parser. Attaches a "<程序>" node to `parent` and parses,
// in order: an optional constant description, an optional variable
// description, any number of function definitions and finally the main
// function. The result of each optional/trailing part is deliberately ignored.
void procedure(KV* parent)
{
    KV* node = new KV("<程序>", KVS[index].lineindex);
    parent->addchildren(node);

    // Optional parts: success or failure makes no difference here.
    constant_description(node);
    variable_description(node);

    // Keep consuming function definitions (with a return value first, then
    // without) until neither kind matches or the tokens run out.
    while (index < KVS.size()
        && (function_definition_with_return_value(node)
            || function_definition_without_return_value(node))) {
    }

    main_function(node);
    push_Grammatical_definition("<程序>");
}
// <主函数> ::= void main‘(’‘)’ ‘{’<复合语句>‘}’
// Parses `void main ( ) { <compound statement> }` under a new "<主函数>" node
// attached to `parent`. A missing ')' is tolerated: parsing simply continues
// with '{'. Returns true only when the closing '}' has been matched.
bool main_function(KV* parent)
{
    KV* node = new KV("<主函数>");
    parent->addchildren(node);

    if (!match_VOIDTK(node)) {
        return false;
    }
    push_token();
    if (!match_MAINTK(node)) {
        return false;
    }
    push_token();
    if (!match_LPARENT(node)) {
        return false;
    }
    push_token();
    // ')' is accepted when present; when absent the body is still parsed.
    if (match_RPARENT(node)) {
        push_token();
    }
    if (!match_LBRACE(node)) {
        return false;
    }
    push_token();
    if (!compound_statement(node)) {
        return false;
    }
    if (!match_RBRACE(node)) {
        return false;
    }
    push_token();
    push_Grammatical_definition("<主函数>");
    return true;
}
// <无返回值函数定义> ::= void<标识符>'('<参数表>')''{'<复合语句>'}'
// Parses `void <identifier> ( <parameter table> ) { <compound statement> }`
// under a new "<无返回值函数定义>" node attached to `parent`.
// Returns true on a complete match; otherwise the node is removed from
// `parent` again and false is returned. A missing ')' is tolerated.
bool function_definition_without_return_value(KV* parent)
{
    KV* node = new KV("<无返回值函数定义>", KVS[index].lineindex);
    parent->addchildren(node);

    if (match_VOIDTK(node)) {
        push_token();
        if (!match_IDENFR(node)) {
            // Not an identifier after `void` (this is how `void main` is told
            // apart): take back the `void` that was just pushed.
            outputKVS.pop_back();
            index--;
            node->popchildren();
        } else {
            push_token();
            if (match_LPARENT(node)) {
                // From here on this is definitely a function without a return
                // value: remember the token before '(' together with the
                // category code of the token preceding it.
                function_with_or_without_return_map[KVS[index - 1].value] = KVS[index - 2].category_code;
                push_token();
                if (parameter_table(node)) {
                    // ')' is accepted when present; otherwise go on with '{'.
                    if (match_RPARENT(node)) {
                        push_token();
                    }
                    if (match_LBRACE(node)) {
                        push_token();
                        if (compound_statement(node) && match_RBRACE(node)) {
                            push_token();
                            push_Grammatical_definition("<无返回值函数定义>");
                            return true;
                        }
                    }
                }
            }
        }
    }

    // No match: detach the "<无返回值函数定义>" node from the tree.
    parent->popchildren();
    return false;
}
// <有返回值函数定义> ::= <声明头部>'('<参数表>')' '{'<复合语句>'}'
// Parses `<declaration header> ( <parameter table> ) { <compound statement> }`
// under a new "<有返回值函数定义>" node attached to `parent`.
// Returns true on a complete match; otherwise the node is removed from
// `parent` again and false is returned. A missing ')' is tolerated.
bool function_definition_with_return_value(KV* parent)
{
    KV* node = new KV("<有返回值函数定义>", KVS[index].lineindex);
    parent->addchildren(node);

    if (declaration_header(node) && match_LPARENT(node)) {
        // From here on this is definitely a function with a return value:
        // remember the token before '(' together with the category code of
        // the token preceding it.
        function_with_or_without_return_map[KVS[index - 1].value] = KVS[index - 2].category_code;
        push_token();
        if (parameter_table(node)) {
            // ')' is accepted when present; otherwise go on with '{'.
            if (match_RPARENT(node)) {
                push_token();
            }
            if (match_LBRACE(node)) {
                push_token();
                if (compound_statement(node) && match_RBRACE(node)) {
                    push_token();
                    push_Grammatical_definition("<有返回值函数定义>");
                    return true;
                }
            }
        }
    }

    // No match: detach the "<有返回值函数定义>" node from the tree.
    parent->popchildren();
    return false;
}
// <复合语句> ::= [<常量说明>][<变量说明>]<语句列>
// Parses a compound statement under a new "<复合语句>" node attached to
// `parent`: an optional constant description, an optional variable
// description and then the statement list. Returns whether the statement list
// matched; the outcome of the two optional parts is ignored.
bool compound_statement(KV* parent)
{
    KV* node = new KV("<复合语句>", KVS[index].lineindex);
    parent->addchildren(node);

    // Optional parts.
    constant_description(node);
    variable_description(node);

    // Mandatory statement list.
    if (!statement_column(node)) {
        return false;
    }
    push_Grammatical_definition("<复合语句>");
    return true;
}
// <语句列> ::=