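/*
 * Syntax analysis: two approaches are provided, LL(1) parsing and a second
 * one described in the original notes as "自顶向上".
 * Predict sets come in two forms: generated automatically by the program,
 * or added by hand.
 * Semantic analysis: uses a global table.
 */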
#include<iostream>
#include<stdio.h>
#include<stack>
#include<map>
#include<set>
#include<vector>
#include<queue>
#include<string>
using namespace std;
// Capacity of the global tokenlist array declared further down in this file.
const int maxn = 10000;
/*
 * Type declarations
 */
// Terminal symbols. Codes run consecutively from 0 and are the same integers
// that initmap() uses as keys for the printable names.
enum Terminal {
    /* bookkeeping tokens */
    ENDFILE = 0,
    ERROR = 1,
    /* reserved words */
    PROGRAM = 2,
    PROCEDURE = 3,
    TYPE = 4,
    VAR = 5,
    IF = 6,
    THEN = 7,
    ELSE = 8,
    FI = 9,
    WHILE = 10,
    DO = 11,
    ENDWH = 12,
    BEGIN = 13,
    END = 14,
    READ = 15,
    WRITE = 16,
    ARRAY = 17,
    OF = 18,
    RECORD = 19,
    RETURN = 20,
    /* types */
    INTEGER = 21,
    CHAR = 22,
    /* multi-character tokens */
    ID = 23,
    INTC = 24,
    CHARC = 25,
    /* special symbols */
    ASSIGN = 26,
    EQ = 27,
    LT = 28,
    PLUS = 29,
    MINUS = 30,
    TIMES = 31,
    OVER = 32,
    LPAREN = 33,
    RPAREN = 34,
    DOT = 35,
    COLON = 36,
    SEMI = 37,
    COMMA = 38,
    LMIDPAREN = 39,
    RMIDPAREN = 40,
    UNDERANGE = 41
};
// Non-terminal symbols of the grammar. Codes start at 50 and run
// consecutively; initmap() uses the same integers as keys for the names.
enum NonTerminal {
    // whole program
    Program = 50,
    // program head
    ProgramHead = 51,
    ProgramName = 52,
    // declarations
    DeclarePart = 53,
    // type declarations
    TypeDecpart = 54,
    TypeDec = 55,
    TypeDecList = 56,
    TypeDecMore = 57,
    TypeId = 58,
    // types
    TypeDef = 59,
    BaseType = 60,
    StructureType = 61,
    ArrayType = 62,
    Low = 63,
    Top = 64,
    RecType = 65,
    FieldDecList = 66,
    FieldDecMore = 67,
    IdList = 68,
    IdMore = 69,
    // variable declarations
    VarDecpart = 70,
    VarDec = 71,
    VarDecList = 72,
    VarDecMore = 73,
    VarIdList = 74,
    VarIdMore = 75,
    // procedure declarations
    ProcDecpart = 76,
    ProcDec = 77,
    ProcDecMore = 78,
    ProcName = 79,
    // parameter declarations
    ParamList = 80,
    ParamDecList = 81,
    ParamMore = 82,
    Param = 83,
    FormList = 84,
    FidMore = 85,
    // declaration part inside a procedure
    ProcDecPart = 86,
    // procedure body
    ProcBody = 87,
    // main program body
    ProgramBody = 88,
    // statement sequence
    StmList = 89,
    StmMore = 90,
    // statement
    Stm = 91,
    AssCall = 92,
    // assignment statement
    AssignmentRest = 93,
    // conditional statement
    ConditionalStm = 94,
    // loop statement
    LoopStm = 95,
    // input statement
    InputStm = 96,
    Invar = 97,
    // output statement
    OutputStm = 98,
    // return statement
    ReturnStm = 99,
    // procedure call statement
    CallStmRest = 100,
    ActParamList = 101,
    ActParamMore = 102,
    // conditional expression
    RelExp = 103,
    OtherRelE = 104,
    // arithmetic expression
    Exp = 105,
    OtherTerm = 106,
    // term
    Term = 107,
    OtherFactor = 108,
    // factor
    Factor = 109,
    Variable = 110,
    VariMore = 111,
    FieldVar = 112,
    FieldVarMore = 113,
    CmpOp = 114,
    AddOp = 115,
    MultOp = 116
};
// Maps a grammar symbol's integer code to its printable name.
map<int, string> Map;

// Populates Map with the names of all terminals (codes 0..41) and all
// non-terminals (codes 50..116). Entries are added with insert(), so a key
// that is already present keeps its existing value.
void initmap() {
    // Terminal names in code order; the first entry has code 0.
    static const char* const terminalNames[] = {
        "ENDFILE", "ERROR",
        "PROGRAM", "PROCEDURE", "TYPE", "VAR", "IF",
        "THEN", "ELSE", "FI", "WHILE", "DO",
        "ENDWH", "BEGIN", "END", "READ", "WRITE",
        "ARRAY", "OF", "RECORD", "RETURN",
        "INTEGER", "CHAR",
        "ID", "INTC", "CHARC",
        "ASSIGN", "EQ", "LT", "PLUS", "MINUS",
        "TIMES", "OVER", "LPAREN", "RPAREN", "DOT",
        "COLON", "SEMI", "COMMA", "LMIDPAREN", "RMIDPAREN",
        "UNDERANGE"
    };
    // Non-terminal names in code order; the first entry has code 50.
    static const char* const nonTerminalNames[] = {
        "Program", "ProgramHead", "ProgramName", "DeclarePart",
        "TypeDecpart", "TypeDec", "TypeDecList", "TypeDecMore", "TypeId",
        "TypeDef", "BaseType", "StructureType", "ArrayType",
        "Low", "Top", "RecType", "FieldDecList", "FieldDecMore", "IdList", "IdMore",
        "VarDecpart", "VarDec", "VarDecList", "VarDecMore", "VarIdList", "VarIdMore",
        "ProcDecpart", "ProcDec", "ProcDecMore", "ProcName",
        "ParamList", "ParamDecList", "ParamMore", "Param", "FormList", "FidMore",
        "ProcDecPart",
        "ProcBody",
        "ProgramBody",
        "StmList", "StmMore",
        "Stm", "AssCall",
        "AssignmentRest",
        "ConditionalStm",
        "LoopStm",
        "InputStm", "Invar",
        "OutputStm",
        "ReturnStm",
        "CallStmRest", "ActParamList", "ActParamMore",
        "RelExp", "OtherRelE",
        "Exp", "OtherTerm",
        "Term", "OtherFactor",
        "Factor", "Variable", "VariMore", "FieldVar", "FieldVarMore",
        "CmpOp", "AddOp", "MultOp"
    };
    const int terminalCount = sizeof(terminalNames) / sizeof(terminalNames[0]);
    const int nonTerminalCount = sizeof(nonTerminalNames) / sizeof(nonTerminalNames[0]);
    const int firstNonTerminalCode = 50;

    // terminals
    for (int code = 0; code < terminalCount; ++code) {
        Map.insert(make_pair(code, terminalNames[code]));
    }
    // non-terminals
    for (int offset = 0; offset < nonTerminalCount; ++offset) {
        Map.insert(make_pair(firstNonTerminalCode + offset, nonTerminalNames[offset]));
    }
}
// One lexical token. Scanner() allocates and returns these, and the global
// tokenlist array stores them by value.
struct Token {
// Line number; Scanner() copies the global hangshu counter into this field.
int hang;
// Terminal code; IsKeyWord() sets it when the lexeme is a reserved word.
Terminal Tname;
int flag;// 0 = terminal, 1 = non-terminal
NonTerminal nonTname;
// NOTE(review): never read or written in the visible code; purpose unknown - confirm.
int name;
string tokenname; // the token's text, e.g. a char variable called sss is stored as "sss"
};
/*
 * Struct definitions
 */
// Tree node holding up to 100 child pointers. A node owns its children:
// destroying it deletes son[0..sonnum-1], which in turn delete their own
// children.
//
// Every member now has a deterministic initial value. Previously only sonnum
// and height were set, leaving hang, Tname, flag, nonTname, father and the
// whole son[] array indeterminate after `new Tree()`.
//
// NOTE(review): the implicit copy operations are shallow, so copying a node
// that has children and destroying both copies would delete the children
// twice. Pass nodes around by pointer.
struct Tree {
    // Number of valid entries in son[].
    int sonnum;
    // NOTE(review): presumably a source line number, like Token::hang - confirm.
    int hang = 0;
    int height = 0;
    // Value-initialised to 0, which is ENDFILE.
    Terminal Tname{};
    int flag = 0; // 0 = terminal, 1 = non-terminal
    // Value-initialised to 0, which is not a named NonTerminal.
    NonTerminal nonTname{};
    Tree() {
        sonnum = 0;
    }
    // Child pointers; slots at index >= sonnum stay null until assigned.
    Tree* son[100] = {};
    // Parent link; not owned and never deleted by this node.
    Tree* father = nullptr;
    ~Tree() {
        for (int i = 0; i < sonnum; i++) {
            delete(son[i]);
        }
    }
};
struct Product {
NonTerminal nonTname;
int flag;
Terminal Tname;
Terminal getTerminalname() { return Tname; }
NonTerminal getNonTerminalname() { return nonTname; }
void setTerminalname(Terminal name) { Tname = name; }
void setNonTerminalname(NonTerminal name) { nonTname = name; }
};
struct Production {
Product product[100];
NonTerminal Headname;
void setHead(NonTerminal name) { Headname = name; }
int num;
Production() { num = 0; }
void setProduction(Terminal ter) {
product[num].setTerminalname(ter); product[num++].flag = 0;
}
void setProduction(NonTerminal nonter) { product[num].setNonTerminalname(nonter); product[num++].flag = 1; }
NonTerminal getHeadname() { return Headname; }
Product get(int i) { return product[i]; }
};
// A list of up to 100 terminals, filled through setPredict().
struct Predict {
    // Terminals appended so far; valid entries are name[0..num-1].
    Terminal name[100];
    // NOTE(review): not touched by any member function here - confirm whether it is used.
    Terminal ter[100];
    int num = 0;

    // Appends one terminal; no bounds check against the 100-slot capacity.
    void setPredict(Terminal nonter)
    {
        name[num] = nonter;
        num += 1;
    }

    // Returns the i-th stored terminal; i is not range-checked.
    Terminal get(int i)
    {
        return name[i];
    }
};
// A named entry whose type information is held in one of three union layouts
// (plain, array, record), plus a pointer to another RecBody.
// NOTE(review): presumably describes one field of a record type, chained
// through `link` - confirm against the code that builds these.
struct RecBody {
string name;
// All three layouts begin with the same two members (Size, kind).
union {
// Layout for a plain variable.
struct {
int Size;
Terminal kind;
} CommonVar;
// Layout for an array: bounds and element type in addition to Size/kind.
struct {
int Size;//Size=(up-low+1)*sizeof(ElemType)
Terminal kind;
int low;
int up;
Terminal ElemType;
} ArrayVar;
// Layout for a record: points at a RecBody.
struct {
int Size;
Terminal kind;
RecBody* recbody;
} RecVar;
} type;
// NOTE(review): presumably the next entry in a singly linked list - confirm.
RecBody* link;
};
// Singly linked node carrying a name, a Terminal kind and a level.
// SymTable::Parame points at a node of this type.
// NOTE(review): presumably one formal parameter of a procedure - confirm.
struct ParameTable {
string name;
Terminal kind;
int level;
// Next node in the chain.
ParameTable* next;
};
// Symbol-table entry: name, kind and level, type details in one of three
// union layouts, an optional ParameTable chain, and a link to another entry.
// The global vectors symArr and scope hold pointers to these.
struct SymTable {
string name;
Terminal kind;
// NOTE(review): presumably the nesting level of the declaration (compare the global Level) - confirm.
int level;
// All three layouts begin with the same two members (Size, kind).
union {
// Layout for a plain variable.
struct {
int Size;
Terminal kind;
} CommonVar;
// Layout for an array: bounds and element type in addition to Size/kind.
struct {
int Size;//Size=(up-low+1)*sizeof(ElemType)
Terminal kind;
int low;
int up;
Terminal ElemType;
} ArrayVar;
// Layout for a record: points at a RecBody.
struct {
int Size;
Terminal kind;
RecBody* recbody;
} RecVar;
} type;
// Head of a ParameTable chain; NULL unless one is attached.
ParameTable* Parame = NULL;
// Pointer to another SymTable entry; has no default initializer.
SymTable* next;
};
/*
* Global variables
*/
// Token buffer with room for maxn entries.
Token tokenlist[maxn];
// NOTE(review): presumably the number of valid entries in tokenlist - confirm.
int tokenlen;
// Grammar productions (141 slots).
Production product[141];
// Predict lists (141 slots).
// NOTE(review): presumably predict[i] belongs to product[i] - confirm.
Predict predict[141];
// NOTE(review): presumably an error flag or counter; not used in the visible code - confirm.
int error = 0;
// Tree node allocated during static initialisation.
Tree* root = new Tree();
int hangshu = 1; // line number of the token currently being scanned
Tree* tree[1000];
int Level = 0;// the original note mentions both the current level and the largest symbol-table index, but only the level is declared here
vector<SymTable*> symArr;
vector<SymTable*> scope;
/*
 * Lexical analysis phase
 */
// Returns 0 when ch is an ASCII letter or decimal digit, and 1 for any other
// character.
int IsOther(char ch)
{
    const bool isUpper = ('A' <= ch) && (ch <= 'Z');
    const bool isLower = ('a' <= ch) && (ch <= 'z');
    const bool isDigit = ('0' <= ch) && (ch <= '9');
    return (isUpper || isLower || isDigit) ? 0 : 1;
}
//判断是否为关键字
int IsKeyWord(string str, Token* pToken)
{
if (str == "endfile")
{
pToken->flag = 0;
pToken->Tname = ENDFILE;
pToken->tokenname = str;
return 1;
}
else if (str == "error")
{
pToken->flag = 0;
pToken->Tname = ERROR;
pToken->tokenname = str;
return 1;
}
else if (str == "integer")
{
pToken->flag = 0;
pToken->Tname = INTEGER;
pToken->tokenname = str;
return 1;
}
else if (str == "char")
{
pToken->flag = 0;
pToken->Tname = CHAR;
pToken->tokenname = str;
return 1;
}
else if (str == "program")
{
pToken->flag = 0;
pToken->Tname = PROGRAM;
pToken->tokenname = str;
return 1;
}
else if (str == "array")
{
pToken->flag = 0;
pToken->Tname = ARRAY;
pToken->tokenname = str;
return 1;
}
else if (str == "of")
{
pToken->flag = 0;
pToken->Tname = OF;
pToken->tokenname = str;
return 1;
}
else if (str == "record")
{
pToken->flag = 0;
pToken->Tname = RECORD;
pToken->tokenname = str;
return 1;
}
else if (str == "end")
{
pToken->flag = 0;
pToken->Tname = END;
pToken->tokenname = str;
return 1;
}
else if (str == "var")
{
pToken->flag = 0;
pToken->Tname = VAR;
pToken->tokenname = str;
return 1;
}
else if (str == "procedure")
{
pToken->flag = 0;
pToken->Tname = PROCEDURE;
pToken->tokenname = str;
return 1;
}
else if (str == "begin")
{
pToken->flag = 0;
pToken->Tname = BEGIN;
pToken->tokenname = str;
return 1;
}
else if (str == "if")
{
pToken->flag = 0;
pToken->Tname = IF;
pToken->tokenname = str;
return 1;
}
else if (str == "then")
{
pToken->flag = 0;
pToken->Tname = THEN;
pToken->tokenname = str;
return 1;
}
else if (str == "else")
{
pToken->flag = 0;
pToken->Tname = ELSE;
pToken->tokenname = str;
return 1;
}
else if (str == "fi")
{
pToken->flag = 0;
pToken->Tname = FI;
pToken->tokenname = str;
return 1;
}
else if (str == "while")
{
pToken->flag = 0;
pToken->Tname = WHILE;
pToken->tokenname = str;
return 1;
}
else if (str == "do")
{
pToken->flag = 0;
pToken->Tname = DO;
pToken->tokenname = str;
return 1;
}
else if (str == "endwh")
{
pToken->flag = 0;
pToken->Tname = ENDWH;
pToken->tokenname = str;
return 1;
}
else if (str == "read")
{
pToken->flag = 0;
pToken->Tname = READ;
pToken->tokenname = str;
return 1;
}
else if (str == "write")
{
pToken->flag = 0;
pToken->Tname = WRITE;
pToken->tokenname = str;
return 1;
}
else if (str == "return")
{
pToken->flag = 0;
pToken->Tname = RETURN;
pToken->tokenname = str;
return 1;
}
else if (str == "type")
{
pToken->flag = 0;
pToken->Tname = TYPE;
pToken->tokenname = str;
return 1;
}
else return 0;
}
Token* Scanner(FILE* pf)
{
char ch;
string tmpStr = "";
Token* pToken = new Token();
/*用于检测标识符是否合法*/
char biaozhi[50] = { '\0' };
int pb = 0;
LS0://根据第一个字符确定程序走向
{
ch = fgetc(pf);
if (ch != EOF) //没有读到文件末尾
{
if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'))
goto LS1;
if (ch >= '0' && ch <= '9')
goto LS2;
if (ch == '+')
goto LS3;
if (ch == '-')
goto LS4;
if (ch == '*')
goto LS5;
if (ch == '/')
goto LS6;
if (ch == '<')
goto LS7;
if (ch == ';')
goto LS8;
if (ch == ':')
goto LS9;
if (ch == ',')
goto LS10;
if (ch == '.')
goto LS11;
if (ch == '=')
goto LS12;
if (ch == '[')
goto LS13;
if (ch == ']')
goto LS14;
if (ch == '(')
goto LS15;
if (ch == ')')
goto LS16;
if (ch == '\'')
goto LS17;
if (ch == ' ')
goto LS18;
if (ch == '\n' || ch == '\r' || ch == '\t')
goto LS19;
goto LS20;
}
else
{
return NULL;
}
}
LS1://若字母开头,可能为标示符或关键字
{
tmpStr += ch;
biaozhi[pb] = ch;
pb++;
int flag;
ch = fgetc(pf); //读取下一个字符
if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9'))
goto LS1;
if ((flag = IsOther(ch)) == 1) //如果读到了除字母数字以外的
{
ungetc(ch, pf); //把读到的字符放回到文件流中
if ((flag = IsKeyWord(tmpStr, pToken)) == 1) //如果判断字符串是关键字
{
//char数组清零
for (int s = 0; s < 50; s++)
biaozhi[s] = '\0';
pb = 0;
//放入token序列
pToken->hang = hangshu;
return pToken;
}
else
{