compiler

该文章展示了一个用C++编写的简单编译器前端:词法分析器(Lexer)负责识别关键字、标识符、数字等记号;一组递归下降的语法分析函数(ParseFunc、ParseStmt、ParseExpr、ParseCExpr 等)负责解析函数定义、语句、算术表达式和条件表达式,并用 varlist、funclist 记录变量和函数。文中还给出了一个仅包含寄存器字段、尚未实现执行逻辑的虚拟机(VirtualMachine)类。
摘要由CSDN通过智能技术生成
#include<cctype>
#include<cstdlib>
#include<cstring>
#include<fstream>
#include<iostream>
#include<string>
#include<unordered_map>
#include<vector>
using namespace std;
// Line-oriented lexer for the toy language parsed in this file.
//
// The input file is read one line at a time into `buffer`; getNextToken()
// returns either one of the named kinds in `Type` or, for any other
// character, the character's own value. One token of push-back is available
// through getBack().
class Lexer {
public:
	// Named token kinds. They start at -1000, which keeps them distinct from
	// the single-character tokens that getNextToken() returns as char values.
	// The parser relies on Integer..Char being a contiguous range.
	enum Type {
		Ident = -1000,
		Let,
		Fn,
		Mut,
		Struct,
		Enum,
		Integer,
		Float,
		I32,
		I64,
		F32,
		F64,
		Char,
		Addr,
		New,
		End,
		Rtn,
		While,
		If,
		Else,
		Or,
		And,
	};
	std::unordered_map<std::string, Type> KeyWord; // keyword spelling -> token kind, filled by InitMap()
	std::ifstream inputfile;
	std::string filename;
	char buffer[1024] = {};       // current line (or chunk of an over-long line), NUL-terminated
	int bufferpos = 0;            // index of the next unread character in buffer
	int bufferlen = 0;            // number of valid characters in buffer
	double old_fval = 0;
	long long old_ival = 0;
	std::string old_Idname;
	int old_type = 0;             // kind of the most recently returned token (replayed after getBack())
	bool backflag = false;        // true when the last token has been pushed back
	double fval = 0;              // value of the last Float token
	long long ival = 0;           // value of the last Integer token
	std::string Idname;           // spelling of the last identifier or keyword token
	int linenum = 0;              // number of source lines read so far
	bool lineContinues = false;   // true when buffer holds the first part of a line longer than buffer

	// Registers every keyword spelling the lexer recognises.
	void InitMap() {
		KeyWord["fn"] = Type::Fn;
		KeyWord["i32"] = Type::I32;
		KeyWord["i64"] = Type::I64;
		KeyWord["f32"] = Type::F32;
		KeyWord["f64"] = Type::F64;
		KeyWord["char"] = Type::Char;
		KeyWord["struct"] = Type::Struct;
		KeyWord["let"] = Type::Let;
		KeyWord["mut"] = Type::Mut;
		KeyWord["return"] = Type::Rtn;
		KeyWord["while"] = Type::While;
		KeyWord["if"] = Type::If;
		KeyWord["else"] = Type::Else;
		KeyWord["or"] = Type::Or;
		KeyWord["and"] = Type::And;
	}
	Lexer() {
		InitMap();
	}
	// Re-targets this lexer at the file named by `l`. The stream itself cannot
	// be copied, so only the file name is taken over and all scanning state is
	// reset; call open() afterwards.
	Lexer& operator =(const Lexer& l) {
		if (this == &l) {
			return *this;
		}
		if (inputfile.is_open()) {
			inputfile.close();
		}
		inputfile.clear();
		bufferpos = 0;
		bufferlen = 0;
		buffer[0] = '\0';
		backflag = false;
		lineContinues = false;
		linenum = 0;
		InitMap();
		filename = l.filename;
		return *this;
	}
	Lexer(std::string filename) {
		InitMap();
		this->filename = filename;
	}
	// Opens `filename`; prints a message and terminates the process with a
	// failure status when the file cannot be opened.
	void open() {
		inputfile.open(filename);
		if (!inputfile.is_open()) {
			std::cout << "Open file failed" << std::endl;
			std::exit(EXIT_FAILURE);
		}
	}
	// <cctype> classifiers are undefined for negative char values, so every
	// call goes through an unsigned char cast.
	static bool isSpaceChar(char ch) {
		return std::isspace(static_cast<unsigned char>(ch)) != 0;
	}
	static bool isAlphaChar(char ch) {
		return std::isalpha(static_cast<unsigned char>(ch)) != 0;
	}
	static bool isAlnumChar(char ch) {
		return std::isalnum(static_cast<unsigned char>(ch)) != 0;
	}
	static bool isDigitChar(char ch) {
		return std::isdigit(static_cast<unsigned char>(ch)) != 0;
	}
	static bool ishexdigit(char ch) {
		return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
	}
	// Numeric value of a hexadecimal digit; the caller guarantees ishexdigit(ch).
	static int hexChar(char ch) {
		if (ch >= '0' && ch <= '9') {
			return ch - '0';
		}
		if (ch >= 'a' && ch <= 'f') {
			return ch - 'a' + 10;
		}
		return ch - 'A' + 10;
	}
	// Pushes the most recently returned token back; the next getNextToken()
	// call returns it again (Idname/ival/fval are still those of that token).
	void getBack() {
		backflag = true;
	}
	// Returns the next token kind, or Type::End once the input is exhausted.
	int getNextToken() {
		if (backflag) {
			backflag = false;
			return old_type;
		}
		// Refill the buffer until it holds a non-blank character. End of input
		// is only reported once the buffered line has been fully consumed, so
		// a last line without a trailing newline is still tokenized.
		while (true) {
			if (bufferpos >= bufferlen) {
				if (!inputfile.good()) {
					old_type = Type::End;
					return old_type;
				}
				readLine();
			}
			while (bufferpos < bufferlen && isSpaceChar(buffer[bufferpos])) {
				bufferpos++;
			}
			if (bufferpos < bufferlen) {
				break;
			}
		}
		const char ch = buffer[bufferpos];
		if (isAlphaChar(ch)) {
			std::string word;
			while (bufferpos < bufferlen && (isAlnumChar(buffer[bufferpos]) || buffer[bufferpos] == '_')) {
				word += buffer[bufferpos];
				bufferpos++;
			}
			Idname = word;
			const auto keyword = KeyWord.find(word);
			old_type = (keyword != KeyWord.end()) ? keyword->second : Type::Ident;
			return old_type;
		}
		if (isDigitChar(ch)) {
			// buffer is NUL-terminated, so peeking one character ahead is safe.
			const char next = buffer[bufferpos + 1];
			if (ch == '0' && next == 'x') {
				bufferpos += 2;
				ival = 0;
				while (ishexdigit(buffer[bufferpos])) {
					ival = ival * 16 + hexChar(buffer[bufferpos]);
					bufferpos++;
				}
				old_type = Type::Integer;
			}
			else if (ch == '0' && next == 'b') {
				bufferpos += 2;
				ival = 0;
				while (buffer[bufferpos] == '0' || buffer[bufferpos] == '1') {
					ival = ival * 2 + (buffer[bufferpos] - '0');
					bufferpos++;
				}
				old_type = Type::Integer;
			}
			else {
				old_type = lexDecimal();
			}
			return old_type;
		}
		// Anything else is a single-character token whose kind is its char value.
		bufferpos++;
		old_type = ch;
		return old_type;
	}

private:
	// Reads the next line (or the next chunk of an over-long line) into buffer.
	void readLine() {
		inputfile.getline(buffer, sizeof buffer);
		bufferpos = 0;
		bufferlen = static_cast<int>(std::strlen(buffer));
		if (!lineContinues) {
			linenum++;
		}
		lineContinues = false;
		// getline() sets failbit when the line does not fit. Clear it so the
		// rest of the line is read on the next refill instead of ending the
		// scan; a token that straddles the chunk boundary is split in two.
		if (inputfile.fail() && !inputfile.eof()
			&& bufferlen == static_cast<int>(sizeof(buffer)) - 1) {
			inputfile.clear();
			lineContinues = true;
		}
	}
	// Lexes a decimal literal starting at bufferpos: digits, then an optional
	// '.' fraction which may be followed by an exponent (e/E, optional sign,
	// digits). Stores the value in ival or fval and returns the token kind.
	Type lexDecimal() {
		ival = 0;
		while (isDigitChar(buffer[bufferpos])) {
			ival = ival * 10 + (buffer[bufferpos] - '0');
			bufferpos++;
		}
		if (buffer[bufferpos] != '.') {
			return Type::Integer;
		}
		bufferpos++; // consume the '.'
		fval = static_cast<double>(ival);
		double scale = 0.1;
		while (isDigitChar(buffer[bufferpos])) {
			fval += scale * (buffer[bufferpos] - '0');
			scale *= 0.1;
			bufferpos++;
		}
		if (buffer[bufferpos] == 'e' || buffer[bufferpos] == 'E') {
			int look = bufferpos + 1;
			bool negative = false;
			if (buffer[look] == '-' || buffer[look] == '+') {
				negative = (buffer[look] == '-');
				look++;
			}
			// Only treat the 'e' as an exponent marker when digits follow;
			// otherwise it is left for the next token.
			if (isDigitChar(buffer[look])) {
				bufferpos = look;
				int exponent = 0;
				while (isDigitChar(buffer[bufferpos])) {
					if (exponent < 1000) { // a double saturates long before this
						exponent = exponent * 10 + (buffer[bufferpos] - '0');
					}
					bufferpos++;
				}
				for (; exponent > 0; --exponent) {
					if (negative) {
						fval /= 10;
					}
					else {
						fval *= 10;
					}
				}
			}
		}
		return Type::Float;
	}
};
// Register file of the virtual machine. Only the registers are declared
// here; no execution logic is part of this class.
class VirtualMachine {
public:
	// Integer registers.
	long long ax;
	long long bx;
	long long cx;
	long long dx;
	long long ex;
	long long fx;
	// Floating-point registers.
	double fa;
	double fb;
	double fc;
	double fd;
	double fe;
	double ff;
	// Stack register.
	long long sp;
};

// Global lexer shared by every parser function below; main() re-targets it
// at the input file before parsing starts.
Lexer lex;
// Key type of `varlist`: a variable is identified by its name alone
// (see VarHash / VarEqual).
struct Var {
	string name;
};
// Attributes recorded for a variable in `varlist`.
struct VarAttr {
	Lexer::Type type; // type token that followed the ':' in the declaration
	string infunc;    // name of the function the variable was declared in
};
// One function parameter: its name together with its attributes.
// Element type of the parameter lists stored in `funclist`.
struct Arg {
	Var var;
	VarAttr vattr;
};
// Function table: function name -> parameter list. ParseFunc() consults it to
// reject duplicate definitions and inserts an entry once the header is parsed.
unordered_map<string, vector<Arg>> funclist;
struct VarHash{
	size_t operator()(const Var& p)const {
		return hash<string>{} (p.name);
	}
};
struct VarEqual {
	bool operator()(const Var& l, const Var& r) const{
		return l.name == r.name;
	}
};

// Global text buffer; not referenced by any code visible in this file.
string code;
// Variable table keyed by variable name only (VarHash / VarEqual ignore
// everything else); the value records the type and the owning function.
unordered_map<Var,VarAttr,VarHash,VarEqual> varlist;
void Error(string msg) {
	cout << lex.linenum << ":" << msg << endl;
	exit(0);
}
bool ParseUnits();
bool ParseFunc();
bool ParseStmts(string&);
bool ParseStmt(string&);
bool ParseExpr(string&);
bool ParseCExpr(string&);
// Parses function definitions one after another until ParseFunc() reports
// that the next token does not start a function. Always returns true.
bool ParseUnits() {
	while (ParseFunc()) {
		// each successful call consumed one complete function definition
	}
	return true;
}
bool ParseFunc() {
	if (lex.getNextToken() == Lexer::Type::Fn) {
		auto t = lex.getNextToken();
		if (t == Lexer::Type::Ident) {
			auto name = lex.Idname;
			if (funclist.find(name) == funclist.end()) {
				vector<Arg> args;
				if (lex.getNextToken() != '(') {
					Error("lack of (");
				}
				else {
					while (true) {
						auto t = lex.getNextToken();
						if (t == Lexer::Type::Ident) {
							auto argname = lex.Idname;
							auto t = lex.getNextToken();

							if (t == ':') {
								auto t = lex.getNextToken();
								if (t >= Lexer::Type::Integer && t <= Lexer::Type::Char) {

									if (varlist.find({ argname }) != varlist.end()&&
										varlist[{argname}].infunc==name) {
										Error("already have " + argname + "in " + name);
									}
									varlist[{argname}] = { (Lexer::Type)t,name };
									t = lex.getNextToken();
									if (t == ',') {
										continue;
									}
									else {
										Error("lack of ,");
									}
								}
								else {
									Error("Type not exist");
								}
							}
							else {
								Error("lack of :");
							}
						}
						else {
							if (t == ')') {
								
								if (lex.getNextToken() == ':') {
									auto t = lex.getNextToken();
									if(t>=Lexer::Type::Integer&&t<=Lexer::Type::Char)
									{
										
										funclist.insert({ name, args });
										if (lex.getNextToken() == '{') {
											auto t = ParseStmts(name);
											if (t) {
												if (lex.getNextToken() == '}') {
													return true;
												}
												else {
													Error("lack of }");
												}
											}
											else {
												Error("body error");
											}
										}
										else {
											Error("lack of {");
										}
									}
									else {
										Error("lack of return Type");
									}
								}
								else {
									Error("lack of :");
								}
							}
							else {
								Error("lack of arg name");
							}
						}
					}
				}
			}
			else {
				Error("already have this function " + name);
				
			}
		}
		else {
			Error("funcname is needed");
		}
	}
	else {
		return false;
		
	}
	return false;
}
// Parses statements until ParseStmt() reports that the next token does not
// start a statement. Always returns true (an empty sequence is accepted).
bool ParseStmts(string& funcname) {
	while (ParseStmt(funcname)) {
		// keep consuming statements
	}
	return true;
}
// Parses a single statement inside function `funcname`:
//
//     let NAME : TYPE = EXPR ;
//     NAME = EXPR ;
//     while COND { STMTS }
//     if COND { STMTS } else { STMTS }
//
// Returns true when a statement was parsed. When the next token does not
// start a statement it is pushed back and false is returned. Syntax errors
// inside a statement are reported through Error(), which terminates the
// process.
bool ParseStmt(string& funcname) {
	const auto first = lex.getNextToken();

	if (first == Lexer::Type::Let) {
		if (lex.getNextToken() != Lexer::Type::Ident) {
			Error("var name needed");
		}
		// Capture the name now: the type keyword that follows overwrites lex.Idname.
		const Var key{ lex.Idname };
		const auto known = varlist.find(key);
		if (known != varlist.end() && known->second.infunc == funcname) {
			Error(key.name + " have existed");
		}
		if (lex.getNextToken() != ':') {
			Error(": is needed");
		}
		const auto vartype = lex.getNextToken();
		if (!(vartype >= Lexer::Type::Integer && vartype <= Lexer::Type::Char)) {
			Error("Type is needed");
		}
		if (lex.getNextToken() != '=') {
			Error("= is needed");
		}
		if (!ParseExpr(funcname)) {
			Error("expression is needed");
		}
		if (lex.getNextToken() != ';') {
			Error("; is needed");
		}
		// Record the variable so that a second `let` of the same name in this
		// function is rejected. varlist is keyed by name only, so this replaces
		// an entry of the same name that belongs to another function.
		varlist[key] = VarAttr{ static_cast<Lexer::Type>(vartype), funcname };
		return true;
	}

	if (first == Lexer::Type::Ident) {
		if (lex.getNextToken() != '=') {
			Error("= is needed");
		}
		if (!ParseExpr(funcname)) {
			Error("expression is needed");
		}
		if (lex.getNextToken() != ';') {
			Error("; is needed");
		}
		return true;
	}

	if (first == Lexer::Type::While) {
		if (!ParseCExpr(funcname)) {
			Error("bool expr error");
		}
		if (lex.getNextToken() != '{') {
			Error("lack of {");
		}
		if (!ParseStmts(funcname)) {
			Error("stmts error");
		}
		if (lex.getNextToken() != '}') {
			Error("lack of }");
		}
		return true;
	}

	if (first == Lexer::Type::If) {
		if (!ParseCExpr(funcname)) {
			Error("bool expr error");
		}
		if (lex.getNextToken() != '{') {
			Error("lack of {");
		}
		if (!ParseStmts(funcname)) {
			Error("stmts error");
		}
		if (lex.getNextToken() != '}') {
			Error("lack of }");
		}
		// The else branch is mandatory in this grammar.
		if (lex.getNextToken() != Lexer::Type::Else) {
			Error("lack of else");
		}
		if (lex.getNextToken() != '{') {
			Error("lack of {");
		}
		if (!ParseStmts(funcname)) {
			Error("stmts error");
		}
		if (lex.getNextToken() != '}') {
			Error("lack of }");
		}
		return true;
	}

	// Not a statement: leave the token for the caller.
	lex.getBack();
	return false;
}
// Result of parsing (part of) an arithmetic expression.
struct Enode {
	// true when the sub-expression was parsed successfully (set by F()).
	bool flag;
	// Node tag. add_T/minus_T/mul_T/div_T mark an operand returned by _E()/_T()
	// together with the operator that preceded it; null means "nothing parsed";
	// ID, NUM and RID are assigned by F(). E, T, F and CALL are not assigned
	// anywhere in the code visible in this file.
	enum {E,add_T,minus_T,T,mul_T,div_T,F,null,ID,NUM,CALL,RID}type;
	// Identifier the node refers to; E() and T() treat an empty idname as
	// "this node is a numeric constant held in ival".
	string idname;
	// Instruction text accumulated while parsing; ParseExpr() prints it.
	string code;
	// Constant value. Only ival is read by the code visible in this file.
	union {
		double fval;
		long long ival;
	};
};
Enode E(string &funcname);
Enode T(string& funcname);
Enode _E(string& funcname);
Enode _T(string& funcname);
Enode F(string& funcname);
// Parses one arithmetic expression, prints the instruction text generated
// for it on its own line, and returns whether parsing succeeded.
bool ParseExpr(string& funcname) {
	const Enode parsed = E(funcname);
	cout << parsed.code << endl;
	return parsed.flag;
}
Enode E(string& funcname) {
	auto t = T(funcname);
	if (t.flag) {
		auto _e = _E(funcname);
		if (_e.type == Enode::null) return t;
		else {
			if (_e.type == Enode::add_T) {
				if (t.idname == "" && _e.idname == "") {
					t.type = Enode::NUM;
					t.ival += _e.ival;
				}
				else {
					if (t.idname !=""&&_e.idname!="") {
						t.code += "+ " + t.idname + " " + _e.idname + "\n";
						t.type = Enode::ID;
					}
					if (t.idname == "") {
						t.code += "+" + to_string(t.ival) + " " + _e.idname+"\n";
						t.idname = _e.idname;
						t.type = Enode::ID;
					}
					else {
						t.code += "+ " + t.idname + " " + to_string(_e.ival) + "\n";
						t.type = Enode::ID;
					}
				}
			}
			else {
				if (t.idname == "" && _e.idname == "") {
					t.type = Enode::NUM;
					t.ival -= _e.ival;
				}
				else {
					if (t.idname != "" && _e.idname != "") {
						t.code += "- " + t.idname + " " + _e.idname + "\n";
						t.type = Enode::ID;
					}
					if (t.idname == "") {
						t.code += "-" + to_string(t.ival) + " " + _e.idname + "\n";
						t.idname = _e.idname;
						t.type = Enode::ID;
					}
					else {
						t.code += "- " + t.idname + " " + to_string(_e.ival) + "\n";
						t.type = Enode::ID;
					}
				}
			}
			return t;
		}
	}
	return t;
}

Enode T(string& funcname) {
	auto f = F(funcname);
	if (f.flag) {
		auto _t = _T(funcname);
		if (_t.type == Enode::null) return f;
		else {
			if (_t.type == Enode::mul_T) {
				
				if (f.idname == "" && _t.idname == "") {
					f.ival = f.ival * _t.ival;
					f.type = Enode::NUM;
				}
				else {
					if (f.idname != "" && _t.idname != "") {
						f.code += "* " + f.idname + " " + _t.idname + " \n";
						f.type = Enode::ID;
					}
					if (f.idname == "") {
						f.idname = _t.idname;
						f.code += "* " + to_string(f.ival) + " " + _t.idname+"\n";
						f.type = Enode::ID;
					}
					else {
						f.code += "* " + to_string(_t.ival) + " " + f.idname+"\n";
						f.type = Enode::ID;
					}
				}
			}
			else {
				if (f.idname == "" && _t.idname == "") {
					f.ival = f.ival / _t.ival;
					f.type = Enode::NUM;
				}
				else {
					if (f.idname != "" && _t.idname != "") {
						f.type = Enode::ID;
						f.code = "/ " + f.idname + " " + _t.idname + "\n";
					}

					if (f.idname == "") {
						f.idname = _t.idname;
						f.code += "/ " + to_string(f.ival) + " " + _t.idname+"\n";
						f.type = Enode::ID;
					}
					else {
						f.code += "/ " + f.idname+" "+to_string(_t.ival)+"\n";
						f.type = Enode::ID;
					}
				}
			}
			return f;
		}
	}
	return f;
}
Enode _E(string& funcname) {
	auto ch = lex.getNextToken();
	if (ch=='+') {
		auto t = E(funcname);
		t.type = Enode::add_T;
		return t;
	}
	if (ch == '-') {
		auto t = E(funcname);
		t.type = Enode::minus_T;
		return t;
	}
	lex.bufferpos--;
	Enode tmp;
	tmp.flag = true;
	tmp.type = Enode::null;
	return  tmp;
}
Enode _T(string& funcname) {
	auto ch = lex.getNextToken();
	if (ch == '*') {
		auto t = T(funcname);
		t.type = Enode::mul_T;
		return t;
	}
	if (ch == '/') {
		auto t = T(funcname);
		t.type = Enode::div_T;
		return t;
	}
	lex.bufferpos--;
	Enode tmp;
	tmp.flag = true;
	tmp.type = Enode::null;
	return tmp;
}
// F -> '(' E ')' | IDENT | '-' IDENT | '-' INTEGER | INTEGER
//
// Parses one factor. On success the returned node has flag == true and is
// tagged ID (identifier), RID (identifier preceded by '-') or NUM (integer
// constant in ival, idname empty); a parenthesised expression returns the
// node produced by E(). When the next token cannot start a factor it is
// pushed back and a node with flag == false and type null is returned.
// A '-' that is not followed by an identifier or integer, and a missing ')',
// are reported through Error(), which terminates the process.
Enode F(string& funcname) {
	const auto tok = lex.getNextToken();
	if (tok == '(') {
		auto inner = E(funcname);
		if (lex.getNextToken() != ')')Error("unmatch )");
		return inner;
	}
	if (tok == Lexer::Type::Ident) {
		return { true,Enode::ID,lex.Idname,"", 0 };
	}
	if (tok == '-') {
		const auto operand = lex.getNextToken();
		if (operand == Lexer::Type::Ident) {
			return { true,Enode::RID,lex.Idname,"",0 };
		}
		if (operand == Lexer::Type::Integer) {
			Enode negated;
			negated.flag = true;
			negated.type = Enode::NUM;
			negated.idname = "";
			negated.code = "";
			negated.ival = lex.ival * (-1);
			return negated;
		}
		Error("lack of id or num after -");
	}
	if (tok == Lexer::Type::Integer) {
		Enode number;
		number.flag = true;
		number.type = Enode::NUM;
		number.ival = lex.ival;
		number.code = "";
		return number;
	}
	// Not a factor: leave the token for the caller instead of swallowing it.
	lex.getBack();
	Enode none;
	none.flag = false;
	none.type = Enode::null;
	none.ival = 0;
	return none;
}
bool C(string&);
bool _C(string&);
bool B(string&);
bool G(string&);
bool _B(string&);
// Entry point for parsing a condition (used by `while` and `if`);
// delegates to C() and returns its result.
bool ParseCExpr(string& fnstr) {
	return C(fnstr);
}
// C -> B _C : an `and`-level condition followed by an optional `or` tail.
// Returns true only when both parts parse.
bool C(string& fnstr) {
	return B(fnstr) && _C(fnstr);
}

// _C -> 'or' C | (nothing). When the next token is not `or` it is pushed
// back and the empty tail is accepted.
bool _C(string& fnstr) {
	if (lex.getNextToken() != Lexer::Type::Or) {
		lex.getBack();
		return true;
	}
	return C(fnstr);
}
// B -> G _B : a single comparison/group followed by an optional `and` tail.
// Returns true only when both parts parse.
bool B(string& fnstr) {
	return G(fnstr) && _B(fnstr);
}
// _B -> 'and' B | (nothing). When the next token is not `and` it is pushed
// back and the empty tail is accepted.
bool _B(string& fnstr) {
	if (lex.getNextToken() != Lexer::Type::And) {
		lex.getBack();
		return true;
	}
	return B(fnstr);
}
// G -> '(' C ')' | '!' '(' C ')' | EXPR CMP EXPR
// where CMP is one of  >  <  >=  <=  ==
//
// Returns whether the condition parsed; the result of a '!' group is the
// parse result of the inner condition. Malformed input is reported through
// Error(), which terminates the process.
bool G(string& fnstr) {
	const auto first = lex.getNextToken();
	if (first == '(') {
		const auto inner = C(fnstr);
		if (lex.getNextToken() != ')')Error("unmatch (");
		return inner;
	}
	if (first == '!') {
		if (lex.getNextToken() != '(')Error("left brace need");
		const auto inner = C(fnstr);
		if (lex.getNextToken() != ')')Error("unmatch )");
		return inner;
	}

	// Comparison: the token just read belongs to the left operand.
	lex.getBack();
	if (!ParseExpr(fnstr)) {
		Error("E match failed");
	}
	const auto op = lex.getNextToken();
	if (op != '>' && op != '<' && op != '=') {
		Error("need cmp op");
	}
	if (lex.getNextToken() != '=') {
		// One-character operator: a lone '=' is not a comparison, and the
		// token just read already belongs to the right operand.
		if (op == '=')Error("only one  =");
		lex.getBack();
	}
	// Every accepted operator is followed by the right operand.
	return E(fnstr).flag;
}
int main(int argc, char** argv) {
	if (argc == 1) {
		cout << "need file" << endl;
		//exit(0);
	}
	
	//cout << argv[1]<<endl;
	lex = Lexer("test.gh");
	lex.open();
	int t = ParseUnits();
	cout << t << endl;
	
	return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值