#include<cctype>
#include<cstdlib>
#include<cstring>
#include<fstream>
#include<iostream>
#include<string>
#include<unordered_map>
#include<vector>
using namespace std;
/**
 * Line-buffered lexer for the toy language.
 *
 * The input file is read one line at a time into `buffer`; getNextToken()
 * returns either one of the negative `Type` values or, for any other
 * character, that character's own code. The payload of the most recent token
 * is left in `Idname` (identifiers and keywords), `ival` (integers) or `fval`
 * (floats). getBack() pushes the most recent token back exactly once.
 */
class Lexer {
public:
    /** Named token kinds; numbering starts at -1000 and increases by one. */
    enum Type {
        Ident = -1000,
        Let,
        Fn,
        Mut,
        Struct,
        Enum,
        Integer,   // integer literal (decimal, 0x... or 0b...)
        Float,     // floating-point literal
        I32,
        I64,
        F32,
        F64,
        Char,
        Addr,
        New,
        End,       // end of input
        Rtn,
        While,
        If,
        Else,
        Or,
        And,
    };

    unordered_map<string, Type> KeyWord;  // keyword spelling -> token kind
    ifstream inputfile;
    string filename;
    char buffer[1024] = {};               // current line (or chunk of an over-long line), NUL-terminated
    int bufferpos = 0;                    // index of the next unread character in buffer
    int bufferlen = 0;                    // number of valid characters in buffer
    double old_fval = 0;                  // not used by the lexer itself
    long long old_ival = 0;               // not used by the lexer itself
    string old_Idname;                    // not used by the lexer itself
    int old_type = End;                   // kind of the most recently returned token
    bool backflag = 0;                    // true when the last token has been pushed back
    double fval = 0;
    long long ival = 0;
    string Idname;
    int linenum = 0;                      // number of lines read so far

    /** (Re)builds the keyword table. */
    void InitMap() {
        KeyWord = {
            { "fn", Type::Fn },       { "i32", Type::I32 },     { "i64", Type::I64 },
            { "f32", Type::F32 },     { "f64", Type::F64 },     { "char", Type::Char },
            { "struct", Type::Struct }, { "let", Type::Let },   { "mut", Type::Mut },
            { "return", Type::Rtn },  { "while", Type::While }, { "if", Type::If },
            { "else", Type::Else },   { "or", Type::Or },       { "and", Type::And },
        };
    }

    Lexer() { InitMap(); }

    /**
     * Copies only the file name and resets the scanning state; the stream is
     * not copied (ifstream is not copyable), so open() must be called again.
     */
    Lexer& operator =(const Lexer& l) {
        bufferpos = 0;
        bufferlen = 0;
        buffer[0] = '\0';
        backflag = false;
        partialLine = false;
        linenum = 0;
        InitMap();
        filename = l.filename;
        return *this;
    }

    Lexer(string filename) : filename(filename) {
        InitMap();
    }

    /** Opens `filename`; prints a message and terminates the process on failure. */
    void open() {
        inputfile.open(filename);
        if (!inputfile.is_open()) {
            cout << "Open file failed" << endl;
            exit(EXIT_FAILURE);  // a failed open must not report success
        }
    }

    /** True for 0-9, a-f and A-F. */
    static bool ishexdigit(char ch) {
        return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
    }

    /** Numeric value of a hexadecimal digit; callers must check ishexdigit() first. */
    static int hexChar(char ch) {
        if (ch >= '0' && ch <= '9') return ch - '0';
        if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
        return ch - 'A' + 10;
    }

    /** Pushes the most recently returned token back; only one token of lookahead is kept. */
    void getBack() {
        backflag = 1;
    }

    /**
     * Returns the next token: a `Type` value, or the character code of a
     * single-character token. Returns Type::End once the input is exhausted
     * (and on every later call).
     */
    int getNextToken() {
        if (backflag) {
            backflag = 0;
            return old_type;
        }
        // Skip whitespace, pulling in further lines as needed. End of input is
        // only reported once the buffer is drained, so tokens on a final line
        // without a trailing newline are not lost.
        for (;;) {
            while (bufferpos < bufferlen && isspace(static_cast<unsigned char>(buffer[bufferpos]))) {
                bufferpos++;
            }
            if (bufferpos < bufferlen) break;
            if (!fillBuffer()) {
                old_type = Type::End;
                return old_type;
            }
        }
        const unsigned char ch = static_cast<unsigned char>(buffer[bufferpos]);
        if (isalpha(ch)) {
            old_type = lexWord();
        }
        else if (isdigit(ch)) {
            old_type = lexNumber();
        }
        else {
            old_type = buffer[bufferpos];
            bufferpos++;
        }
        return old_type;
    }

private:
    bool partialLine = false;  // previous read stopped mid-line because the line did not fit in buffer

    /** Reads the next line into buffer. Returns false when nothing more can be read. */
    bool fillBuffer() {
        bufferpos = 0;
        bufferlen = 0;
        buffer[0] = '\0';
        if (!inputfile.good()) return false;
        inputfile.getline(buffer, sizeof buffer);
        if (inputfile.gcount() == 0) return false;  // end of file or read error
        if (!partialLine) linenum++;
        partialLine = false;
        if (inputfile.fail() && !inputfile.eof()) {
            // The line is longer than the buffer: clear the error so that the
            // remainder is delivered by the next read instead of failing forever.
            inputfile.clear();
            partialLine = true;
        }
        bufferlen = static_cast<int>(strlen(buffer));
        return true;
    }

    /** Lexes an identifier or keyword starting at bufferpos; stores its spelling in Idname. */
    int lexWord() {
        string word;
        while (bufferpos < bufferlen &&
               (isalnum(static_cast<unsigned char>(buffer[bufferpos])) || buffer[bufferpos] == '_')) {
            word += buffer[bufferpos];
            bufferpos++;
        }
        Idname = word;
        const auto hit = KeyWord.find(word);
        return hit != KeyWord.end() ? hit->second : Type::Ident;
    }

    /**
     * Lexes a numeric literal starting at bufferpos. Reading one character
     * past the current one is always safe because buffer is NUL-terminated.
     */
    int lexNumber() {
        const auto digitAt = [this](int pos) {
            return isdigit(static_cast<unsigned char>(buffer[pos])) != 0;
        };
        if (buffer[bufferpos] == '0' && (buffer[bufferpos + 1] == 'x' || buffer[bufferpos + 1] == 'b')) {
            const bool hex = buffer[bufferpos + 1] == 'x';
            bufferpos += 2;
            ival = 0;
            if (hex) {
                while (ishexdigit(buffer[bufferpos])) {
                    ival = ival * 16 + hexChar(buffer[bufferpos]);
                    bufferpos++;
                }
            }
            else {
                while (buffer[bufferpos] == '0' || buffer[bufferpos] == '1') {
                    ival = ival * 2 + (buffer[bufferpos] - '0');
                    bufferpos++;
                }
            }
            return Type::Integer;
        }
        ival = 0;
        while (digitAt(bufferpos)) {
            ival = ival * 10 + (buffer[bufferpos] - '0');
            bufferpos++;
        }
        if (buffer[bufferpos] != '.') return Type::Integer;

        bufferpos++;  // consume the decimal point
        fval = static_cast<double>(ival);
        double scale = 0.1;
        while (digitAt(bufferpos)) {
            fval += scale * (buffer[bufferpos] - '0');
            scale *= 0.1;
            bufferpos++;
        }
        if (buffer[bufferpos] == 'e' || buffer[bufferpos] == 'E') {
            // Only treat it as an exponent when digits actually follow.
            int look = bufferpos + 1;
            const bool negative = buffer[look] == '-';
            if (buffer[look] == '-' || buffer[look] == '+') look++;
            if (digitAt(look)) {
                int exponent = 0;
                while (digitAt(look)) {
                    exponent = exponent * 10 + (buffer[look] - '0');
                    look++;
                }
                bufferpos = look;
                for (; exponent > 0; --exponent) {
                    if (negative) fval /= 10;
                    else fval *= 10;
                }
            }
        }
        return Type::Float;
    }
};
/**
 * Register file of the virtual machine: six integer registers, six
 * floating-point registers and a stack register. All registers start at zero
 * so that a freshly constructed machine has a defined state.
 */
class VirtualMachine {
public:
    long long ax = 0, bx = 0, cx = 0, dx = 0, ex = 0, fx = 0;
    double fa = 0.0, fb = 0.0, fc = 0.0, fd = 0.0, fe = 0.0, ff = 0.0;
    long long sp = 0;  // stack register
};
// Global lexer shared by every parser function; main() assigns it the input file before parsing.
Lexer lex;
/** Key type of the variable table: a variable identified by its name. */
struct Var
{
    string name;  // identifier as spelled in the source
};
/** Attributes stored for a variable in the variable table. */
struct VarAttr
{
    Lexer::Type type;  // type token the variable was declared with
    string infunc;     // name of the function the variable was declared in
};
/** One entry of a function's argument vector: the variable plus its attributes. */
struct Arg
{
    Var var;
    VarAttr vattr;
};
// Functions parsed so far, keyed by function name; the value is the argument vector stored by ParseFunc.
unordered_map<string, vector<Arg>> funclist;
/** Hash functor for Var: hashes the variable name only. */
struct VarHash {
    size_t operator()(const Var& p) const {
        const hash<string> nameHasher{};
        return nameHasher(p.name);
    }
};
/** Equality functor for Var: two variables are equal when their names match. */
struct VarEqual {
    bool operator()(const Var& l, const Var& r) const {
        return l.name.compare(r.name) == 0;
    }
};
// Not referenced anywhere else in the visible source — presumably meant to accumulate generated code (TODO confirm).
string code;
// Variable table. Keys are compared by name only (see VarHash/VarEqual), so a
// name maps to a single entry regardless of the function it was declared in;
// the owning function is kept in VarAttr::infunc.
unordered_map<Var,VarAttr,VarHash,VarEqual> varlist;
void Error(string msg) {
cout << lex.linenum << ":" << msg << endl;
exit(0);
}
// Forward declarations of the recursive-descent parser entry points.
// The string& parameter taken by the statement/expression parsers is the name
// of the function currently being parsed.
bool ParseUnits();
bool ParseFunc();
bool ParseStmts(string&);
bool ParseStmt(string&);
bool ParseExpr(string&);
bool ParseCExpr(string&);
/**
 * Parses a sequence of function definitions by calling ParseFunc() until it
 * reports that no further function starts here.
 *
 * @return Always true.
 */
bool ParseUnits() {
    while (ParseFunc()) {
        // keep consuming function definitions
    }
    return true;
}
bool ParseFunc() {
if (lex.getNextToken() == Lexer::Type::Fn) {
auto t = lex.getNextToken();
if (t == Lexer::Type::Ident) {
auto name = lex.Idname;
if (funclist.find(name) == funclist.end()) {
vector<Arg> args;
if (lex.getNextToken() != '(') {
Error("lack of (");
}
else {
while (true) {
auto t = lex.getNextToken();
if (t == Lexer::Type::Ident) {
auto argname = lex.Idname;
auto t = lex.getNextToken();
if (t == ':') {
auto t = lex.getNextToken();
if (t >= Lexer::Type::Integer && t <= Lexer::Type::Char) {
if (varlist.find({ argname }) != varlist.end()&&
varlist[{argname}].infunc==name) {
Error("already have " + argname + "in " + name);
}
varlist[{argname}] = { (Lexer::Type)t,name };
t = lex.getNextToken();
if (t == ',') {
continue;
}
else {
Error("lack of ,");
}
}
else {
Error("Type not exist");
}
}
else {
Error("lack of :");
}
}
else {
if (t == ')') {
if (lex.getNextToken() == ':') {
auto t = lex.getNextToken();
if(t>=Lexer::Type::Integer&&t<=Lexer::Type::Char)
{
funclist.insert({ name, args });
if (lex.getNextToken() == '{') {
auto t = ParseStmts(name);
if (t) {
if (lex.getNextToken() == '}') {
return true;
}
else {
Error("lack of }");
}
}
else {
Error("body error");
}
}
else {
Error("lack of {");
}
}
else {
Error("lack of return Type");
}
}
else {
Error("lack of :");
}
}
else {
Error("lack of arg name");
}
}
}
}
}
else {
Error("already have this function " + name);
}
}
else {
Error("funcname is needed");
}
}
else {
return false;
}
return false;
}
/**
 * Parses zero or more statements by calling ParseStmt() until it reports that
 * no statement starts at the current token.
 *
 * @param funcname Name of the function whose body is being parsed.
 * @return Always true.
 */
bool ParseStmts(string& funcname) {
    while (ParseStmt(funcname)) {
        // keep consuming statements
    }
    return true;
}
bool ParseStmt(string& funcname) {
auto t = lex.getNextToken();
if (t == Lexer::Type::Let) {
if (lex.getNextToken() != Lexer::Type::Ident) {
Error("var name needed");
}
if (varlist.find({ lex.Idname }) != varlist.end() && varlist[{lex.Idname}].infunc == funcname) {
Error(lex.Idname + " have existed");
}
if (lex.getNextToken() != ':') {
Error(": is needed");
}
auto t = lex.getNextToken();
if ((t >= Lexer::Type::Integer && t <= Lexer::Type::Char)==false) {
Error("Type is needed");
}
if (lex.getNextToken() != '=') {
Error("= is needed");
}
bool p = ParseExpr(funcname);
if (p) {
if (lex.getNextToken() == ';')return true;
Error("; is needed");
return false;
}
}
if (t == Lexer::Type::Ident) {
if (lex.getNextToken() != '=')Error("= is needed");
if (ParseExpr(funcname)) {
if (lex.getNextToken() != ';')Error("; is needed");
return true;
}
}
if (t == Lexer::Type::While) {
auto t = ParseCExpr(funcname);
if (t == false) {
Error("bool expr error");
}
if (lex.getNextToken() != '{')Error("lack of {");
if (ParseStmts(funcname)) {
if (lex.getNextToken() != '}')Error("lack of }");
return true;
}
}
if (t == Lexer::Type::If) {
auto t = ParseCExpr(funcname);
if (t == false) {
Error("bool expr error");
}
if (lex.getNextToken() != '{')Error("lack of {");
t = ParseStmts(funcname);
if (t == false)Error("stmts error");
if (lex.getNextToken() != '}')Error("lack of }");
if (lex.getNextToken() != Lexer::Type::Else) Error("{lack of Else");
if (lex.getNextToken() != '{')Error("lack of {");
t = ParseStmts(funcname);
if (t == false)Error("stmts error");
if (lex.getNextToken() != '}')Error("lack of }");
return true;
}
lex.getBack();
return false;
}
/**
 * Result of parsing an expression fragment.
 *
 * Parsers set `flag` to true on success. `idname` is non-empty when the value
 * lives in a named variable and empty when it is a constant held in `ival`.
 * `code` is the instruction text emitted so far for the fragment.
 */
struct Enode {
    bool flag;
    enum {
        E,
        add_T,
        minus_T,
        T,
        mul_T,
        div_T,
        F,
        null,
        ID,
        NUM,
        CALL,
        RID
    } type;
    string idname;
    string code;
    union {
        double fval;
        long long ival;
    };
};
// Arithmetic expression grammar implemented by the functions below:
//   E  -> T _E
//   _E -> ('+' | '-') E | <empty>
//   T  -> F _T
//   _T -> ('*' | '/') T | <empty>
//   F  -> '(' E ')' | Ident | '-' Ident | '-' Integer | Integer
Enode E(string &funcname);
Enode T(string& funcname);
Enode _E(string& funcname);
Enode _T(string& funcname);
Enode F(string& funcname);
bool ParseExpr(string& funcname) {
auto e = E(funcname);
cout << e.code << endl;;
return e.flag;
}
/**
 * Parses `T _E`, i.e. a term optionally followed by '+' or '-' and another
 * expression.
 *
 * Two constants are folded into a single NUM node. Otherwise exactly one
 * instruction line "<op> <left> <right>\n" is appended to the left operand's
 * code and the result becomes an ID node. A failed right operand makes the
 * whole expression fail.
 *
 * NOTE(review): code already emitted for the right operand (its `code` field)
 * is not merged into the result, and because _E recurses into E the operators
 * group to the right (a - b - c is read as a - (b - c)) — confirm whether that
 * is intended.
 *
 * @param funcname Name of the function being parsed.
 * @return Node describing the parsed expression; `flag` is false on failure.
 */
Enode E(string& funcname) {
    Enode lhs = T(funcname);
    if (!lhs.flag) {
        return lhs;
    }
    const Enode rhs = _E(funcname);
    if (rhs.type == Enode::null) {
        return lhs;  // no '+' / '-' follows
    }
    if (!rhs.flag) {
        lhs.flag = false;  // operator present but its operand did not parse
        return lhs;
    }

    const bool isAdd = rhs.type == Enode::add_T;
    const bool lhsConst = lhs.idname.empty();
    const bool rhsConst = rhs.idname.empty();

    if (lhsConst && rhsConst) {
        lhs.type = Enode::NUM;
        lhs.ival = isAdd ? lhs.ival + rhs.ival : lhs.ival - rhs.ival;
        return lhs;
    }

    const string left = lhsConst ? to_string(lhs.ival) : lhs.idname;
    const string right = rhsConst ? to_string(rhs.ival) : rhs.idname;
    lhs.code += (isAdd ? "+ " : "- ") + left + " " + right + "\n";
    if (lhsConst) {
        lhs.idname = rhs.idname;  // the result is now tied to the named operand
    }
    lhs.type = Enode::ID;
    return lhs;
}
/**
 * Parses `F _T`, i.e. a factor optionally followed by '*' or '/' and another
 * term.
 *
 * Two constants are folded into a single NUM node (a constant division by
 * zero is reported through Error()). Otherwise exactly one instruction line
 * is appended to the left operand's code and the result becomes an ID node.
 * A failed right operand makes the whole term fail.
 *
 * NOTE(review): code already emitted for the right operand (its `code` field)
 * is not merged into the result, and because _T recurses into T the operators
 * group to the right (a / b / c is read as a / (b / c)) — confirm whether that
 * is intended.
 *
 * @param funcname Name of the function being parsed.
 * @return Node describing the parsed term; `flag` is false on failure.
 */
Enode T(string& funcname) {
    Enode lhs = F(funcname);
    if (!lhs.flag) {
        return lhs;
    }
    const Enode rhs = _T(funcname);
    if (rhs.type == Enode::null) {
        return lhs;  // no '*' / '/' follows
    }
    if (!rhs.flag) {
        lhs.flag = false;  // operator present but its operand did not parse
        return lhs;
    }

    const bool isMul = rhs.type == Enode::mul_T;
    const bool lhsConst = lhs.idname.empty();
    const bool rhsConst = rhs.idname.empty();

    if (lhsConst && rhsConst) {
        if (!isMul && rhs.ival == 0) {
            Error("division by zero");
        }
        lhs.ival = isMul ? lhs.ival * rhs.ival : lhs.ival / rhs.ival;
        lhs.type = Enode::NUM;
        return lhs;
    }

    if (!lhsConst && !rhsConst) {
        lhs.code += isMul ? "* " + lhs.idname + " " + rhs.idname + " \n"
                          : "/ " + lhs.idname + " " + rhs.idname + "\n";
    }
    else if (lhsConst) {
        lhs.code += (isMul ? "* " : "/ ") + to_string(lhs.ival) + " " + rhs.idname + "\n";
        lhs.idname = rhs.idname;  // the result is now tied to the named operand
    }
    else {
        // Multiplication keeps the constant first, as before; division must
        // keep the operand order.
        lhs.code += isMul ? "* " + to_string(rhs.ival) + " " + lhs.idname + "\n"
                          : "/ " + lhs.idname + " " + to_string(rhs.ival) + "\n";
    }
    lhs.type = Enode::ID;
    return lhs;
}
/**
 * Parses the optional tail of an expression: ('+' | '-') E, or nothing.
 *
 * When the next token is not '+' or '-', it is pushed back with
 * lex.getBack() so that it is returned again unchanged. (Decrementing
 * lex.bufferpos is only correct for a one-character token that was freshly
 * read from the current line, not for keywords, identifiers, numbers, an
 * already pushed-back token or the end of input.)
 *
 * @param funcname Name of the function being parsed.
 * @return The right operand with type add_T or minus_T, or a node of type
 *         null (flag true) when no operator follows.
 */
Enode _E(string& funcname) {
    const int tk = lex.getNextToken();
    if (tk == '+' || tk == '-') {
        Enode rhs = E(funcname);
        rhs.type = (tk == '+') ? Enode::add_T : Enode::minus_T;
        return rhs;
    }
    lex.getBack();
    Enode none{};
    none.flag = true;
    none.type = Enode::null;
    return none;
}
/**
 * Parses the optional tail of a term: ('*' | '/') T, or nothing.
 *
 * When the next token is not '*' or '/', it is pushed back with
 * lex.getBack() so that it is returned again unchanged. (Decrementing
 * lex.bufferpos is only correct for a one-character token that was freshly
 * read from the current line, not for keywords, identifiers, numbers, an
 * already pushed-back token or the end of input.)
 *
 * @param funcname Name of the function being parsed.
 * @return The right operand with type mul_T or div_T, or a node of type
 *         null (flag true) when no operator follows.
 */
Enode _T(string& funcname) {
    const int tk = lex.getNextToken();
    if (tk == '*' || tk == '/') {
        Enode rhs = T(funcname);
        rhs.type = (tk == '*') ? Enode::mul_T : Enode::div_T;
        return rhs;
    }
    lex.getBack();
    Enode none{};
    none.flag = true;
    none.type = Enode::null;
    return none;
}
/**
 * Parses a factor: '(' E ')', an identifier, a negated identifier, a negated
 * integer literal or an integer literal.
 *
 * @param funcname Name of the function being parsed.
 * @return The parsed node; when the token does not start a factor, a node
 *         with flag false and type null (the token stays consumed).
 */
Enode F(string& funcname) {
    const int head = lex.getNextToken();
    switch (head) {
    case '(': {
        Enode inner = E(funcname);
        if (lex.getNextToken() != ')') Error("unmatch )");
        return inner;
    }
    case Lexer::Type::Ident:
        return Enode{ true, Enode::ID, lex.Idname, "", 0 };
    case '-': {
        const int operand = lex.getNextToken();
        if (operand == Lexer::Type::Ident) {
            return Enode{ true, Enode::RID, lex.Idname, "", 0 };
        }
        if (operand == Lexer::Type::Integer) {
            Enode negated;
            negated.flag = true;
            negated.type = Enode::NUM;
            negated.idname = "";
            negated.code = "";
            negated.ival = lex.ival * (-1);
            return negated;
        }
        Error("lack of id or num after -");
        break;
    }
    case Lexer::Type::Integer: {
        Enode literal;
        literal.flag = true;
        literal.type = Enode::NUM;
        literal.ival = lex.ival;
        literal.code = "";
        return literal;
    }
    default:
        break;
    }
    Enode failed;
    failed.flag = false;
    failed.type = Enode::null;
    return failed;
}
// Condition grammar implemented by the functions below:
//   C  -> B _C
//   _C -> 'or' C | <empty>
//   B  -> G _B
//   _B -> 'and' B | <empty>
//   G  -> '(' C ')' | '!' '(' C ')' | E cmp E     with cmp one of > < >= <= ==
bool C(string&);
bool _C(string&);
bool B(string&);
bool G(string&);
bool _B(string&);
/**
 * Entry point for parsing a condition; delegates to C().
 *
 * @param fnstr Name of the function being parsed.
 * @return Whether a condition was parsed.
 */
bool ParseCExpr(string& fnstr) {
    const bool matched = C(fnstr);
    return matched;
}
/**
 * Parses `B _C`: an and-level condition followed by an optional `or` tail.
 * The tail is only attempted when B() succeeded.
 */
bool C(string&fnstr) {
    return B(fnstr) && _C(fnstr);
}
/**
 * Parses the optional `or C` tail. When the next token is not `or`, it is
 * pushed back and the (empty) tail counts as matched.
 */
bool _C(string& fnstr) {
    if (lex.getNextToken() != Lexer::Type::Or) {
        lex.getBack();
        return true;
    }
    return C(fnstr);
}
/**
 * Parses `G _B`: a basic condition followed by an optional `and` tail.
 * The tail is only attempted when G() succeeded.
 */
bool B(string& fnstr) {
    return G(fnstr) && _B(fnstr);
}
/**
 * Parses the optional `and B` tail. When the next token is not `and`, it is
 * pushed back and the (empty) tail counts as matched.
 */
bool _B(string& fnstr) {
    if (lex.getNextToken() != Lexer::Type::And) {
        lex.getBack();
        return true;
    }
    return B(fnstr);
}
bool G(string& fnstr) {
auto ch = lex.getNextToken();
if (ch == '(') {
auto c = C(fnstr);
if (lex.getNextToken() != ')')Error("unmatch (");
return c;
}
if (ch == '!') {
if (lex.getNextToken() != '(')Error("left brace need");
auto c = C(fnstr);
if (lex.getNextToken() != ')')Error("unmatch )");
return c;
}
lex.getBack();
auto L = ParseExpr(fnstr);
if (L == 0) {
Error("E match failed");
}
auto t1 = lex.getNextToken();
if (!(t1 == '>' || t1 == '<' || t1 == '='))Error("need cmp op");
auto t2 = lex.getNextToken();
if (t2 != '=') {
if (t1 == '=')Error("only one =");
lex.getBack();
if (t1 == '>') {
auto R = E(fnstr);
return R.flag;
}
if (t1 == '<') {
auto R = E(fnstr);
return R.flag;
}
}
else {
if (t1 == '=') {
auto R = E(fnstr);
return R.flag;
}
if (t1 == '>') {
auto R = E(fnstr);
return R.flag;
}
if (t1 == '<') {
auto R = E(fnstr);
return R.flag;
}
}
return false;
}
/**
 * Program entry point: parses the source file named by the first command-line
 * argument and prints the result of ParseUnits().
 *
 * When no argument is given, "need file" is printed and the default input
 * "test.gh" is used, as before.
 */
int main(int argc, char** argv) {
    string path = "test.gh";
    if (argc > 1) {
        path = argv[1];
    }
    else {
        cout << "need file" << endl;
    }
    lex = Lexer(path);
    lex.open();
    int t = ParseUnits();
    cout << t << endl;
    return 0;
}
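// compiler
// Latest recommended article, published 2024-11-05 17:16:24
// This article presents a simple compiler front end written in C++, including the class definitions of a lexer (Lexer) and a virtual machine (VirtualMachine). The Lexer recognizes keywords, identifiers, numbers and other elements of the program and performs preliminary processing. VirtualMachine is designed to execute the compiled code. The article also contains the structures and functions for variable management, the function list and expression parsing.
// Summary generated by CSDN using AI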