使用 TypeScript 实现的 parse 函数，用于构建 AST（抽象语法树），编译命令为 tsc -t es6。实现了词法分析和语法分析，支持静态类型声明、赋值解析、函数解析、表达式解析等；尚未实现 if 语句、while 语句和 for 语句。
语法分析函数:
/**
 * Parses program text into a BProgram AST.
 *
 * The parser is a token-driven state machine. `stack` holds the contexts that
 * are currently open (program, function definition, statement, expression,
 * call ...); each context carries the AST node being filled in and a status
 * string naming what is expected next. Every token goes through three stages
 * in order, and an earlier stage may change the stack seen by a later one:
 *   1. resolve a pending identifier ("Scene") into a call or a plain identifier,
 *   2. feed the token to the open expression, if the top context is one,
 *   3. dispatch on the class/status of whatever context is now on top.
 *
 * @param source the program text to parse
 * @returns the BProgram that received all parsed statements
 * @throws a string message when a token does not fit the current state
 */
function parse(source : string) : BProgram {
    let program : BProgram = new BProgram();
    // Open contexts, innermost last. The program itself is the outermost one.
    let stack : Context[] = [new Context(null, program, "ready")];
    let p : number = 0; // read offset into source
    while (p < source.length) {
        let r : [string, number] = readToken(source, p);
        let token = r[0];
        p = r[1];
        if (token == null) { // end of input
            return program;
        }
        // When the language is case-insensitive every token is folded to upper case.
        const foldCase : boolean = program.env.CaseSensitive == false;
        if (foldCase) {
            token = token.toUpperCase();
        }
        // Keyword test that stays correct after case folding: the spelled-out
        // keyword is folded the same way as the token before comparing.
        const isKeyword = (word : string) : boolean =>
            token == (foldCase ? word.toUpperCase() : word);

        // ---- Stage 1: a pending identifier is resolved by the token that follows it.
        if (stack.last().klass == "Scene") {
            if (stack.last().status == "ready") {
                if (token == '(') { // identifier followed by '(' is a function call
                    let sCtx = stack.pop();
                    let ctx = stack.last();
                    let e : BExpressDefine = <BExpressDefine> (ctx.stmt);
                    let f : BFunctionCall = new BFunctionCall();
                    f.name = (<BScene> sCtx.stmt).token;
                    e.push(f);
                    // The '(' itself is handled again in stage 3 by the FunctionCall context.
                    stack.push(new Context(ctx, f, "ready"));
                }
                else { // anything else: the identifier is a variable or constant
                    let sCtx = stack.pop();
                    let scene = <BScene> sCtx.stmt;
                    (<BExpressDefine>stack.last().stmt).push( new BIdentifier(scene.token) );
                    // The current token is not consumed here; stages 2 and 3 still see it.
                }
            }
        }

        // ---- Stage 2: the top context is an expression being read.
        if (stack.last().stmt.getClass() == "ExpressDefine") {
            let ctx : Context = stack.last();
            let def : BExpressDefine = <BExpressDefine> ctx.stmt;
            if (ctx.status == "ready") {
                if (token == ':') {
                    // ':' closes the expression and notifies the enclosing context.
                    stack.pop();
                    stack.last().status = "express_read_end";
                }
                else if (token == ')') {
                    stack.pop();
                    stack.last().status = "express_read_end";
                }
                else if (token == ',') {
                    stack.pop();
                    stack.last().status = "express_read_end";
                }
                else if (token == '"') {
                    // String literal: read straight from the source text so the
                    // content is not affected by case folding of tokens.
                    let str : [string, number] = readString(source, p);
                    let v = new BValue();
                    v.type = BTypeDefine.String;
                    v.name = str[0];
                    p = str[1];
                    def.push(v);
                }
                else if (isNumber(token)) {
                    let v : BValue = new BValue();
                    v.name = token;
                    v.type = BTypeDefine.Integer;
                    def.push(v);
                }
                else if (BOperation.find(token) != null) {
                    let op : BOperation = BOperation.find(token);
                    def.push(op);
                }
                else if (isKeyword("True") || isKeyword("False")) {
                    let b : BValue = new BValue();
                    b.name = token;
                    b.type = BTypeDefine.Boolean;
                    def.push(b);
                }
                else if (isIdentifier(token)) {
                    // Call or variable? Decided by the next token in stage 1.
                    let s : BScene = new BScene(token);
                    stack.push(new Context(ctx, s, "ready"));
                }
            }
            else if (stack.last().status == "assign_read_expr_doublequote") {
                // NOTE(review): nothing in this function sets this status.
                if (token == '"')
                    ctx.status = "ready";
                else
                    throw "string miss double quote";
            }
            else if (stack.last().status == "express_read_end") {
                // A nested construct finished; propagate the end to the parent.
                stack.pop();
                stack.last().status = "express_read_end";
            }
        }

        // ---- Stage 3: dispatch on the context that is on top now.
        let ctx : Context = stack.last(); // context currently being processed
        let klass = ctx.klass;
        let status = ctx.status;          // state of that context (snapshot)
        if (klass== "Program" || klass == "FunctionDefine") {
            if (status == "ready") {
                if (isKeyword("Function")) {
                    if (klass == "Program") {
                        let def = <BProgram> ctx.stmt ;
                        let f : BFunctionDefine = <BFunctionDefine> def.addStatment(new BFunctionDefine(program));
                        stack.push(new Context(ctx, f, "function_read_funcname"));
                    }
                    else // "Function" inside a function body is rejected
                        throw "error miss keyword function";
                }
                else if (isKeyword("Dim")) {
                    let def : BContainerStatment = <BContainerStatment> ctx.stmt;
                    let d : BVariableDefine = <BVariableDefine> def.addStatment(new BVariableDefine());
                    stack.push(new Context( ctx, d, "dim_read_varname"));
                }
                else if (isKeyword("Let")) {
                    let def : BContainerStatment = <BContainerStatment> ctx.stmt;
                    let d : BAssignStatment = <BAssignStatment> def.addStatment(new BAssignStatment());
                    stack.push(new Context(ctx, d, "ready"));
                }
                else if (isKeyword("Return")) {
                    let def : BContainerStatment = <BContainerStatment> ctx.stmt;
                    let d : BReturnStatement = <BReturnStatement> def.addStatment(new BReturnStatement());
                    // Two contexts: the statement, and on top of it its expression body.
                    stack.push(new Context(ctx, d, "ready"));
                    stack.push(new Context(ctx, d.body, "ready"));
                }
                else if (isKeyword("End")) {
                    ctx.status = "function_read_end";
                }
                else if (isIdentifier(token)) {
                    // Bare expression statement starting with an identifier.
                    let def : BContainerStatment = <BContainerStatment> ctx.stmt;
                    let d : BExpressDefine = <BExpressDefine> def.addStatment(new BExpressDefine());
                    stack.push(new Context(ctx, d, "ready"));
                    stack.push(new Context(ctx, new BScene(token), "ready"));
                }
                // Any other token is ignored in this state.
            }
            else if (status == "function_read_funcname") {
                if (isIdentifier(token)) {
                    let def : BFunctionDefine = <BFunctionDefine> ctx.stmt;
                    def.name = token;
                    ctx.status = "function_read_params_leftboundary";
                }
                else
                    throw "function name format error";
            }
            else if (status == "function_read_params_leftboundary") {
                if (token == '(')
                    ctx.status = "function_read_params_name_or_end";
                else
                    throw "syntax error, miss ( "; // Function <funcname>(<paramname> as <paramtype>[,][...]) as <returntype>
            }
            else if (status == "function_read_params_name_or_end" ) {
                if (token == ')')
                    ctx.status = "function_read_return_as";
                else if (token == ',')
                    ctx.status = "function_read_params_name";
                else if (isIdentifier(token)) {
                    let def : BFunctionDefine = <BFunctionDefine> ctx.stmt;
                    let v : BVariableDefine = <BVariableDefine> def.addParamDefine(new BVariableDefine());
                    v.name = token;
                    // The name is already known, so the parameter starts at "As".
                    stack.push(new Context(ctx, v, "dim_read_as"));
                }
                else
                    throw "syntax error " + token;
            }
            else if (status == "function_read_params_name") {
                let def : BFunctionDefine = <BFunctionDefine> ctx.stmt;
                ctx.status = "function_read_params_name_or_end";
                if (isIdentifier(token)) {
                    let v = def.addParamDefine(new BVariableDefine());
                    v.name = token;
                    stack.push(new Context(ctx, v, "dim_read_as"));
                }
                else
                    throw "miss param";
            }
            else if (status == "function_read_return_as") {
                if (isKeyword("As"))
                    ctx.status = "function_read_return_type";
                else
                    throw "syntax error";
            }
            else if (status == "function_read_return_type") {
                let def : BFunctionDefine = <BFunctionDefine> ctx.stmt;
                def.returnType = token;
                ctx.status = "ready";
            }
            else if (status == "function_read_end") {
                if (isKeyword("Function")) {
                    stack.pop();
                }
                else
                    // Without this the state never changes and every following
                    // token would be dropped silently.
                    throw "syntax error, miss Function after End";
            }
            else if (status == "express_read_end") {
                // A statement inside this container finished; accept the next one.
                ctx.status = "ready";
            }
        }
        else if (klass == "VariableDefine") { // Dim <varname> As <typename>
            if (status == "dim_read_varname") {
                let def = <BVariableDefine>ctx.stmt;
                def.name = token;
                ctx.status = "dim_read_as";
            }
            else if (status == "dim_read_as") {
                if (isKeyword("As"))
                    ctx.status = "dim_read_type";
                else
                    throw "unknown token: " + token; // does not match Dim <varname> as <typename>
            }
            else if (status == "dim_read_type") {
                let def = <BVariableDefine>ctx.stmt;
                def.type = token;
                stack.pop();
            }
        }
        else if (klass == "AssignStatment") { // Let <varname> = <expression>
            let def : BAssignStatment = <BAssignStatment>ctx.stmt;
            if (status == "ready") {
                if (isIdentifier(token)) {
                    def.varname = token;
                    ctx.status = "assign_read_equal";
                }
                else
                    throw "assign left not ident";
            }
            else if (status == "assign_read_equal") {
                if (token == "=") {
                    stack.push(new Context(ctx, (<BAssignStatment>ctx.stmt).body, "ready"));
                }
                else
                    throw "assign stat miss equal";
            }
            else if (status == "express_read_end") {
                stack.pop();
            }
        }
        else if (klass == "FunctionCall") {
            let def : BFunctionCall = <BFunctionCall> ctx.stmt;
            if (status == "ready") {
                if (token == '(') {
                    // Opening parenthesis: start reading the first argument.
                    let b : BExpressDefine = def.pushParam(new BExpressDefine());
                    stack.push(new Context(ctx, b, "ready") );
                }
                else if (token == '"') {
                    // NOTE(review): this expression is not attached to the call via
                    // pushParam, and no branch in this function handles the
                    // "express_read_string_value" status.
                    let e : BExpressDefine = new BExpressDefine();
                    stack.push(new Context(ctx, e, "express_read_string_value") );
                }
            }
            else if (status == "express_read_end") {
                if (token == ',') { // another argument follows
                    let b : BExpressDefine = def.pushParam(new BExpressDefine());
                    stack.push(new Context(ctx, b, "ready") );
                }
                else if (token == ')') { // argument list closed
                    stack.pop();
                }
            }
        }
        else if (klass == "ReturnStatement") {
            if (status == "express_read_end") {
                stack.pop();
            }
        }
    }
    return program;
}
执行代码如下:
// Sample program fed to the parser, one entry per source line.
// Lines are terminated (not merely separated) by "\r", hence the trailing "\r".
const sampleLines : string[] = [
    "Dim a As Integer",
    "Dim b As Integer",
    "",
    "Function add(c As Integer, d As Integer) As Integer",
    " Return c+d",
    "End Function",
    "",
    "Function main() As Integer",
    " Let a = 1",
    " Let b = 2",
    " print(add(a,b))",
    "End Function",
];
let code = sampleLines.join("\r") + "\r";

// Build the AST, then print the AST object followed by its toString() output.
let prg : BProgram = parse(code);
console.log(prg);
console.log(prg.toString());
执行结果：先输出 AST 结构，再根据 AST 结构反向生成并输出源代码。