递归下降一直下降到表达式为止。
语句(statement):描述做什么,不需要返回值 (if…else…,while…,return… )
表达式(expression):为了求值,需要一个返回值 ( 2+3,4*5 )
但是a = 3的情况很难说是赋值语句还是表达式
例如下面的情况:
if((a=3)!=0){ //这里(a=3)会返回值3,因此3!=0,条件为真
xxxxx;
}else{
xxxxx;
}
因此我们把a = 3看成是表达式。
操作符优先级(operator precedence):
优先级爬山算法:也就是遇到的符号的优先级能在栈中的情况一定是从栈底到栈顶,优先级越来越高(>=,也可以同级)的情况(比如+ → * → ++ )
//表达式的处理过程
*++a++ = i==0 ? 2+3 : 4+5 ;
// number stack: a
// operator stack: * ++
//下一个要入栈的操作符是后置的++,优先级更低,所以要先出栈
// number stack: a
// operator stack: * ++
// 先出栈operator stack中的++,处理++a,假设原本a = 230230,(64bit的情况下)++a后变成230238.
// number stack: a
// operator stack: *
// 此时后置的++优先级依然比*低,因此还要继续出栈operator stack中的*,给a dereference
// number stack: *a
// operator stack: ++
// 此时置入=,=的优先级依然比++低,因此还要继续出栈operator stack中的++,因此(*a)++
// 然后i进入number stack
// number stack: *a i
// operator stack: ++
// 此时置入=,=的优先级依然比++低,因此还要继续出栈operator stack中的++,因此(*a)++
// number stack: *a i
// operator stack: =
// ==的优先级比=高,所以==可以直接入operator stack
// number stack: *a i 0
// operator stack: = ==
//三元操作符? :的优先级很低,不能直接入栈
//先运算栈中的i==0,假设i=0,结果为true,然后操作符==出栈,运算的数字i和0出栈,结果true入栈
// number stack: *a true
// operator stack: = ?:
//运算已经到末尾,此时会对?:做处理,true时是2+3
// number stack: *a 2 3
// operator stack: = +
//继续出栈符号和数字
// number stack: *a 5
// operator stack: =
//运算的最后一步,赋值操作 *a = 5
//表达式完成求值
上面的
number stack: 可以通过把操作数压入虚拟机的栈(PUSH指令)实现
operator stack: 可以通过递归调用的函数栈实现
parse_expr()函数:
int type; // type of the most recently parsed (sub)expression; parse_expr() writes it so recursive callers can read the operand type
void parse_expr(int precd) { //precd会传入一个优先级,就是目前栈顶的operater的优先级,是个枚举变量对应的数值
int tmp_type, i;
int* tmp_ptr;
// const number
if (token == Num) {
tokenize();
*++code = IMM;
*++code = token_val;
type = INT;
}
// const string
else if (token == '"') {
*++code = IMM;
*++code = token_val; // string addr
assert('"'); while (token == '"') assert('"'); // handle multi-row
data = (char*)((int)data + 8 & -8); // add \0 for string & align 8
type = PTR;
}
else if (token == Sizeof) { //这里把sizeof当成一个数据,因为sizeof的返回值是int
tokenize(); assert('(');
type = parse_base_type();
while (token == Mul) {assert(Mul); type = type + PTR;}
assert(')');
*++code = IMM;
*++code = (type == CHAR) ? 1 : 8;
type = INT;
}
// handle identifer: variable or function all
else if (token == Id) { //如果token=Id,那么可能是变量,也有可能是函数调用
tokenize();
tmp_ptr = symbol_ptr; // for recursive parse
// function call 函数调用
if (token == '(') {
assert('(');
i = 0; // number of args
while (token != ')') {
parse_expr(Assign);
*++code = PUSH; i++;
if (token == ',') assert(',');
} assert(')');
// native call
if (tmp_ptr[Class] == Sys) *++code = tmp_ptr[Value];
// fun call
else if (tmp_ptr[Class] == Fun) {*++code = CALL; *++code = tmp_ptr[Value];}
else {printf("line %lld: invalid function call\n", line); exit(-1);}
// delete stack frame for args
if (i > 0) {*++code = DARG; *++code = i;}
type = tmp_ptr[Type];
}
// handle enum value 存储数据的变量名
else if (tmp_ptr[Class] == Num) {
*++code = IMM; *++code = tmp_ptr[Value]; type = INT;
}
// handle variables
else {
// local var, calculate addr base ibp
if (tmp_ptr[Class] == Loc) {*++code = LEA; *++code = ibp - tmp_ptr[Value];}
// global var
else if (tmp_ptr[Class] == Glo) {*++code = IMM; *++code = tmp_ptr[Value];}
else {printf("line %lld: invalid variable\n", line); exit(-1);}
type = tmp_ptr[Type];
*++code = (type == CHAR) ? LC : LI;
}
}
// cast or parenthesis
else if (token == '(') {
assert('(');
if (token == Char || token == Int) {
tokenize();
tmp_type = token - Char + CHAR;
while (token == Mul) {assert(Mul); tmp_type = tmp_type + PTR;}
// use precedence Inc represent all unary operators
assert(')'); parse_expr(Inc); type = tmp_type;
} else {
parse_expr(Assign); assert(')');
}
}
// derefer
else if (token == Mul) {
tokenize(); parse_expr(Inc);
if (type >= PTR) type = type - PTR;
else {printf("line %lld: invalid dereference\n", line); exit(-1);}
*++code = (type == CHAR) ? LC : LI;
}
// reference
else if (token == And) {
tokenize(); parse_expr(Inc);
if (*code == LC || *code == LI) code--; // rollback load by addr
else {printf("line %lld: invalid reference\n", line); exit(-1);}
type = type + PTR;
}
// Not
else if (token == '!') {
tokenize(); parse_expr(Inc);
*++code = PUSH; *++code = IMM; *++code = 0; *++code = EQ;
type = INT;
}
// bitwise
else if (token == '~') {
tokenize(); parse_expr(Inc);
*++code = PUSH; *++code = IMM; *++code = -1; *++code = XOR;
type = INT;
}
// positive
else if (token == And) {tokenize(); parse_expr(Inc); type = INT;}
// negative
else if (token == Sub) {
tokenize(); parse_expr(Inc);
*++code = PUSH; *++code = IMM; *++code = -1; *++code = MUL;
type = INT;
}
// ++var --var 前置的++,--
else if (token == Inc || token == Dec) {
i = token; tokenize(); parse_expr(Inc);
// save var addr, then load var val
if (*code == LC) {*code = PUSH; *++code = LC;}
else if (*code == LI) {*code = PUSH; *++code = LI;}
else {printf("line %lld: invalid Inc or Dec\n", line); exit(-1);}
*++code = PUSH; // save var val
*++code = IMM; *++code = (type > PTR) ? 8 : 1;
*++code = (i == Inc) ? ADD : SUB; // calculate
*++code = (type == CHAR) ? SC : SI; // write back to var addr
}
else {printf("line %lld: invalid expression\n", line); exit(-1);}
// use [precedence climbing] method to handle binary(or postfix) operators
// 如果token优先级是比栈顶更高的话,就进入while循环
while (token >= precd) {
tmp_type = type;
// assignment 赋值
if (token == Assign) {
tokenize();
if (*code == LC || *code == LI) *code = PUSH;
else {printf("line %lld: invalid assignment\n", line); exit(-1);}
parse_expr(Assign); type = tmp_type; // type can be cast
*++code = (type == CHAR) ? SC : SI;
}
// ? :, same as if stmt
else if (token == Cond) {
tokenize(); *++code = JZ; tmp_ptr = ++code;
parse_expr(Assign); assert(':');
*tmp_ptr = (int)(code + 3);
*++code = JMP; tmp_ptr = ++code; // save endif addr
parse_expr(Cond);
*tmp_ptr = (int)(code + 1); // write back endif point
}
// logic operators, simple and boring, copy from c4
else if (token == Lor) {
tokenize(); *++code = JNZ; tmp_ptr = ++code;
parse_expr(Land); *tmp_ptr = (int)(code + 1); type = INT;}
else if (token == Land) {
tokenize(); *++code = JZ; tmp_ptr = ++code;
parse_expr(Or); *tmp_ptr = (int)(code + 1); type = INT;}
// ↓ 把ax压入栈中 ↓递归调用,解析剩下的表达式 ↓完成递归回到这里就要计算对应operater的操作符了
else if (token == Or) {tokenize(); *++code = PUSH; parse_expr(Xor); *++code = OR; type = INT;}
else if (token == Xor) {tokenize(); *++code = PUSH; parse_expr(And); *++code = XOR; type = INT;}
else if (token == And) {tokenize(); *++code = PUSH; parse_expr(Eq); *++code = AND; type = INT;}
else if (token == Eq) {tokenize(); *++code = PUSH; parse_expr(Lt); *++code = EQ; type = INT;}
else if (token == Ne) {tokenize(); *++code = PUSH; parse_expr(Lt); *++code = NE; type = INT;}
else if (token == Lt) {tokenize(); *++code = PUSH; parse_expr(Shl); *++code = LT; type = INT;}
else if (token == Gt) {tokenize(); *++code = PUSH; parse_expr(Shl); *++code = GT; type = INT;}
else if (token == Le) {tokenize(); *++code = PUSH; parse_expr(Shl); *++code = LE; type = INT;}
else if (token == Ge) {tokenize(); *++code = PUSH; parse_expr(Shl); *++code = GE; type = INT;}
else if (token == Shl) {tokenize(); *++code = PUSH; parse_expr(Add); *++code = SHL; type = INT;}
else if (token == Shr) {tokenize(); *++code = PUSH; parse_expr(Add); *++code = SHR; type = INT;}
// arithmetic operators
else if (token == Add) { // 加
tokenize(); *++code = PUSH; parse_expr(Mul);
// int pointer * 8
if (tmp_type > PTR) {*++code = PUSH; *++code = IMM; *++code = 8; *++code = MUL;}
*++code = ADD; type = tmp_type;
}
else if (token == Sub) { // 减
tokenize(); *++code = PUSH; parse_expr(Mul);
if (tmp_type > PTR && tmp_type == type) {
// pointer - pointer, ret / 8
*++code = SUB; *++code = PUSH;
*++code = IMM; *++code = 8;
*++code = DIV; type = INT;}
else if (tmp_type > PTR) {
*++code = PUSH;
*++code = IMM; *++code = 8;
*++code = MUL;
*++code = SUB; type = tmp_type;}
else *++code = SUB;
}
else if (token == Mul) {tokenize(); *++code = PUSH; parse_expr(Inc); *++code = MUL; type = INT;}
else if (token == Div) {tokenize(); *++code = PUSH; parse_expr(Inc); *++code = DIV; type = INT;}
else if (token == Mod) {tokenize(); *++code = PUSH; parse_expr(Inc); *++code = MOD; type = INT;}
// var++, var-- 后置的++,--
else if (token == Inc || token == Dec) {
if (*code == LC) {*code = PUSH; *++code = LC;} // save var addr
else if (*code == LI) {*code = PUSH; *++code = LI;}
else {printf("%lld: invlid operator=%lld\n", line, token); exit(-1);}
*++code = PUSH; *++code = IMM; *++code = (type > PTR) ? 8 : 1;
*++code = (token == Inc) ? ADD : SUB;
*++code = (type == CHAR) ? SC : SI; // save value ++ or -- to addr
*++code = PUSH; *++code = IMM; *++code = (type > PTR) ? 8 : 1;
*++code = (token == Inc) ? SUB : ADD; // restore value before ++ or --
tokenize();
}
// a[x] = *(a + x)
else if (token == Brak) {
assert(Brak); *++code = PUSH; parse_expr(Assign); assert(']');
if (tmp_type > PTR) {*++code = PUSH; *++code = IMM; *++code = 8; *++code = MUL;}
else if (tmp_type < PTR) {printf("line %lld: invalid index op\n", line); exit(-1);}
*++code = ADD; type = tmp_type - PTR;
*++code = (type == CHAR) ? LC : LI;
}
else {printf("%lld: invlid token=%lld\n", line, token); exit(-1);}
}
}