「700行手写编译器」Part 3.3:表达式与优先级爬山 笔记

递归下降一直下降到表达式为止。
在这里插入图片描述
语句(statement):描述做什么,不需要返回值 (if…else…,while…,return… )
表达式(expression):为了求值,需要一个返回值 ( 2+3,4*5 )
但是a = 3的情况很难说是赋值语句还是表达式
例如下面的情况:

if((a=3)!=0){   //这里(a=3)会返回值3,因此3!=0,条件为真
	xxxxx;
}else{
	xxxxx;
}

因此我们把a = 3看成是表达式


操作符优先级(operator precedence)
在这里插入图片描述

在这里插入图片描述

在这里插入图片描述
优先级爬山算法:操作符栈中的操作符,从栈底到栈顶优先级必须依次升高(>=,允许同级),比如 + → * → ++ ;如果新遇到的操作符优先级低于栈顶操作符,就要先把栈顶操作符出栈并完成计算,然后新操作符才能入栈。

//表达式的处理过程
*++a++ = i==0 ? 2+3 : 4+5 ;

//   number stack: a
// operator stack: * ++

//下一个要入栈的操作符是后置的++,优先级更低,所以要先出栈

//   number stack: a
// operater stack: * ++
// 先出栈operator stack中的++,处理++a,假设原本a = 230230,(64bit的情况下,指针每次步进8字节)++a后变成230238.

//   number stack: a
// operater stack: *
// 此时后置的++优先级依然比*低,因此还要继续出栈operator stack中的*,给a dereference

//   number stack: *a
// operater stack: ++
// 此时置入=,=的优先级依然比++低,因此还要继续出栈operator stack中的++,因此(*a)++

// 然后i进入number stack
//   number stack: *a  i
// operater stack: ++

// 此时置入=,=的优先级依然比++低,因此还要继续出栈operator stack中的++,因此(*a)++
//   number stack: *a  i
// operater stack:  =

// ==的优先级比=高,所以==可以直接入operater stack
//   number stack: *a  i  0
// operater stack:  =  ==

//三元操作符? :的优先级很低,不能直接入栈
//先运算栈中的i==0,假设i=0,结果为true,然后操作符==出栈,运算的数字i和0出栈,结果true入栈
//   number stack: *a  true
// operater stack: =    ?:

//运算已经到末尾,此时会对?:做处理,true时是2+3
//   number stack: *a  2  3
// operater stack: =   +

//继续出栈符号和数字
//   number stack: *a  5
// operater stack: = 

//运算的最后一步,赋值操作 *a = 5
//表达式完成求值

上面的
number stack:可以通过把值压入(PUSH)运行时的栈来实现
operator stack:可以通过递归调用parse_expr()时的函数调用栈实现


parse_expr()函数:

int type; // type of the expression most recently parsed by parse_expr(); carries type info across its recursive calls
void parse_expr(int precd) {   //precd会传入一个优先级,就是目前栈顶的operater的优先级,是个枚举变量对应的数值
    int tmp_type, i;
    int* tmp_ptr;
    // const number
    if (token == Num) {
        tokenize();
        *++code = IMM;
        *++code = token_val;
        type = INT;
    } 
    // const string
    else if (token == '"') {
        *++code = IMM;
        *++code = token_val; // string addr
        assert('"'); while (token == '"') assert('"'); // handle multi-row
        data = (char*)((int)data + 8 & -8); // add \0 for string & align 8
        type = PTR;
    }
    else if (token == Sizeof) { //这里把sizeof当成一个数据,因为sizeof的返回值是int
        tokenize(); assert('(');
        type = parse_base_type();
        while (token == Mul) {assert(Mul); type = type + PTR;}
        assert(')');
        *++code = IMM;
        *++code = (type == CHAR) ? 1 : 8; 
        type = INT;
    }
    // handle identifer: variable or function all
    else if (token == Id) { //如果token=Id,那么可能是变量,也有可能是函数调用
        tokenize();
        tmp_ptr = symbol_ptr; // for recursive parse
        // function call 函数调用
        if (token == '(') {
            assert('(');
            i = 0; // number of args
            while (token != ')') {
                parse_expr(Assign);
                *++code = PUSH; i++;
                if (token == ',') assert(',');
            } assert(')');
            // native call
            if (tmp_ptr[Class] == Sys) *++code = tmp_ptr[Value];
            // fun call
            else if (tmp_ptr[Class] == Fun) {*++code = CALL; *++code = tmp_ptr[Value];}
            else {printf("line %lld: invalid function call\n", line); exit(-1);}
            // delete stack frame for args
            if (i > 0) {*++code = DARG; *++code = i;}
            type = tmp_ptr[Type];
        }
        // handle enum value 存储数据的变量名
        else if (tmp_ptr[Class] == Num) {
            *++code = IMM; *++code = tmp_ptr[Value]; type = INT;
        }
        // handle variables
        else {
            // local var, calculate addr base ibp
            if (tmp_ptr[Class] == Loc) {*++code = LEA; *++code = ibp - tmp_ptr[Value];}
            // global var
            else if (tmp_ptr[Class] == Glo) {*++code = IMM; *++code = tmp_ptr[Value];}
            else {printf("line %lld: invalid variable\n", line); exit(-1);}
            type = tmp_ptr[Type];
            *++code = (type == CHAR) ? LC : LI;
        }
    }
    // cast or parenthesis
    else if (token == '(') {
        assert('(');
        if (token == Char || token == Int) {
            tokenize();
            tmp_type = token - Char + CHAR;
            while (token == Mul) {assert(Mul); tmp_type = tmp_type + PTR;}
            // use precedence Inc represent all unary operators
            assert(')'); parse_expr(Inc); type = tmp_type;
        } else {
            parse_expr(Assign); assert(')');
        }
    }
    // derefer
    else if (token == Mul) {
        tokenize(); parse_expr(Inc);
        if (type >= PTR) type = type - PTR;
        else {printf("line %lld: invalid dereference\n", line); exit(-1);}
        *++code = (type == CHAR) ? LC : LI;
    }
    // reference
    else if (token == And) {
        tokenize(); parse_expr(Inc);
        if (*code == LC || *code == LI) code--; // rollback load by addr
        else {printf("line %lld: invalid reference\n", line); exit(-1);}
        type = type + PTR;
    }
    // Not
    else if (token == '!') {
        tokenize(); parse_expr(Inc);
        *++code = PUSH; *++code = IMM; *++code = 0; *++code = EQ;
        type = INT;
    }
    // bitwise
    else if (token == '~') {
        tokenize(); parse_expr(Inc);
        *++code = PUSH; *++code = IMM; *++code = -1; *++code = XOR;
        type = INT;
    }
    // positive
    else if (token == And) {tokenize(); parse_expr(Inc); type = INT;}
    // negative
    else if (token == Sub) {
        tokenize(); parse_expr(Inc);
        *++code = PUSH; *++code = IMM; *++code = -1; *++code = MUL;
        type = INT;
    }
    // ++var --var 前置的++,--
    else if (token == Inc || token == Dec) {
        i = token; tokenize(); parse_expr(Inc);
        // save var addr, then load var val
        if (*code == LC) {*code = PUSH; *++code = LC;}
        else if (*code == LI) {*code = PUSH; *++code = LI;}
        else {printf("line %lld: invalid Inc or Dec\n", line); exit(-1);}
        *++code = PUSH; // save var val
        *++code = IMM; *++code = (type > PTR) ? 8 : 1;
        *++code = (i == Inc) ? ADD : SUB; // calculate
        *++code = (type == CHAR) ? SC : SI; // write back to var addr
    }
    else {printf("line %lld: invalid expression\n", line); exit(-1);}
    // use [precedence climbing] method to handle binary(or postfix) operators
    // 如果token优先级是比栈顶更高的话,就进入while循环
    while (token >= precd) {
        tmp_type = type;    
        // assignment 赋值
        if (token == Assign) {
            tokenize();
            if (*code == LC || *code == LI) *code = PUSH;
            else {printf("line %lld: invalid assignment\n", line); exit(-1);}
            parse_expr(Assign); type = tmp_type; // type can be cast
            *++code = (type == CHAR) ? SC : SI;
        }
        // ? :, same as if stmt
        else if (token == Cond) {
            tokenize(); *++code = JZ; tmp_ptr = ++code;
            parse_expr(Assign); assert(':');
            *tmp_ptr = (int)(code + 3);
            *++code = JMP; tmp_ptr = ++code; // save endif addr
            parse_expr(Cond);
            *tmp_ptr = (int)(code + 1); // write back endif point
        }
        // logic operators, simple and boring, copy from c4
        else if (token == Lor) {
            tokenize(); *++code = JNZ; tmp_ptr = ++code;
            parse_expr(Land); *tmp_ptr = (int)(code + 1); type = INT;}
        else if (token == Land) {
            tokenize(); *++code = JZ; tmp_ptr = ++code;
            parse_expr(Or); *tmp_ptr = (int)(code + 1); type = INT;}
        //                                   ↓ 把ax压入栈中  ↓递归调用,解析剩下的表达式 ↓完成递归回到这里就要计算对应operater的操作符了
        else if (token == Or)  {tokenize(); *++code = PUSH; parse_expr(Xor); *++code = OR;  type = INT;}
        else if (token == Xor) {tokenize(); *++code = PUSH; parse_expr(And); *++code = XOR; type = INT;}
        else if (token == And) {tokenize(); *++code = PUSH; parse_expr(Eq);  *++code = AND; type = INT;}
        else if (token == Eq)  {tokenize(); *++code = PUSH; parse_expr(Lt);  *++code = EQ;  type = INT;}
        else if (token == Ne)  {tokenize(); *++code = PUSH; parse_expr(Lt);  *++code = NE;  type = INT;}
        else if (token == Lt)  {tokenize(); *++code = PUSH; parse_expr(Shl); *++code = LT;  type = INT;}
        else if (token == Gt)  {tokenize(); *++code = PUSH; parse_expr(Shl); *++code = GT;  type = INT;}
        else if (token == Le)  {tokenize(); *++code = PUSH; parse_expr(Shl); *++code = LE;  type = INT;}
        else if (token == Ge)  {tokenize(); *++code = PUSH; parse_expr(Shl); *++code = GE;  type = INT;}
        else if (token == Shl) {tokenize(); *++code = PUSH; parse_expr(Add); *++code = SHL; type = INT;}
        else if (token == Shr) {tokenize(); *++code = PUSH; parse_expr(Add); *++code = SHR; type = INT;}
        // arithmetic operators
        else if (token == Add) {   // 加
            tokenize(); *++code = PUSH; parse_expr(Mul);
            // int pointer * 8
            if (tmp_type > PTR) {*++code = PUSH; *++code = IMM; *++code = 8; *++code = MUL;}
            *++code = ADD; type = tmp_type;
        }
        else if (token == Sub) {   // 减
            tokenize(); *++code = PUSH; parse_expr(Mul);
            if (tmp_type > PTR && tmp_type == type) {
                // pointer - pointer, ret / 8
                *++code = SUB; *++code = PUSH;
                *++code = IMM; *++code = 8;
                *++code = DIV; type = INT;}
            else if (tmp_type > PTR) {
                *++code = PUSH;
                *++code = IMM; *++code = 8;
                *++code = MUL;
                *++code = SUB; type = tmp_type;}
            else *++code = SUB;
        }
        else if (token == Mul) {tokenize(); *++code = PUSH; parse_expr(Inc); *++code = MUL; type = INT;}
        else if (token == Div) {tokenize(); *++code = PUSH; parse_expr(Inc); *++code = DIV; type = INT;}
        else if (token == Mod) {tokenize(); *++code = PUSH; parse_expr(Inc); *++code = MOD; type = INT;}
        // var++, var--   后置的++,--
        else if (token == Inc || token == Dec) {
            if (*code == LC) {*code = PUSH; *++code = LC;} // save var addr
            else if (*code == LI) {*code = PUSH; *++code = LI;}
            else {printf("%lld: invlid operator=%lld\n", line, token); exit(-1);}
            *++code = PUSH; *++code = IMM; *++code = (type > PTR) ? 8 : 1;
            *++code = (token == Inc) ? ADD : SUB;
            *++code = (type == CHAR) ? SC : SI; // save value ++ or -- to addr
            *++code = PUSH; *++code = IMM; *++code = (type > PTR) ? 8 : 1;
            *++code = (token == Inc) ? SUB : ADD; // restore value before ++ or --
            tokenize();
        }
        // a[x] = *(a + x)
        else if (token == Brak) {
            assert(Brak); *++code = PUSH; parse_expr(Assign); assert(']');
            if (tmp_type > PTR) {*++code = PUSH; *++code = IMM; *++code = 8; *++code = MUL;}
            else if (tmp_type < PTR) {printf("line %lld: invalid index op\n", line); exit(-1);}
            *++code = ADD; type = tmp_type - PTR;
            *++code = (type == CHAR) ? LC : LI;
        }
        else {printf("%lld: invlid token=%lld\n", line, token); exit(-1);}
    }
}
  • 7
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

爱吃小酥肉的小波

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值