动手实现编译器（十二）——全局变量

最新推荐文章于 2023-05-26 16:56:57 发布

qq_34851605

最新推荐文章于 2023-05-26 16:56:57 发布

阅读量636

点赞数

分类专栏：编译器文章标签：编译器

本文链接：https://blog.csdn.net/qq_34851605/article/details/118198070

版权

编译器专栏收录该内容

16 篇文章 33 订阅

订阅专栏

在上一节中，我们实现了函数的调用和返回，在这一节中，我们来处理真正的全局变量。之前我们把变量定义语句的解析放在Block语句分析函数中，因此只能分析Block中的变量定义，无法处理全局变量。所以我决定将全局变量声明的解析移到函数声明之外。同时，我仍然在函数内部保留了变量声明的解析，因为稍后我们会将它们改为局部变量声明。此外，我们还会支持在一条语句中同时声明多个相同类型的变量，例如：

  int x, y, z;

完整的变量定义语法

编译单元 CompUnit → [ CompUnit ] ( Decl | FuncDef )
声明 Decl → ConstDecl | VarDecl
基本类型 BType → ‘int’
变量声明 VarDecl → BType VarDef { ‘,’ VarDef } ‘;’
变量定义 VarDef → Ident { ‘[’ ConstExp ‘]’ }
函数定义 FuncDef → FuncType Ident ‘(’ [FuncFParams] ‘)’ Block
语句块 Block → ‘{’ { BlockItem } ‘}’
语句块项 BlockItem → Decl | Stmt

修改词法分析

新增单词','，并将其定义为T_COMMA类型。在scan()函数中添加对','的解析语句：

// ',' separates the variable definitions of a multi-variable declaration
case ',':   t->token = T_COMMA;  break;

新增parse_type()函数，来识别数据类型

// Map the current token to its primitive-type enum value, then advance
// the scanner to the next token. Any token other than T_VOID or T_KEYINT
// is reported on stderr and the program exits with status 1.
int parse_type()
{
    int type;

    if (Token.token == T_VOID)
    {
        type = P_VOID;
    }
    else if (Token.token == T_KEYINT)
    {
        type = P_INT;
    }
    else
    {
        fprintf(stderr, "Illegal type, token:%d on line %d\n", Token.token, Line);
        exit(1);
    }

    // Consume the type keyword so the caller sees the token that follows it
    scan(&Token);
    return type;
}

修改var_declaration()，使其能解析','的情况。下面的global_declarations()负责解析全局变量和函数，并在遇到变量声明时调用var_declaration()

// Parse global variable declarations and functions, one after another,
// until the end of the input is reached.
void global_declarations()
{
    do
    {
        // Every top-level item starts with a type followed by an identifier
        int type = parse_type();
        ident();

        if (Token.token != T_LPAREN)
        {
            // No '(' after the name: this is a global variable declaration
            var_declaration(type);
            continue;
        }

        // A '(' after the name: parse the function and generate its assembly
        struct ASTnode *tree = function_declaration(type);
        code_generator(tree, NOREG, 0);
    } while (Token.token != T_EOF);   // stop once EOF has been reached
}

同时，在function_declaration函数中，做出对应的修改

// Parse a simple function declaration.
// `type` is the type value the caller obtained before the function name;
// it is stored as the function's type in the symbol table.
struct ASTnode *function_declaration(int type)
{
    struct ASTnode *tree, *finalstmt;
    int nameslot, endlabel;
    // Only P_VOID and P_INT are accepted as the function's type;
    // anything else is a fatal error
    if (type != P_VOID && type != P_INT)
    {
        fprintf(stderr, "Void or int expected on line %d\n", Line);
        exit(1);
    }
    // Get a label id for the end of the function,
    // add the function to the symbol table,
    // and set Functionid to the function's symbol id
    endlabel = label();
    nameslot = add_global(Text, type, S_FUNCTION, endlabel);
    Functionid = nameslot;
    /* ...rest of the parsing omitted... */

修改语法分析

在single_statement函数的KEYINT类别中，做出对应的修改

// Parse a single statement and return its AST
struct ASTnode *single_statement()
{
    int type;
    switch (Token.token)
    {
        case T_PRINT:   return print_statement();
        // Variable declaration: parse the type, the identifier, then the declaration itself
        case T_KEYINT:  type = parse_type(); ident(); var_declaration(type);  return NULL; // no AST is generated
		/* ...remaining cases omitted... */
    }
}

重构变量和常量的汇编

我在上一节说过，随着功能的增加，我们会不断修改代码，使功能更加完善和效率更高。
首先是ARM汇编中直接MOV操作的立即数最大为11位，超过11位的立即数会先储存在内存中，再载入寄存器。我们对立即数载入函数进行修改：

#define MAXINTS      1024   // maximum number of entries in the large-integer table
int Intlist[MAXINTS];                   // table of large integers
int Intslot = 0;                        // number of entries currently stored in Intlist

// 从.L3标签确定大整数的偏移量,
// 如果整数不在列表中，则添加它
void set_int_offset(int val)
{
    int offset = -1;
    // 检查它是否已经存在
    for (int i = 0; i < Intslot; i++)
    {
        if (Intlist[i] == val)
        {
            offset = 4 * i;
            break;
        }
    }
    // 不在列表中，将它加入列表
    if (offset == -1)
    {
        offset = 4 * Intslot;
        if (Intslot == MAXINTS)
        {
            fprintf(stderr, "Out of int slots in set_int_offset() on line %d\n", Line);
            exit(1);
        }
        Intlist[Intslot++] = val;
    }
    // 从偏移地址载入数据到r3
    fprintf(Outfile, "\tldr\tr3, .L3+%d\n", offset + 4);	// 零地址为打印reg函数的格式符，所以偏移地址都加4
}

// Load an integer constant into a newly allocated register and
// return that register's number.
int arm_load_int(int value)
{
    int r = arm_alloc_register();

    if (value > 2000)
    {
        // Large constant: fetch it into r3 via set_int_offset(), then copy it into r
        set_int_offset(value);
        fprintf(Outfile, "\tmov\tr%d, r3\n", r);
        return r;
    }

    // Small constant: emit it directly as a mov immediate.
    // NOTE(review): negative values also satisfy value <= 2000 and take
    // this path — confirm that is intended.
    fprintf(Outfile, "\tmov\tr%d, #%d\n", r, value);
    return r;
}

修改set_var_offset()，跳过函数变量，使生成汇编效率更高

// Emit an ldr that loads the .L2 table entry of global symbol `id` into r3.
//
// Only symbols whose stype is S_VARIABLE occupy a 4-byte slot in the .L2
// table, so the byte offset is 4 times the number of variable symbols that
// precede `id` in Tsym, not 4 * id. Using id * 4 would address the wrong
// slot whenever a non-variable symbol (e.g. a function) sits before the
// variable in the symbol table.
void set_var_offset(int id)
{
    int offset = 0;
    for (int i = 0; i < id; i++)
    {
        // Skip symbols that have no slot in the .L2 table
        if (Tsym[i].stype == S_VARIABLE)
        {
            offset += 4;
        }
    }
    fprintf(Outfile, "\tldr\tr3, .L2+%d\n", offset);
}

同时，修改arm_postamble()对应的变化

// Emit the trailing tables of the assembly file: the .L3 table
// (the .LC0 entry followed by the large integers) and the .L2 table
// (one entry per global variable).
void arm_postamble()
{
    int idx;

    // .L3 starts with the .LC0 word, followed by every stored large integer
    fputs(
        ".L3:\n"
        "\t.word   .LC0\n",
    Outfile);
    idx = 0;
    while (idx < Intslot)
    {
        fprintf(Outfile, "\t.word	%d\n", Intlist[idx]);
        idx++;
    }

    // .L2 lists the global variables; symbols of any other kind are skipped
    fprintf(Outfile, ".L2:\n");
    for (idx = 0; idx < Globals; idx++)
    {
        if (Tsym[idx].stype != S_VARIABLE)
        {
            continue;
        }
        fprintf(Outfile, "\t.word	%s\n", Tsym[idx].name);
    }
}

修改arm_function_preamble()和arm_function_postamble()，以适应后面对多参数函数和含多条return语句的函数的处理。

// Emit the prologue of the function called `name`: the section and symbol
// directives, the function label, and the stack-frame setup.
void arm_function_preamble(char *name)
{
    // Section, alignment and symbol directives, then the function label
    fputs("\t.text\n" "\t.align  2\n", Outfile);
    fprintf(Outfile, "\t.globl\t%s\n", name);
    fprintf(Outfile, "\t.type\t%s, %%function\n", name);
    fprintf(Outfile, "%s:\n", name);

    // Save fp and lr, set up fp, reserve 8 bytes of stack and store r0 at [fp, #-8]
    fputs(
        "\tpush    {fp, lr}\n"
        "\tadd     fp, sp, #4\n"
        "\tsub\tsp, sp, #8\n"
        "\tstr\tr0, [fp, #-8]\n",
    Outfile);
}

// Emit the epilogue of the function whose symbol id is `id`:
// its end label, the stack restore and the return.
void arm_function_postamble(int id)
{
    // The function's end label comes first
    arm_label(Tsym[id].endlabel);

    // Restore sp from fp, pop fp and pc, then realign
    fputs("\tsub\tsp, fp, #4\n", Outfile);
    fputs("\tpop\t{fp, pc}\n", Outfile);
    fputs("\t.align\t2\n", Outfile);
}

同时，在函数中做对应的修改

// Generate the code for a return statement: move the value held in
// register `reg` into r0 and jump to the end label of function `id`.
// Only functions of type P_INT are supported; any other type is fatal.
void arm_return(int reg, int id)
{
    if (Tsym[id].type != P_INT)
    {
        fprintf(stderr, "Bad function type in cgreturn:%d on line %d\n", Tsym[id].type, Line);
        exit(1);
    }

    fprintf(Outfile, "\tmov\tr0, r%d\n", reg);
    arm_jump(Tsym[id].endlabel);
}

最后，重构arm_call()函数，复用寄存器，以减少寄存器申请的步骤

// Call function `id` with the single argument held in register `r`.
// The argument is moved into r0 before the call and the result is moved
// from r0 back into the same register, which is returned.
int arm_call(int r, int id)
{
    fprintf(Outfile,
        "\tmov\tr0, r%d\n"
        "\tbl\t%s\n"
        "\tmov\tr%d, r0\n",
        r, Tsym[id].name, r);
    return r;
}

测试结果

修改main()函数，使之适应以上的改变

// Usage: compiler -o -s outfile infile
int main(int argc, char *argv[])
{
	/* ...argument handling omitted... */
    scan(&Token);			            // fetch the first token from the input
    arm_preamble();
    global_declarations();	            // parse global variables and functions
    arm_postamble();
    fclose(Outfile);
}

输入：

int a,b;

int main()
{
    a = 3;
    b = 5;
    print a + b;
    return a + b;
}

输出（out.s）：

	.text
	.global __aeabi_idiv
	.section	.rodata
	.align  2
.LC0:
	.ascii  "%d\012\000"
	.text
	.comm	a,4,4
	.text
	.comm	b,4,4
	.text
	.align  2
	.globl	main
	.type	main, %function
main:
	push    {fp, lr}
	add     fp, sp, #4
	sub	sp, sp, #8
	str	r0, [fp, #-8]
	mov	r4, #3
	ldr	r3, .L2+0
	str	r4, [r3]
	mov	r4, #5
	ldr	r3, .L2+4
	str	r4, [r3]
	ldr	r3, .L2+0
	ldr	r4, [r3]
	ldr	r3, .L2+4
	ldr	r5, [r3]
	add	r4, r4, r5
	mov     r1, r4
	ldr     r0, .L3
	bl      printf
	ldr	r3, .L2+0
	ldr	r4, [r3]
	ldr	r3, .L2+4
	ldr	r5, [r3]
	add	r4, r4, r5
	mov	r0, r4
	b	L1
L1:
	sub	sp, fp, #4
	pop	{fp, pc}
	.align	2
.L3:
	.word   .LC0
.L2:
	.word   a
	.word   b

输出（out）：

总结

在这一节中，我们实现了真正的全局变量，同时还支持了用','分隔、一次声明多个变量的写法。在下一节中，我们将重新考虑左值和右值的问题。

qq_34851605

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
1
评论
动手实现编译器（十二）——全局变量

在上一节中，我们实现了函数的调用和返回，在这一节中，我们来处理全局变量。当我们把变量定义的语句放入Block语句分析函数中，我们只能分析Block中的变量定义，即不能处理全局变量。重构变量和常量的汇编我在上一节说过，随着功能的增加，我们会不断修改代码，使功能更加完善和效率更高。首先是ARM汇编中直接MOV操作的立即数最大为11位，超过十一位的立即数将会先储存在内存中，在载入寄存器。我们对立即数载入函数进行修改：#define MAXINTS 1024 // 大整数储存上限int In
复制链接

扫一扫