C编译器02-转换器

小乌鱼

已于 2022-09-09 07:24:13 修改

阅读量744

点赞数

分类专栏： c 文章标签： c语言算法 c++

于 2022-09-08 22:45:00 首次发布

本文链接：https://blog.csdn.net/zhanglianzhu_91/article/details/126773497

版权

c 专栏收录该内容

24 篇文章 0 订阅

订阅专栏

acwj编译器从0开始到编译器自举

上一篇介绍了扫描器：逐个字符扫描代码，识别出其中的令牌（Token，即代码里的词法元素）并打印输出。要把代码编译成另一种语言，还需要对令牌做进一步的解析和转换，这里用的是AST（抽象语法树）。

用一棵二叉树把Token组织成AST树。解析时利用递归把扫描到的Token逐个组装成整棵AST树。目前还没有编译成机器码，执行时只是递归遍历AST树进行运算。另外，现在构造AST树时还没有考虑运算符的优先级，所以算出的结果并不是按优先级运算的结果。代码还不太复杂，有C基础基本都能看懂。

整个代码下载

重要部分。

树结构体：

// Abstract Syntax Tree structure.
// op holds one of the A_* node types; operator nodes keep their operands
// in left/right, integer-literal leaves keep their value in intvalue.
struct ASTnode {
	// Node type
	int op;				// "Operation" to be performed on this tree
	// Left child tree (NULL when there is none)
	struct ASTnode* left;			// Left and right child trees
	// Right child tree (NULL when there is none)
	struct ASTnode* right;
	// Literal value
	int intvalue;				// For A_INTLIT, the integer value
};

递归得到整棵AST树

// Return an AST tree whose root is a binary operator.
//
// Parses "primary [operator expression]" starting at the current token.
// The tree leans to the right: each operator node has the first operand
// as its left child and the whole remaining expression as its right
// child, so operator precedence is not taken into account.
struct ASTnode* binexpr(void) {
	// Build the leaf for the integer literal on the left; primary()
	// also fetches the token that follows it
	struct ASTnode* lhs = primary();

	if (Token.token != T_EOF) {
		// Convert the operator token into a node type before scanning
		// past it
		int op = arithop(Token.token);

		// Move on to the first token of the right-hand side
		scan(&Token);

		// Recursively parse everything that is left
		struct ASTnode* rhs = binexpr();

		// Join both sub-trees under the operator
		return (mkastnode(op, lhs, rhs, 0));
	}

	// No tokens left: the expression is just the left node
	return (lhs);
}

递归执行树运算，后面把运行部分处理为汇编代码就是实际生成机器码了。

// Given an AST, interpret the operators in it and return a final value.
//
// Both sub-trees are evaluated first (left, then right), a debug line
// describing the current node is printed to stdout, and the node's own
// operation is then applied. A NULL node, an unknown node type, an
// operator node without two operands, and division by zero are reported
// on stderr and end the program with exit status 1.
int interpretAST(struct ASTnode* n) {
	// Initialised so that no path can read an indeterminate value
	int leftval = 0, rightval = 0;

	if (n == NULL) {
		fprintf(stderr, "NULL AST node in interpretAST()\n");
		exit(1);
	}

	// Get the left and right sub-tree values
	if (n->left)
		leftval = interpretAST(n->left);
	if (n->right)
		rightval = interpretAST(n->right);

	// Classify the node before ASTop[] is indexed, so that an unknown
	// node type is rejected instead of being used as an array index
	switch (n->op) {
	case A_INTLIT:
		// Debug: Print what we are about to do
		printf("int %d\n", n->intvalue);
		return (n->intvalue);
	case A_ADD:
	case A_SUBTRACT:
	case A_MULTIPLY:
	case A_DIVIDE:
		break;
	default:
		fprintf(stderr, "Unknown AST operator %d\n", n->op);
		exit(1);
	}

	// A binary operator cannot be applied without both operands
	if (n->left == NULL || n->right == NULL) {
		fprintf(stderr, "AST operator %s is missing an operand\n", ASTop[n->op]);
		exit(1);
	}

	// Debug: Print what we are about to do
	printf("%d %s %d\n", leftval, ASTop[n->op], rightval);

	switch (n->op) {
	case A_ADD:
		return (leftval + rightval);
	case A_SUBTRACT:
		return (leftval - rightval);
	case A_MULTIPLY:
		return (leftval * rightval);
	default:
		// Only A_DIVIDE can reach this point. Integer division by zero
		// is undefined behaviour in C, so refuse it explicitly
		if (rightval == 0) {
			fprintf(stderr, "Division by zero\n");
			exit(1);
		}
		return (leftval / rightval);
	}
}

各代码文件

data.h

// The extern_ macro lets this one header both declare and define the
// globals: main.c defines extern_ as empty before including this file,
// so the variables are defined there; every other file gets the default
// below and sees plain extern declarations.
#ifndef extern_
 #define extern_ extern
#endif

// Global variables
// Copyright (c) 2019 Warren Toomey, GPL3

// Current line number in the input
extern_ int Line;
// Character put back by the scanner (0 when there is none)
extern_ int	Putback;
// Input file being read
extern_ FILE* Infile;
// Most recently scanned token
extern_ struct token	Token;

decl.h


// Function prototypes for all compiler files
// Copyright (c) 2019 Warren Toomey, GPL3

// Scan the next token from the input into *t.
// Returns 1 if a token was found, 0 if no tokens are left.
int scan(struct token *t);

// Build an AST node from an operation, two children and a value
struct ASTnode *mkastnode(int op, struct ASTnode *left,struct ASTnode *right, int intvalue);

// Build an AST leaf node (no children)
struct ASTnode *mkastleaf(int op, int intvalue);

// Build an AST node that has only a left child
struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue);

// Parse an expression from the token stream and return its AST
struct ASTnode *binexpr(void);

// Evaluate an AST and return the resulting value
int interpretAST(struct ASTnode *n);

defs.h

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Structure and enum definitions
// Copyright (c) 2019 Warren Toomey, GPL3

// Token types produced by the scanner
enum {
	T_EOF,		// End of input
	T_PLUS,		// '+'
	T_MINUS,	// '-'
	T_STAR,		// '*'
	T_SLASH,	// '/'
	T_INTLIT	// Decimal integer literal
};

/* We start our compiler-writing journey with a simple lexical scanner.
As mentioned in the previous part, the job of the scanner
is to identify the lexical elements, or *tokens*, in the input language.
We start with a language that has only five lexical elements:
 + the four basic maths operators: `*`, `/`, `+` and `-`
 + decimal whole numbers which have 1 or more digits `0` .. `9`
Each token that we scan is stored in this structure */

/* When the token is a `T_INTLIT` (i.e. an integer literal), the `intvalue`
field holds the value of the integer that we scanned in. */

// Token structure
struct token {
	// Kind of token
	int token;				// Token type, from the enum list above
	// Literal value
	int intvalue;				// For T_INTLIT, the integer value
};

// AST node types.
// The four operators come first and start at 0 because interp.c uses
// the node type as an index into its ASTop[] table of operator symbols.
enum {
	A_ADD,		// Addition
	A_SUBTRACT,	// Subtraction
	A_MULTIPLY,	// Multiplication
	A_DIVIDE,	// Division
	A_INTLIT	// Integer literal leaf
};

// Abstract Syntax Tree structure.
// op holds one of the AST node types from the enum above; operator nodes
// keep their operands in left/right, integer-literal leaves keep their
// value in intvalue.
struct ASTnode {
	// Node type
	int op;				// "Operation" to be performed on this tree
	// Left child tree (NULL when there is none)
	struct ASTnode* left;			// Left and right child trees
	// Right child tree (NULL when there is none)
	struct ASTnode* right;
	// Literal value
	int intvalue;				// For A_INTLIT, the integer value
};

scan.c

#include "defs.h"
#include "data.h"
#include "decl.h"

// Lexical scanning
// Copyright (c) 2019 Warren Toomey, GPL3

/*本文件包含词法扫描器的各个函数。我们每次从输入文件中
读取一个字符。不过，有时我们会在输入流中多读一个字符，
这时就需要把这个字符“放回”去。我们还要记录当前所在的行号，
以便在调试信息中打印行号。这些工作都由 `next()` 函数
（配合 `putback()`）完成*/

// Return the position of character c in string s, or -1 if c is not
// found.
//
// s: NUL-terminated string to search
// c: character to look for; '\0' is never considered to be found
static int chrpos(const char *s, int c) {
    const char *p;

    // strchr() treats the terminating '\0' as part of the string, so a
    // NUL input byte would otherwise "match" at index strlen(s) and be
    // mistaken for a valid position by the caller
    if (c == '\0')
        return (-1);

    p = strchr(s, c);
    return (p ? (int) (p - s) : -1);
}

// Get the next character from the input file.
//
// A character stored by putback() is handed out first and the slot is
// cleared (0 means "nothing stored"). Otherwise one character is read
// from Infile, and Line is advanced when that character is a newline.
// A put-back character never changes Line. Returns whatever fgetc()
// returned, including EOF.
static int next(void) {
    int ch = Putback;

    // Use the put-back character if there is one
    if (ch != 0) {
        Putback = 0;
        return ch;
    }

    // Nothing pending: read from the input file
    ch = fgetc(Infile);
    if (ch == '\n')
        Line++;
    return ch;
}

// Put back an unwanted character so that the following next() call
// returns it again. Only one character is held at a time, and a value
// of 0 cannot be put back because next() treats 0 as "nothing stored".
static void putback(int c) {
    Putback = c;
}

// Skip past input that we don't need to deal with, i.e. spaces, tabs,
// newlines, carriage returns and form feeds. Return the first character
// we do need to deal with (this may be EOF).
static int skip(void) {
    for (;;) {
        int ch = next();

        switch (ch) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            // Whitespace: keep reading
            continue;
        default:
            return (ch);
        }
    }
}

//扫描数字
// Scan and return an integer literal
// value from the input file. Store
// the value as a string in Text.
static int scanint(int c) {
    int k, val = 0;
    //循环检查字符，直到不是数字
    // Convert each character into an int value
    while ((k = chrpos("0123456789", c)) >= 0) {
        val = val * 10 + k;
        c = next();
    }
    //命中了非数字字符就抛回去
    // We hit a non-integer character, put it back.
    putback(c);
    return val;
}

// Scan and return the next token found in the input.
//
// t: receives the token type and, for T_INTLIT, the literal's value
//
// Return 1 if token valid, 0 if no tokens left (t->token is then T_EOF).
// A character that starts no known token is reported with printf() and
// ends the program with exit status 1.
int scan(struct token* t) {
    // Skip whitespace and fetch the first significant character
    int c = skip();

    // End of input
    if (c == EOF) {
        t->token = T_EOF;
        return (0);
    }

    // If it's a digit, scan the literal integer value in
    if (isdigit(c)) {
        t->intvalue = scanint(c);
        t->token = T_INTLIT;
        return (1);
    }

    // Otherwise it has to be one of the single-character operators
    switch (c) {
    case '+':
        t->token = T_PLUS;
        break;
    case '-':
        t->token = T_MINUS;
        break;
    case '*':
        t->token = T_STAR;
        break;
    case '/':
        t->token = T_SLASH;
        break;
    default:
        printf("在第%d行无法识别的字符%c\n", Line, c);
        exit(1);
    }

    // We found a token
    return (1);
}

tree.c

#include "defs.h"
#include "data.h"
#include "decl.h"

// AST tree functions
// Copyright (c) 2019 Warren Toomey, GPL3

// Build and return a generic AST node.
//
// op:         node type
// left/right: child trees, stored as given (either may be NULL)
// intvalue:   value stored in the node
//
// The node is allocated with malloc(). If the allocation fails, a
// message is written to stderr and the program exits with status 1,
// so the function never returns NULL.
struct ASTnode *mkastnode(int op, struct ASTnode *left,struct ASTnode *right, int intvalue) {
  // Malloc a new ASTnode
  struct ASTnode *node = malloc(sizeof *node);

  if (!node) {
    fprintf(stderr, "Unable to malloc in mkastnode()\n");
    exit(1);
  }

  // Copy in the field values and return it
  node->intvalue = intvalue;
  node->right = right;
  node->left = left;
  node->op = op;
  return (node);
}

// Make an AST leaf node: a node of type op that carries intvalue and
// has both child pointers set to NULL.
struct ASTnode *mkastleaf(int op, int intvalue) {
  struct ASTnode *leaf = mkastnode(op, NULL, NULL, intvalue);

  return (leaf);
}

// Make a unary AST node: only one child.
// The child is stored as the left sub-tree; the right pointer is NULL.
struct ASTnode *mkastunary(int op, struct ASTnode *left, int intvalue) {
  struct ASTnode *unary = mkastnode(op, left, NULL, intvalue);

  return (unary);
}

expr.c

#include "defs.h"
#include "data.h"
#include "decl.h"

// Parsing of expressions
// Copyright (c) 2019 Warren Toomey, GPL3

// Parse a primary factor and return an AST node representing it.
//
// The current token must be an integer literal: a leaf AST node is made
// for it and the next token is scanned in. Any other token type is a
// syntax error, reported on stderr with the current line number, after
// which the program exits with status 1.
static struct ASTnode* primary(void) {
	struct ASTnode* leaf;

	if (Token.token != T_INTLIT) {
		fprintf(stderr, "syntax error on line %d\n", Line);
		exit(1);
	}

	// Build the leaf first: scan() overwrites Token
	leaf = mkastleaf(A_INTLIT, Token.intvalue);
	// Scan in the next token
	scan(&Token);
	return (leaf);
}

// Convert an operator token into the matching AST operation.
//
// Only the four arithmetic operator tokens are accepted. Any other
// token, T_INTLIT included, is reported on stderr and ends the program
// with exit status 1. An integer literal is not an operator: mapping it
// to A_INTLIT allowed binexpr() to build an A_INTLIT node with children
// (for input such as "12 34 56"), which interpretAST() then evaluated
// to 0 instead of rejecting the input.
int arithop(int tok) {
	switch (tok) {
	case T_PLUS:
		return (A_ADD);
	case T_MINUS:
		return (A_SUBTRACT);
	case T_STAR:
		return (A_MULTIPLY);
	case T_SLASH:
		return (A_DIVIDE);
	default:
		fprintf(stderr, "不知道的令牌类别%d在第%d行\n", tok, Line);
		exit(1);
	}
}

// Return an AST tree whose root is a binary operator.
//
// Parses "primary [operator expression]" starting at the current token.
// The tree leans to the right: each operator node has the first operand
// as its left child and the whole remaining expression as its right
// child, so operator precedence is not taken into account.
struct ASTnode* binexpr(void) {
	// Build the leaf for the integer literal on the left; primary()
	// also fetches the token that follows it
	struct ASTnode* lhs = primary();

	if (Token.token != T_EOF) {
		// Convert the operator token into a node type before scanning
		// past it
		int op = arithop(Token.token);

		// Move on to the first token of the right-hand side
		scan(&Token);

		// Recursively parse everything that is left
		struct ASTnode* rhs = binexpr();

		// Join both sub-trees under the operator
		return (mkastnode(op, lhs, rhs, 0));
	}

	// No tokens left: the expression is just the left node
	return (lhs);
}

interp.c

#include "defs.h"
#include "data.h"
#include "decl.h"

// AST tree interpreter
// Copyright (c) 2019 Warren Toomey, GPL3
// 
// List of AST operators: printable symbols used in the debug output.
// The table is indexed by AST node type, so its order has to match
// A_ADD, A_SUBTRACT, A_MULTIPLY, A_DIVIDE in defs.h.
static char* ASTop[] = { "+", "-", "*", "/" };

// Given an AST, interpret the operators in it and return a final value.
//
// Both sub-trees are evaluated first (left, then right), a debug line
// describing the current node is printed to stdout, and the node's own
// operation is then applied. A NULL node, an unknown node type, an
// operator node without two operands, and division by zero are reported
// on stderr and end the program with exit status 1.
int interpretAST(struct ASTnode* n) {
	// Initialised so that no path can read an indeterminate value
	int leftval = 0, rightval = 0;

	if (n == NULL) {
		fprintf(stderr, "NULL AST node in interpretAST()\n");
		exit(1);
	}

	// Get the left and right sub-tree values
	if (n->left)
		leftval = interpretAST(n->left);
	if (n->right)
		rightval = interpretAST(n->right);

	// Classify the node before ASTop[] is indexed: the table only has
	// entries for the four arithmetic operators
	switch (n->op) {
	case A_INTLIT:
		// Debug: Print what we are about to do
		printf("int %d\n", n->intvalue);
		return (n->intvalue);
	case A_ADD:
	case A_SUBTRACT:
	case A_MULTIPLY:
	case A_DIVIDE:
		break;
	default:
		fprintf(stderr, "Unknown AST operator %d\n", n->op);
		exit(1);
	}

	// A binary operator cannot be applied without both operands
	if (n->left == NULL || n->right == NULL) {
		fprintf(stderr, "AST operator %s is missing an operand\n", ASTop[n->op]);
		exit(1);
	}

	// Debug: Print what we are about to do
	printf("%d %s %d\n", leftval, ASTop[n->op], rightval);

	switch (n->op) {
	case A_ADD:
		return (leftval + rightval);
	case A_SUBTRACT:
		return (leftval - rightval);
	case A_MULTIPLY:
		return (leftval * rightval);
	default:
		// Only A_DIVIDE can reach this point. Integer division by zero
		// is undefined behaviour in C, so refuse it explicitly
		if (rightval == 0) {
			fprintf(stderr, "Division by zero\n");
			exit(1);
		}
		return (leftval / rightval);
	}
}

main.c

#include "defs.h"
#define extern_
#include "data.h"
#undef extern_
#include "decl.h"
#include <errno.h>

// Compiler setup and top-level execution
// Copyright (c) 2019 Warren Toomey, GPL3

// Initialise global variables
static void init(void) {
	// Seed the put-back slot with a newline: the first next() call hands
	// it to skip(), which discards it as whitespace, so scanning starts
	// at the first character of the file
	Putback = '\n';
	// Line numbers start at 1. next() only counts newlines that it reads
	// from the file, so the seeded newline does not advance the count
	Line = 1;
}

// Print out a usage if started incorrectly.
// The message goes to stderr and the program exits with status 1.
// prog: program name shown in the message
static void usage(char* prog) {
	fprintf(stderr, "使用示例: %s 输入文件名\n", prog);
	exit(1);
}

// Main program: check arguments and print a usage if we don't have
// exactly one argument. Open up the input file, parse the expression in
// it into an AST, interpret the AST and print the result.
//
// Returns 0 on success. Every failure path (bad arguments, unreadable
// file, errors raised while scanning, parsing or interpreting) ends the
// program through exit(1).
int main(int argc, char *argv[]) {
  struct ASTnode *n;

  // Exactly one argument, the input file name, is expected
  if (argc != 2) {
    usage(argv[0]);
  }

  init();

  // Open the input file
  if ((Infile = fopen(argv[1], "r")) == NULL) {
    fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno));
    exit(1);
  }

  scan(&Token);			// Get the first token from the input
  n = binexpr();		// Parse the expression in the file
  printf("%d\n", interpretAST(n));	// Calculate the final result
  return (0);
}

input01

2 + 3 * 5 - 8 / 3

input02

input03

12 34 + -56 * / - - 8 + * 2

input04

23 +
18 -
45.6 * 2
/ 18

input05

23 * 456abcdefg

编译测试

[root@zlzlinux 02_Parser]# 
[root@zlzlinux 02_Parser]# make
cc -o parser -g expr.c interp.c main.c scan.c tree.c
[root@zlzlinux 02_Parser]# ./parser input01 
int 2
int 3
int 5
int 8
int 3
8 / 3
5 - 2
3 * 3
2 + 9
11
[root@zlzlinux 02_Parser]# ./parser input02
int 13
int 6
int 4
int 5
int 8
int 3
8 / 3
5 + 2
4 * 7
6 + 28
13 - 34
-21
[root@zlzlinux 02_Parser]# ./parser input03
syntax error on line 1
[root@zlzlinux 02_Parser]# ./parser input03
syntax error on line 1
[root@zlzlinux 02_Parser]# ./parser input04
՚µسѐϞ·¨ʶ±𶅗ַ 
[root@zlzlinux 02_Parser]# ./parser input04
在第3行无法识别的字符.
[root@zlzlinux 02_Parser]# ./parser input05
在第1行无法识别的字符a
[root@zlzlinux 02_Parser]#