编译原理c语言--词法语法分析--简单四则运算计算器 (支持算符优先级与括号)--生成语法树然后计算结果

在windows vs2015测试通过


#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

// Read one line from stdin into input (at most inputsize - 1 characters).
// The trailing newline is not stored and the result is always NUL-terminated,
// including when EOF arrives before a newline. Nothing is written when
// input is NULL or inputsize is not positive.
// NOTE: POSIX <stdio.h> also declares a getline() with a different signature;
// on such systems this definition conflicts and the function has to be renamed
// together with its call site.
void getline(char* input, int inputsize) {
	int idx = 0;
	int c; // int, not char: EOF must stay distinguishable from every valid byte

	if (input == NULL || inputsize <= 0)
		return;

	// stop one slot early so the terminator below always fits
	while (idx < inputsize - 1)
	{
		c = fgetc(stdin);
		if (c == EOF || c == '\n')
			break;
		input[idx++] = (char)c;
	}
	input[idx] = '\0';
}

// lexical analysis section
// Kinds of token produced by tokenize(): the four arithmetic operators,
// the two parentheses, and integer literals.
enum tokentype
{
	PLUS, MINUS, MUL, DIV, LPAREN, RPAREN, INTEGER
};

// One lexical token; tokens are chained into a doubly linked list.
typedef struct token {
	// one of enum tokentype
	int tokentype;
	// NUL-terminated token text: a single operator/parenthesis character or a digit string
	char val[16];
	// previous token in the list (NULL for a token created by newtoken() and never linked)
	struct token* prev;
	// next token in the list (NULL at the end of the list)
	struct token* next;
} token;

token* newtoken()
{
	token* tokennew = malloc(sizeof(token));
	tokennew->next = NULL;
	tokennew->prev = NULL;
	tokennew->val[0] = '\0';
	return tokennew;
}

// Append a fresh empty token after *tokencurr and make *tokencurr point at it.
void tokenmoveon(token** tokencurr)
{
	token* tail = *tokencurr;
	token* fresh = newtoken();

	// link the new token in both directions
	tail->next = fresh;
	fresh->next = NULL;
	fresh->prev = tail;

	// the new token becomes the current one
	*tokencurr = fresh;
}

void printtoknetype(int type)
{
	switch (type)
	{
	case PLUS:
		printf("PLUS");
		break;
	case MINUS:
		printf("MINUS");
		break;
	case MUL:
		printf("MUL");
		break;
	case DIV:
		printf("DIV");
		break;
	case LPAREN:
		printf("LPAREN");
		break;
	case RPAREN:
		printf("RPAREN");
		break;
	case INTEGER:
		printf("INTEGER");
		break;
	default:
		break;
	}
}

// Dump a token list to stdout as "TYPE:[text] " pairs on a single line.
void printtoken(token* thead)
{
	printf("tokne:\n");
	for (token* cur = thead; cur != NULL; cur = cur->next)
	{
		printtoknetype(cur->tokentype);
		printf(":[%s] ", cur->val);
	}
	printf("\n");
}

// Fill the current token with a one-character operator or parenthesis,
// then advance *tokencurr to a freshly appended empty token.
void tokenone(token** tokencurr, int tokentype, char op)
{
	token* cur = *tokencurr;

	cur->val[0] = op;
	cur->val[1] = '\0';
	cur->tokentype = tokentype;

	tokenmoveon(tokencurr);
}

// Split an arithmetic expression into a doubly linked list of tokens.
//
// str: NUL-terminated expression made of digits, + - * / ( ) and whitespace.
// Returns the head of the token list, or NULL (after printing "no token")
// when the input contains no tokens at all.
// A character that cannot start a token, or a number that does not fit into
// token.val, is reported on stderr and terminates the program.
token* tokenize(char* str)
{
	token* thead = newtoken();
	token* tokencurr = thead; // always an empty placeholder waiting to be filled
	size_t i = 0;

	while (str != NULL && str[i] != '\0')
	{
		// <ctype.h> functions require a value representable as unsigned char
		unsigned char c = (unsigned char)str[i];

		if (isspace(c))
		{
			i++;
			continue;
		}

		switch (c)
		{
		case '+':
			tokenone(&tokencurr, PLUS, '+');
			i++;
			continue;
		case '-':
			tokenone(&tokencurr, MINUS, '-');
			i++;
			continue;
		case '*':
			tokenone(&tokencurr, MUL, '*');
			i++;
			continue;
		case '/':
			tokenone(&tokencurr, DIV, '/');
			i++;
			continue;
		case '(':
			tokenone(&tokencurr, LPAREN, '(');
			i++;
			continue;
		case ')':
			tokenone(&tokencurr, RPAREN, ')');
			i++;
			continue;
		default:
			break;
		}

		if (!isdigit(c))
		{
			fprintf(stderr, "invalid character '%c' at position %d\n", c, (int)i);
			exit(EXIT_FAILURE);
		}

		// copy the whole run of digits straight into the placeholder token
		size_t len = 0;
		while (isdigit((unsigned char)str[i]))
		{
			if (len >= sizeof(tokencurr->val) - 1)
			{
				fprintf(stderr, "number too long (at most %d digits)\n",
					(int)(sizeof(tokencurr->val) - 1));
				exit(EXIT_FAILURE);
			}
			tokencurr->val[len++] = str[i++];
		}
		tokencurr->val[len] = '\0';
		tokencurr->tokentype = INTEGER;
		tokenmoveon(&tokencurr);
	}

	// the placeholder was never filled: the input held no tokens
	if (tokencurr == thead)
	{
		free(thead);
		printf("no token\n");
		return NULL;
	}

	// drop the trailing placeholder that tokenmoveon() created after the last real token
	tokencurr = tokencurr->prev;
	free(tokencurr->next);
	tokencurr->next = NULL;
	return thead;
}

// syntax analysis section
// AST node: either an operator with two operands or an integer leaf.
typedef struct node {
	token* val; // operator or integer token of this node; points into the token list, not a copy
	struct node* left; // left operand of an operator node; NULL for an integer leaf
	struct node* right; // right operand of an operator node; NULL for an integer leaf
} node;

// Allocate an AST node with no token and no children.
// Never returns NULL: the program is terminated if memory is exhausted.
node* newnode()
{
	node* nodenew = malloc(sizeof(node));
	if (nodenew == NULL)
	{
		// callers dereference the result immediately, so fail loudly here
		fprintf(stderr, "out of memory\n");
		exit(EXIT_FAILURE);
	}

	nodenew->val  = NULL;
	nodenew->left = NULL;
	nodenew->right = NULL;

	return nodenew;
}

// recursive-descent parsing: each grammar rule below is implemented by one function of the same name
/*
expr   : term ((PLUS | MINUS) term)*
term   : factor ((MUL | DIV) factor)*
factor : INTEGER | LPAREN expr RPAREN
*/

token* tokenp;  // parser cursor: next token to consume; set by ast(), advanced by factor()/term()/expr()
// forward declarations: the grammar functions call each other (factor() calls expr() for parentheses)
node* factor();
node* term();
node* expr();

node* factor()
{
	if (tokenp->tokentype == INTEGER)  // find num
	{
		node* one = newnode();
		one->val = tokenp;
		tokenp = tokenp->next;//skip num
		return one;
	}
	else if (tokenp->tokentype == LPAREN) //find (), to recursive descent parse
	{
		tokenp = tokenp->next;//skip LP
		node* one = expr(); // recursive, exit if the token is RPARENT, otherwise if match other token like integer or +-/* will be still in the recursive
		tokenp = tokenp->next; //skip RP
		return one;
	}
}
// term : factor ((MUL | DIV) factor)*
// Builds a left-leaning chain of * and / nodes and returns its root.
node* term()
{
	node* acc = factor();

	for (;;)
	{
		if (tokenp == NULL)
			break;
		if (tokenp->tokentype != MUL && tokenp->tokentype != DIV)
			break;

		// the tree built so far becomes the left operand of the new operator
		node* binop = newnode();
		binop->left = acc;
		binop->val = tokenp;
		tokenp = tokenp->next; // step over * or /
		binop->right = factor();
		acc = binop;
	}

	return acc;
}
// expr : term ((PLUS | MINUS) term)*
// Builds a left-leaning chain of + and - nodes and returns its root.
//
// For 1+2+3 the first + (1, 2) is built, then a second + is placed on top
// with the first + as its left child and 3 as its right child.
// For 1+2*3 the right child of + is whatever term() returns, i.e. the
// complete 2*3 subtree, which is how * and / bind tighter than + and -.
node* expr()
{
	node* acc = term();

	for (;;)
	{
		if (tokenp == NULL)
			break;
		if (tokenp->tokentype != PLUS && tokenp->tokentype != MINUS)
			break;

		// the tree built so far becomes the left operand of the new operator
		node* binop = newnode();
		binop->left = acc;
		binop->val = tokenp;
		tokenp = tokenp->next; // step over + or -
		binop->right = term();
		acc = binop;
	}

	return acc;
}

// Build the syntax tree for a whole token list.
// Operators that bind tighter (* and /) end up deeper in the tree; operators of
// equal precedence chain from bottom-left to top-right (left associative).
//
// tokenlist: head of the list produced by tokenize(); may be NULL.
// Returns the root of the tree, or NULL for an empty token list.
// Tokens left over after a complete expression (e.g. "1 2" or "1)") are a
// syntax error: it is reported on stderr and the program terminates.
node* ast(token* tokenlist)
{
	tokenp = tokenlist;
	if (tokenlist == NULL)
		return NULL;

	node* root = expr();

	if (tokenp != NULL)
	{
		fprintf(stderr, "syntax error: unexpected '%s' after end of expression\n", tokenp->val);
		exit(EXIT_FAILURE);
	}
	return root;
}

// Apply the binary operator op ('+', '-', '*' or '/') to a and b.
// Division is integer division. Division by zero and an unknown operator are
// reported on stderr and terminate the program instead of invoking undefined
// behaviour.
int calc(int a, int b, char op)
{
	switch (op)
	{
	case '+':
		return a + b;
	case '-':
		return a - b;
	case '*':
		return a * b;
	case '/':
		if (b == 0)
		{
			fprintf(stderr, "error: division by zero\n");
			exit(EXIT_FAILURE);
		}
		return a / b;
	default:
		fprintf(stderr, "error: unknown operator '%c'\n", op);
		exit(EXIT_FAILURE);
	}
}

// Return 1 when tokentype is one of the four arithmetic operators
// (PLUS, MINUS, MUL, DIV), 0 otherwise.
int isop(int tokentype)
{
	switch (tokentype)
	{
	case PLUS:
	case MINUS:
	case MUL:
	case DIV:
		return 1;
	default:
		return 0;
	}
}
// Evaluate a syntax tree by post-order traversal: both operands first,
// then the operator of the node itself.
//
// nodelist: root of the (sub)tree; an integer leaf evaluates to its own value,
//           so a tree consisting of a single number is valid.
// Returns the integer value of the expression, or 0 for an empty tree.
int eval(node* nodelist)
{
	if (nodelist == NULL || nodelist->val == NULL)
		return 0;

	// leaf: the token text is the number itself
	if (!isop(nodelist->val->tokentype))
		return atoi(nodelist->val->val);

	// evaluate left before right so that any diagnostics appear in source order
	int left = eval(nodelist->left);
	int right = eval(nodelist->right);
	return calc(left, right, nodelist->val->val[0]);
}
// Release every token of the list starting at head.
static void freetokens(token* head)
{
	while (head != NULL)
	{
		token* next = head->next;
		free(head);
		head = next;
	}
}

// Release every node of a syntax tree. The tokens the nodes point at belong
// to the token list and are not freed here.
static void freetree(node* root)
{
	if (root == NULL)
		return;
	freetree(root->left);
	freetree(root->right);
	free(root);
}

// Read one expression from stdin, print its tokens and its value.
// Returns 0 on success, 1 when the input contains nothing to evaluate.
int main()
{
	char input[128] = { 0 }; // zero-filled so the buffer is a valid string whatever getline stores
	getline(input, (int)sizeof(input));

	if (input[0] == '\0')
	{
		printf("no token\n");
		return 1;
	}

	token* tokenlist = tokenize(input);
	if (tokenlist == NULL)
		return 1;
	printtoken(tokenlist);

	node* nodelist = ast(tokenlist);

	int res = eval(nodelist);
	printf("res:%d\n", res);

	// the tree references tokens, so release it before the token list
	freetree(nodelist);
	freetokens(tokenlist);
	return 0;
}

/*
1+2+3+4*5*6*7-8-9  tree view
                 -
				/ \
			    -  9
			   / \
			  +  8
			 /  \
		   +     *
          /\     /\
		 +  3   *  7
		/\     /\
	   1  2   *  6
	          /\
			 4  5

first 1+2,then +3, then + with plus->right = term() to get all 4*5*6*7
first 4*5,then *6,*7, then add the node to +
then -8, -9
*/
  • 7
    点赞
  • 27
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
编译原理中的语法分析是将代码转换为抽象语法树的过程。在C语言中,语法分析器通常使用自顶向下的递归下降分析方法。以下是一个简单C语言代码示例和相应的语法分析器实现:

假设我们有以下C语言代码:

```c
#include <stdio.h>

int main() {
    int a = 1;
    int b = 2;
    int c = a + b;
    printf("The sum of a and b is %d", c);
    return 0;
}
```

我们可以使用递归下降分析器来解析这段代码。下面是一个简单的实现:

```python
import re

# 定义C语言的语法规则
# 这里只列出了一部分规则,实际上C语言的语法规则非常复杂
rules = {
    'program': ['INCLUDES main'],
    'INCLUDES': ['include <stdio.h>'],
    'main': ['INT MAIN LPAREN RPAREN LBRACE declarations statements RETURN NUMBER SEMI RBRACE'],
    'declarations': ['INT ID SEMI declarations', ''],
    'statements': ['statement statements', ''],
    'statement': ['assignment_statement', 'print_statement'],
    'assignment_statement': ['INT ID EQUALS NUMBER SEMI'],
    'print_statement': ['PRINTF LPAREN STRING COMMA ID RPAREN SEMI']
}

# 定义一个Token类来表示代码中的单词
class Token:
    def __init__(self, type, value):
        self.type = type
        self.value = value

    def __str__(self):
        return 'Token({type}, {value})'.format(
            type=self.type,
            value=repr(self.value)
        )

# 定义一个Lexer类来将代码转换为Token序列
class Lexer:
    def __init__(self, text):
        self.text = text
        self.pos = 0
        self.current_char = self.text[self.pos]

    def error(self):
        raise Exception('Invalid character')

    def advance(self):
        self.pos += 1
        if self.pos > len(self.text) - 1:
            self.current_char = None
        else:
            self.current_char = self.text[self.pos]

    def skip_whitespace(self):
        while self.current_char is not None and self.current_char.isspace():
            self.advance()

    def integer(self):
        result = ''
        while self.current_char is not None and self.current_char.isdigit():
            result += self.current_char
            self.advance()
        return int(result)

    def get_next_token(self):
        while self.current_char is not None:
            if self.current_char.isspace():
                self.skip_whitespace()
                continue
            if self.current_char.isdigit():
                return Token('NUMBER', self.integer())
            if self.current_char == '+':
                self.advance()
                return Token('PLUS', '+')
            if self.current_char == '-':
                self.advance()
                return Token('MINUS', '-')
            if self.current_char == '*':
                self.advance()
                return Token('MULTIPLY', '*')
            if self.current_char == '/':
                self.advance()
                return Token('DIVIDE', '/')
            if self.current_char == '(':
                self.advance()
                return Token('LPAREN', '(')
            if self.current_char == ')':
                self.advance()
                return Token('RPAREN', ')')
            if self.current_char == '{':
                self.advance()
                return Token('LBRACE', '{')
            if self.current_char == '}':
                self.advance()
                return Token('RBRACE', '}')
            if self.current_char == ';':
                self.advance()
                return Token('SEMI', ';')
            if self.current_char == '=':
                self.advance()
                return Token('EQUALS', '=')
            if self.current_char == ',':
                self.advance()
                return Token('COMMA', ',')
            if self.current_char == '"':
                self.advance()
                string = ''
                while self.current_char is not None and self.current_char != '"':
                    string += self.current_char
                    self.advance()
                if self.current_char == '"':
                    self.advance()
                    return Token('STRING', string)
                else:
                    self.error()
            if self.current_char.isalpha():
                word = ''
                while self.current_char is not None and (self.current_char.isalpha() or self.current_char.isdigit() or self.current_char == '_'):
                    word += self.current_char
                    self.advance()
                if word.upper() == 'INT':
                    return Token('INT', 'int')
                if word.upper() == 'PRINTF':
                    return Token('PRINTF', 'printf')
                if word.upper() == 'RETURN':
                    return Token('RETURN', 'return')
                if word.upper() == 'MAIN':
                    return Token('MAIN', 'main')
                if word.upper() == 'INCLUDE':
                    return Token('INCLUDE', 'include')
                if word.upper() == 'STDIO':
                    return Token('STDIO', 'stdio')
                return Token('ID', word)
            self.error()
        return Token('EOF', None)

# 定义一个Parser类来将Token序列转换为抽象语法树
class Parser:
    def __init__(self, lexer):
        self.lexer = lexer
        self.current_token = self.lexer.get_next_token()

    def error(self):
        raise Exception('Invalid syntax')

    def eat(self, token_type):
        if self.current_token.type == token_type:
            self.current_token = self.lexer.get_next_token()
        else:
            self.error()

    def program(self):
        includes_node = self.includes()
        main_node = self.main()
        return (includes_node, main_node)

    def includes(self):
        self.eat('INCLUDE')
        self.eat('STDIO')
        self.eat('.')
        self.eat('H')
        return ('INCLUDES',)

    def main(self):
        self.eat('INT')
        self.eat('MAIN')
        self.eat('LPAREN')
        self.eat('RPAREN')
        self.eat('LBRACE')
        declarations_node = self.declarations()
        statements_node = self.statements()
        self.eat('RETURN')
        number_node = self.number()
        self.eat('SEMI')
        self.eat('RBRACE')
        return ('MAIN', declarations_node, statements_node, number_node)

    def declarations(self):
        declarations_node = ('DECLARATIONS',)
        while self.current_token.type == 'INT':
            declaration_node = self.declaration()
            declarations_node += (declaration_node,)
        return declarations_node

    def declaration(self):
        self.eat('INT')
        id_node = self.variable()
        self.eat('SEMI')
        return ('DECLARATION', id_node)

    def variable(self):
        token = self.current_token
        self.eat('ID')
        return ('VAR', token.value)

    def statements(self):
        statements_node = ('STATEMENTS',)
        while self.current_token.type in ['ID', 'PRINTF']:
            statement_node = self.statement()
            statements_node += (statement_node,)
        return statements_node

    def statement(self):
        if self.current_token.type == 'ID':
            assignment_statement_node = self.assignment_statement()
            return assignment_statement_node
        elif self.current_token.type == 'PRINTF':
            print_statement_node = self.print_statement()
            return print_statement_node

    def assignment_statement(self):
        id_node = self.variable()
        self.eat('EQUALS')
        number_node = self.number()
        self.eat('SEMI')
        return ('ASSIGNMENT', id_node, number_node)

    def print_statement(self):
        self.eat('PRINTF')
        self.eat('LPAREN')
        string_node = self.string()
        self.eat('COMMA')
        id_node = self.variable()
        self.eat('RPAREN')
        self.eat('SEMI')
        return ('PRINT', string_node, id_node)

    def string(self):
        token = self.current_token
        self.eat('STRING')
        return ('STRING', token.value)

    def number(self):
        token = self.current_token
        self.eat('NUMBER')
        return ('NUMBER', token.value)

    def parse(self):
        return self.program()

# 将代码转换为Token序列
lexer = Lexer(text)
tokens = []
while True:
    token = lexer.get_next_token()
    if token.type == 'EOF':
        break
    tokens.append(token)

# 将Token序列转换为抽象语法树
parser = Parser(Lexer(text))
ast = parser.parse()
print(ast)
```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值