《自制编程语言:第一章和第二章》
1使用yacc和lex实现简单的计算器
这两个工具的作用是:根据我们编写的语法规则文件(.y)和词法规则文件(.l)生成解析器的.c代码,然后我们用GCC来编译这些.c代码生成exe文件。运行exe,输入一个字符串,如1+1,回车执行后就能看到计算结果。
Windows上无法直接使用yacc和lex这两个命令,所以安装它们各自的替代品bison和flex,二者都在unxutils工具包中。
安装unxutils,配置环境变量,可以使用bison和flex命令
https://blog.csdn.net/bedusing/article/details/5409495#comments
安装Qt Creator,将MinGW配置到环境变量,之后就可以使用gcc命令
代码如下:
mycalc.y
%{
/*
 * C prologue: copied verbatim into the generated parser source.
 */
#include <stdio.h>
#include <stdlib.h>
/* Compile the parser's debug-trace support into the generated code. */
#define YYDEBUG 1

/*
 * The generated parser calls yylex() and yyerror(). Declare them here so
 * the parser source does not rely on implicit function declarations,
 * which current C compilers reject.
 */
int yylex(void);
int yyerror(char const *str);
%}
/* Semantic value type shared by tokens and nonterminals. */
%union {
int int_value; /* not referenced by any %token/%type declaration below */
double double_value;
}
/* Numeric literal; its value is carried in double_value. */
%token <double_value> DOUBLE_LITERAL
/* Operator tokens and CR (end of line); declared without a value type. */
%token ADD SUB MUL DIV CR
/* Nonterminals whose semantic value is a double. */
%type <double_value> expression term primary_expression
%%
/* The input is a sequence of one or more lines. */
line_list
    : line
    | line_list line
    ;
/* One line: an expression terminated by CR; its value is printed. */
line
    : expression CR
    {
        printf(">>%lf\n", $1);
    }
    ; /* terminate the rule explicitly, like every other rule in this grammar */
/* Additive level: left-recursive, so + and - associate to the left. */
expression
    : term
    {
        $$ = $1;
    }
    | expression ADD term
    {
        $$ = $1 + $3;
    }
    | expression SUB term
    {
        $$ = $1 - $3;
    }
    ;
/* Multiplicative level: nested below expression, so * and / bind tighter. */
term
    : primary_expression
    {
        $$ = $1;
    }
    | term MUL primary_expression
    {
        $$ = $1 * $3;
    }
    | term DIV primary_expression
    {
        $$ = $1 / $3;
    }
    ;
/* Operand level: a numeric literal. */
primary_expression
    : DOUBLE_LITERAL
    {
        $$ = $1;
    }
    ;
%%
/*
 * Error callback invoked by the generated parser.
 *
 * Reports the scanner's current token text (yytext) on stderr. The message
 * passed in by the parser is not printed. Always returns 0.
 */
int
yyerror(char const *str)
{
    extern char *yytext;

    (void)str; /* the parser-supplied message is intentionally unused */
    fprintf(stderr, "parser error near %s\n", yytext);
    return 0;
}
/*
 * Entry point of the yacc/lex calculator.
 *
 * Points the scanner at stdin and runs the parser. Exits with status 1 when
 * yyparse() reports failure, and returns 0 otherwise.
 */
int main(void)
{
    extern int yyparse(void);
    extern FILE *yyin;

    yyin = stdin;
    if (yyparse()) {
        fprintf(stderr, "Error ! Error ! Error !\n");
        exit(1);
    }
    /* Explicit status: falling off the end of main is undefined in C89. */
    return 0;
}
mycalc.l
%{
/* Scanner prologue: copied verbatim into the generated scanner source. */
#include <stdio.h>
#include "y.tab.h"

/*
 * Called by the generated scanner when it reaches end of input.
 * Always returns 1, i.e. there is no further input to switch to.
 */
int yywrap(void) { return 1; }
%}
%%
"+"     return ADD;
"-"     return SUB;
"*"     return MUL;
"/"     return DIV;
"\n"    return CR;
([1-9][0-9]*)|0|([0-9]+\.[0-9]*) {
    /* Convert the matched text and hand the value to the parser. */
    sscanf(yytext, "%lf", &yylval.double_value);
    return DOUBLE_LITERAL;
}
[ \t]   ;
.       {
    /* Any other character is rejected and the program stops. */
    fprintf(stderr, "lexical error.\n");
    exit(1);
}
%%
bat脚本文件,点击执行。也可以一行一行输入查看效果
rem Generate y.tab.c and y.tab.h from the grammar
bison --yacc -dv mycalc.y
rem Generate lex.yy.c from the scanner definition (it includes y.tab.h)
flex mycalc.l
rem Compile y.tab.c and lex.yy.c into mycalc.exe
gcc -o mycalc y.tab.c lex.yy.c
执行mycalc.exe测试
2不借助工具编写计算器
自制词法分析器
运行机制:先用set_line()传入一行字符串,之后每调用一次get_token(),就会从这一行中分割出下一个记号并返回。
token.h
#ifndef TOKEN_H_INCLUDED
#define TOKEN_H_INCLUDED

/* Kinds of token the lexer can hand back. */
typedef enum {
    BAD_TOKEN,              /* initial kind, before a token is recognised */
    NUMBER_TOKEN,           /* numeric literal; Token.value holds its value */
    ADD_OPERATOR_TOKEN,     /* '+' */
    SUB_OPERATOR_TOKEN,     /* '-' */
    MUL_OPERATOR_TOKEN,     /* '*' */
    DIV_OPERATOR_TOKEN,     /* '/' */
    LEFT_PAREN_TOKEN,       /* '(' */
    RIGHT_PAREN_TOKEN,      /* ')' */
    END_OF_LINE_TOKEN       /* newline that ends the input line */
} TokenKind;

/* Capacity of Token.str, including the terminating '\0'. */
#define MAX_TOKEN_SIZE (100)

/*
 * One scanned token.
 * (Block comment on purpose: the documented build uses gcc -ansi, and C90
 * has no "//" comments.)
 */
typedef struct {
    TokenKind kind;             /* what was recognised */
    double value;               /* numeric value, set for NUMBER_TOKEN */
    char str[MAX_TOKEN_SIZE];   /* source text of the token */
} Token;

/* Install the line to scan and restart scanning from its first character. */
void set_line(char *line);
/* Scan the next token of the installed line into *token. */
void get_token(Token *token);

#endif /* TOKEN_H_INCLUDED */
lexicalanalyzer.c
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "token.h"
/* Line currently being scanned; installed by set_line(). */
static char *st_line;
/* Index into st_line of the next character get_token() will examine. */
static int st_line_pos;
/* States get_token() moves through while reading a numeric literal. */
typedef enum {
INITIAL_STATUS, /* start state of every get_token() call */
IN_INT_PART_STATUS, /* entered on a digit read in INITIAL_STATUS */
DOT_STATUS, /* entered on a '.' read in IN_INT_PART_STATUS */
IN_FRAC_PART_STATUS /* entered on a digit read in DOT_STATUS */
} LexerStatus;
/*
 * Scan the next token from the line installed by set_line().
 *
 * On return token->kind identifies the token and token->str holds its text.
 * For NUMBER_TOKEN, token->value holds the converted value.
 *
 * - Whitespace other than '\n' is skipped.
 * - '\n' yields END_OF_LINE_TOKEN and is not consumed, so further calls keep
 *   returning END_OF_LINE_TOKEN.
 * - Reaching the end of the string without a '\n' also yields
 *   END_OF_LINE_TOKEN, after any number still being read has been returned.
 * - A number is digits, optionally followed by '.' and more digits; "1." is
 *   accepted, matching the lex version of the calculator.
 *
 * Prints a message to stderr and exits with status 1 on a token longer than
 * MAX_TOKEN_SIZE - 1 characters, a misplaced '.', or a character that cannot
 * start any token.
 */
void
get_token(Token *token)
{
    int out_pos = 0;
    LexerStatus status = INITIAL_STATUS;
    char current_char;

    token->kind = BAD_TOKEN;
    token->value = 0.0;
    token->str[0] = '\0';

    while (st_line[st_line_pos] != '\0') {
        current_char = st_line[st_line_pos];

        /*
         * A number is in progress (any state but INITIAL) and this character
         * cannot continue it: return the number and leave the character for
         * the next call. The <ctype.h> functions need an unsigned char value.
         */
        if (status != INITIAL_STATUS
            && !isdigit((unsigned char)current_char)
            && current_char != '.') {
            token->kind = NUMBER_TOKEN;
            sscanf(token->str, "%lf", &token->value);
            return;
        }

        if (isspace((unsigned char)current_char)) {
            if (current_char == '\n') {
                token->kind = END_OF_LINE_TOKEN;
                return;
            }
            st_line_pos++;
            continue;
        }

        if (out_pos >= MAX_TOKEN_SIZE - 1) {
            fprintf(stderr, "token too long.\n");
            exit(1);
        }
        token->str[out_pos] = current_char;
        st_line_pos++;
        out_pos++;
        token->str[out_pos] = '\0';

        if (current_char == '+') {
            token->kind = ADD_OPERATOR_TOKEN;
            return;
        } else if (current_char == '-') {
            token->kind = SUB_OPERATOR_TOKEN;
            return;
        } else if (current_char == '*') {
            token->kind = MUL_OPERATOR_TOKEN;
            return;
        } else if (current_char == '/') {
            token->kind = DIV_OPERATOR_TOKEN;
            return;
        } else if (current_char == '(') {
            token->kind = LEFT_PAREN_TOKEN;
            return;
        } else if (current_char == ')') {
            token->kind = RIGHT_PAREN_TOKEN;
            return;
        } else if (isdigit((unsigned char)current_char)) {
            if (status == INITIAL_STATUS) {
                status = IN_INT_PART_STATUS;
            } else if (status == DOT_STATUS) {
                status = IN_FRAC_PART_STATUS;
            }
        } else if (current_char == '.') {
            if (status == IN_INT_PART_STATUS) {
                status = DOT_STATUS;
            } else {
                fprintf(stderr, "syntax error.\n");
                exit(1);
            }
        } else {
            /* Do not silently swallow characters that belong to no token. */
            fprintf(stderr, "bad character(%c)\n", current_char);
            exit(1);
        }
    }

    /* End of string without '\n': flush a pending number, else end the line. */
    if (status != INITIAL_STATUS) {
        token->kind = NUMBER_TOKEN;
        sscanf(token->str, "%lf", &token->value);
    } else {
        token->kind = END_OF_LINE_TOKEN;
    }
}
/*
 * Install the line that get_token() will scan and rewind the scan position
 * to its first character. The pointer is stored, not copied.
 */
void
set_line(char *line)
{
    st_line_pos = 0;
    st_line = line;
}
#if 0
/*
 * Disabled stand-alone test driver for the lexer.
 *
 * parse_line() prints every token of the currently installed line until the
 * end-of-line token is seen.
 */
void
parse_line(void)
{
    Token token;

    st_line_pos = 0;
    for (;;) {
        get_token(&token);
        /*
         * Stop on BAD_TOKEN as well, so a token the lexer could not
         * classify cannot keep this loop spinning forever.
         */
        if (token.kind == END_OF_LINE_TOKEN || token.kind == BAD_TOKEN) {
            break;
        }
        printf("kind..%d, str..%s\n", token.kind, token.str);
    }
}

int
main(int argc, char **argv)
{
    /* Own line buffer: st_line is only a pointer and has no storage. */
    char buf[1024];

    /* Read the input line by line and split each line into tokens. */
    while (fgets(buf, sizeof buf, stdin) != NULL) {
        set_line(buf);
        parse_line();
    }
    return 0;
}
#endif
自制语法分析器
词法分析器将输入的字符串分割成记号并保存在内存中了,现在要判断这些分割出来的记号按怎样的顺序执行(即它们构成怎样的语法树)。
采用递归下降分析的方法。
parser.c
#include <stdio.h>
#include <stdlib.h>
#include "token.h"
/* Size of the buffer main() reads each input line into. */
#define LINE_BUF_SIZE (1024)
/* Single-token pushback slot filled by unget_token(). */
static Token st_look_ahead_token;
/* Non-zero while st_look_ahead_token holds a token not yet re-read. */
static int st_look_ahead_token_exists;
/*
 * Fetch the next token into *token.
 *
 * A token pushed back with unget_token() is returned first and the pushback
 * slot is cleared; otherwise the lexer is asked for a fresh token.
 */
static void
my_get_token(Token *token)
{
    if (!st_look_ahead_token_exists) {
        get_token(token);
        return;
    }
    st_look_ahead_token_exists = 0;
    *token = st_look_ahead_token;
}
/*
 * Push *token back so the next my_get_token() call returns it again.
 * Only one token is remembered; a second push overwrites the first.
 */
static void
unget_token(Token *token)
{
    st_look_ahead_token_exists = 1;
    st_look_ahead_token = *token;
}
/* Declared ahead of its definition: parenthesised operands recurse into it. */
double parse_expression(void);

/*
 * Parse one operand: an optional leading '-', followed by either a number
 * or a parenthesised expression.
 *
 * Returns the operand's value, negated when the '-' was present. If the
 * token after the optional '-' is neither a number nor '(', it is pushed
 * back and the operand counts as 0.0. Exits with status 1 when a
 * parenthesised expression is not closed by ')'.
 */
static double
parse_primary_expression()
{
    Token tok;
    double result = 0.0;
    int negate;

    /* Optional unary minus. */
    my_get_token(&tok);
    negate = (tok.kind == SUB_OPERATOR_TOKEN);
    if (!negate) {
        unget_token(&tok);
    }

    my_get_token(&tok);
    if (tok.kind == LEFT_PAREN_TOKEN) {
        result = parse_expression();
        my_get_token(&tok);
        if (tok.kind != RIGHT_PAREN_TOKEN) {
            fprintf(stderr, "missing ')' error.\n");
            exit(1);
        }
    } else if (tok.kind == NUMBER_TOKEN) {
        result = tok.value;
    } else {
        unget_token(&tok);
    }

    if (negate) {
        result = -result;
    }
    return result;
}
/*
 * Parse a term: operands joined by '*' or '/', evaluated left to right.
 *
 * The first token that is neither '*' nor '/' is pushed back for the caller.
 * Returns the value of the term.
 */
static double
parse_term()
{
    Token op;
    double product;
    double operand;

    product = parse_primary_expression();
    my_get_token(&op);
    while (op.kind == MUL_OPERATOR_TOKEN || op.kind == DIV_OPERATOR_TOKEN) {
        operand = parse_primary_expression();
        if (op.kind == MUL_OPERATOR_TOKEN) {
            product *= operand;
        } else {
            product /= operand;
        }
        my_get_token(&op);
    }
    unget_token(&op);
    return product;
}
/*
 * Parse an expression: terms joined by '+' or '-', evaluated left to right.
 *
 * The first token that is neither '+' nor '-' is pushed back for the caller.
 * Returns the value of the expression.
 */
double
parse_expression(void)
{
    double v1;
    double v2;
    Token token;

    v1 = parse_term();
    for (;;) {
        my_get_token(&token);
        if (token.kind != ADD_OPERATOR_TOKEN
            && token.kind != SUB_OPERATOR_TOKEN) {
            unget_token(&token);
            break;
        }
        v2 = parse_term();
        /* Past the check above the token can only be '+' or '-'. */
        if (token.kind == ADD_OPERATOR_TOKEN) {
            v1 += v2;
        } else {
            v1 -= v2;
        }
    }
    return v1;
}
/*
 * Evaluate the line most recently installed with set_line().
 *
 * Discards any token pushed back while parsing a previous line, then parses
 * one expression and returns its value.
 */
double
parse_line(void)
{
    st_look_ahead_token_exists = 0;
    return parse_expression();
}
/*
 * Calculator entry point: reads stdin one line at a time, evaluates each
 * line and prints the result. Returns 0 once input is exhausted.
 */
int
main(int argc, char **argv)
{
    char buf[LINE_BUF_SIZE];

    while (fgets(buf, LINE_BUF_SIZE, stdin) != NULL) {
        set_line(buf);
        printf(">>%f\n", parse_line());
    }
    return 0;
}
编译生成exe
gcc -o mycalc -Wall -Wswitch-enum -ansi parser.c lexicalanalyzer.c
运行mycalc.exe
visual studio实现