《自制编程语言：第一章和第二章》C语言实现简单的计算器，词法分析器和语法分析器

一个java开发

已于 2022-02-01 13:17:21 修改

阅读量1.1k

点赞数 1

分类专栏：自制编程语言文章标签： c语言

于 2022-02-01 13:07:20 首次发布

本文链接：https://blog.csdn.net/hebian1994/article/details/122763605

版权

自制编程语言专栏收录该内容

2 篇文章 0 订阅

订阅专栏

《自制编程语言：第一章和第二章》

1使用yacc和lex实现简单的计算器

这两个工具的作用是根据我们编写的规则文件（lex的词法规则.l文件和yacc的语法规则.y文件）生成词法分析器和语法分析器的.c代码，然后我们用GCC来编译c代码生成exe文件；运行exe，输入1+1，回车执行即可看到计算结果。

win无法直接使用上述两个命令，所以安装各自的替代品bison和flex，都在unxutils工具包中。

安装unxutils，配置环境变量，可以使用bison和flex命令

https://blog.csdn.net/bedusing/article/details/5409495#comments

安装QTcreator，将minGW配置到环境变量，可以使用GCC命令

代码如下：

mycalc.y

%{
/*
 * mycalc.y - grammar of a line-oriented calculator.
 *
 * Each input line is one expression built from numeric literals and the
 * binary operators + - * /; its value is printed when the newline token
 * (CR) is reached.  MUL/DIV bind tighter than ADD/SUB because they are
 * handled one level deeper in the grammar (term vs. expression).
 */
#include <stdio.h>
#include <stdlib.h>
/* Compile the parser's tracing support into the generated code. */
#define YYDEBUG 1

/*
 * The generated parser calls both functions, but neither is declared by
 * the time that code is compiled: yylex() is defined in the scanner
 * (lex.yy.c) and yyerror() after the grammar.  Declare them explicitly
 * instead of relying on implicit function declarations.
 */
int yylex(void);
int yyerror(char const *str);
%}
/* Semantic value carried by tokens and nonterminals. */
%union {
    int          int_value;
    double       double_value;
}
%token <double_value>      DOUBLE_LITERAL
%token ADD SUB MUL DIV CR
%type <double_value> expression term primary_expression
%%
/* One or more lines. */
line_list
    : line
    | line_list line
    ;
/* A line is an expression followed by a newline; print its value. */
line
    : expression CR
    {
        printf(">>%lf\n", $1);
    }
    ;
/* Additive level (left-associative). */
expression
    : term
    | expression ADD term
    {
        $$ = $1 + $3;
    }
    | expression SUB term
    {
        $$ = $1 - $3;
    }
    ;
/* Multiplicative level (left-associative). */
term
    : primary_expression
    | term MUL primary_expression 
    {
        $$ = $1 * $3;
    }
    | term DIV primary_expression
    {
        $$ = $1 / $3;
    }
    ;
/* Operands: numeric literals only. */
primary_expression
    : DOUBLE_LITERAL
    ;                 
%%
/*
 * Error callback used by the generated parser.
 *
 * The message passed in `str` is not used; the report instead shows
 * yytext, the scanner's current token text, on stderr.
 * Always returns 0.
 */
int
yyerror(char const *str)
{
    extern char *yytext;

    (void)str;
    fprintf(stderr, "parser error near %s\n", yytext);

    return 0;
}

/*
 * Entry point of the yacc/lex calculator.
 *
 * Points the scanner's input (yyin) at stdin and runs the parser.  When
 * yyparse() reports failure (non-zero) an error line is written to stderr
 * and the process exits with status 1; otherwise it exits with status 0.
 */
int main(void)
{
    extern int yyparse(void);
    extern FILE *yyin;

    yyin = stdin;
    if (yyparse()) {
        fprintf(stderr, "Error ! Error ! Error !\n");
        exit(1);
    }
    /* Explicit success status: falling off the end of main() yields an
     * undefined exit status under C89 compilers. */
    return 0;
}

mycalc.l

%{
/*
 * mycalc.l - scanner for the calculator grammar in mycalc.y.
 *
 * Returns the operator tokens ADD/SUB/MUL/DIV, CR for a newline, and
 * DOUBLE_LITERAL (value in yylval.double_value) for numbers.  Blanks and
 * tabs are skipped; any other character is a fatal lexical error.
 */
#include <stdio.h>
#include <stdlib.h>     /* exit(), used by the catch-all rule */
#include "y.tab.h"

/* Returning 1 tells the scanner there is no further input after EOF. */
int
yywrap(void)
{
    return 1;
}
%}
%%
"+"             return ADD;
"-"             return SUB;
"*"             return MUL;
"/"             return DIV;
"\n"            return CR;
([1-9][0-9]*)|0|([0-9]+\.[0-9]*) {
    /* Integer or decimal literal: convert the matched text to a double. */
    double temp;
    sscanf(yytext, "%lf", &temp);
    yylval.double_value = temp;
    return DOUBLE_LITERAL;
}
[ \t]           { /* skip blanks and tabs */ }
. {
    /* Anything not matched above is not part of the language. */
    fprintf(stderr, "lexical error.\n");
    exit(1);
}
%%

bat脚本文件，点击执行。也可以一行一行输入查看效果

rem Generate y.tab.c and y.tab.h from the grammar.
bison --yacc -dv mycalc.y

rem Generate lex.yy.c from the scanner rules (mycalc.l includes y.tab.h, so bison must run first).
flex mycalc.l
rem Compile y.tab.c and lex.yy.c into mycalc.exe.
gcc -o mycalc y.tab.c lex.yy.c

执行mycalc.exe测试

2不借助工具编写计算器

自制词法分析器

运行机制：先用set_line()传入一行字符串，之后每调用一次get_token()，就会返回从该行中分割出来的下一个记号。

token.h

#ifndef TOKEN_H_INCLUDED
#define TOKEN_H_INCLUDED

/*
 * token.h - token type and lexer interface shared by the lexer
 * (lexicalanalyzer.c) and the parser (parser.c).
 */

/*
 * Kinds of token reported by get_token().
 * BAD_TOKEN is the kind get_token() assigns before it has classified
 * anything; END_OF_LINE_TOKEN is reported when a '\n' is reached.
 */
typedef enum {
    BAD_TOKEN,
    NUMBER_TOKEN,
    ADD_OPERATOR_TOKEN,
    SUB_OPERATOR_TOKEN,
    MUL_OPERATOR_TOKEN,
    DIV_OPERATOR_TOKEN,
    LEFT_PAREN_TOKEN,
    RIGHT_PAREN_TOKEN,
    END_OF_LINE_TOKEN
} TokenKind;

/* Capacity of Token.str in bytes, including the terminating '\0'. */
#define MAX_TOKEN_SIZE (100)

/*
 * One token.
 *   kind  - what sort of token this is.
 *   value - numeric value; filled in by get_token() for NUMBER_TOKEN.
 *   str   - the token's characters as a NUL-terminated string.
 */
typedef struct {
    TokenKind kind;
    double      value;
    char        str[MAX_TOKEN_SIZE];
} Token;

/* Select the line to scan; scanning restarts at its first character. */
void set_line(char *line);
/* Read the next token of the current line into *token. */
void get_token(Token *token);

#endif /* TOKEN_H_INCLUDED */

lexicalanalyzer.c

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "token.h"

/* Line currently being scanned; set_line() stores the caller's pointer here. */
static char *st_line;
/* Index into st_line of the next character get_token() will look at. */
static int st_line_pos;

/*
 * State of get_token() while it assembles a numeric literal:
 *   INITIAL_STATUS      - no digit consumed yet
 *   IN_INT_PART_STATUS  - consuming digits before the decimal point
 *   DOT_STATUS          - the decimal point was just consumed
 *   IN_FRAC_PART_STATUS - consuming digits after the decimal point
 */
typedef enum {
    INITIAL_STATUS,
    IN_INT_PART_STATUS,
    DOT_STATUS,
    IN_FRAC_PART_STATUS
} LexerStatus;

/*
 * Scan the next token from the line registered with set_line().
 *
 * On return token->kind identifies the token and token->str holds its
 * characters; for NUMBER_TOKEN token->value holds the parsed number.
 * Blanks between tokens are skipped.
 *
 * END_OF_LINE_TOKEN is reported at a '\n' (which is not consumed, so
 * further calls keep reporting it) and also when the end of the string is
 * reached, so a final line without a trailing newline terminates cleanly.
 *
 * A number is finalized by whatever follows it: an operator, a blank, the
 * newline or the end of the string.  A literal ending in '.' ("1.") is
 * accepted as a number, matching the lex rule used by the yacc version.
 *
 * Lexical errors (token longer than MAX_TOKEN_SIZE-1 characters, a
 * misplaced '.', or a character that is not part of the language) print a
 * message to stderr and terminate the program with exit(1).
 */
void
get_token(Token *token)
{
    int out_pos = 0;
    LexerStatus status = INITIAL_STATUS;
    char current_char;

    token->kind = BAD_TOKEN;
    token->str[0] = '\0';
    while (st_line[st_line_pos] != '\0') {
        current_char = st_line[st_line_pos];
        /* Inside a number, anything but a digit or '.' ends it; the
         * terminating character is left for the next call.  The cast keeps
         * the <ctype.h> calls defined for negative char values. */
        if (status != INITIAL_STATUS
            && !isdigit((unsigned char)current_char)
            && current_char != '.') {
            break;
        }
        if (isspace((unsigned char)current_char)) {
            if (current_char == '\n') {
                token->kind = END_OF_LINE_TOKEN;
                return;
            }
            st_line_pos++;
            continue;
        }

        if (out_pos >= MAX_TOKEN_SIZE-1) {
            fprintf(stderr, "token too long.\n");
            exit(1);
        }
        token->str[out_pos] = st_line[st_line_pos];
        st_line_pos++;
        out_pos++;
        token->str[out_pos] = '\0';

        if (current_char == '+') {
            token->kind = ADD_OPERATOR_TOKEN;
            return;
        } else if (current_char == '-') {
            token->kind = SUB_OPERATOR_TOKEN;
            return;
        } else if (current_char == '*') {
            token->kind = MUL_OPERATOR_TOKEN;
            return;
        } else if (current_char == '/') {
            token->kind = DIV_OPERATOR_TOKEN;
            return;
        } else if (current_char == '(') {
            token->kind = LEFT_PAREN_TOKEN;
            return;
        } else if (current_char == ')') {
            token->kind = RIGHT_PAREN_TOKEN;
            return;
        } else if (isdigit((unsigned char)current_char)) {
            if (status == INITIAL_STATUS) {
                status = IN_INT_PART_STATUS;
            } else if (status == DOT_STATUS) {
                status = IN_FRAC_PART_STATUS;
            }
        } else if (current_char == '.') {
            /* Only one '.' is allowed, and only after at least one digit. */
            if (status == IN_INT_PART_STATUS) {
                status = DOT_STATUS;
            } else {
                fprintf(stderr, "syntax error.\n");
                exit(1);
            }
        } else {
            /* Reject unknown characters instead of silently mixing them
             * into the text of the next token. */
            fprintf(stderr, "bad character(%c)\n", current_char);
            exit(1);
        }
    }

    if (status != INITIAL_STATUS) {
        /* The number was ended by a following character or by the end of
         * the string. */
        token->kind = NUMBER_TOKEN;
        sscanf(token->str, "%lf", &token->value);
        return;
    }
    /* End of string with nothing pending: treat it like the end of the line. */
    token->kind = END_OF_LINE_TOKEN;
}

/*
 * Register `line` as the text that subsequent get_token() calls scan,
 * starting again from its first character.  Only the pointer is stored,
 * not a copy of the text.
 */
void
set_line(char *line)
{
    st_line_pos = 0;
    st_line = line;
}

#if 0
/*
 * Disabled stand-alone test driver for the lexer (compiled out by #if 0).
 *
 * NOTE(review): as written it would not build or run if enabled:
 * LINE_BUF_SIZE is not defined in this file, and st_line is a pointer that
 * has no buffer assigned before fgets() writes through it.
 */

/* Print the kind and text of every token on the current line, stopping at
 * END_OF_LINE_TOKEN. */
void
parse_line(void)
{
    Token token;
    st_line_pos = 0;

    for (;;) {
        get_token(&token);
        if (token.kind == END_OF_LINE_TOKEN) {
          break;
        } else {
            printf("kind..%d, str..%s\n", token.kind, token.str);
        }
    }
}

int
main(int argc, char **argv)
{
/* Read input lines in a loop and split each one into tokens. */
    while (fgets(st_line, LINE_BUF_SIZE, stdin) != NULL) {
        parse_line();
    }

    return 0;
}
#endif

自制语法分析器

词法分析器已经能把输入的字符串分割成一个个记号了，现在要根据运算符的优先级，判断这些记号应该按怎样的顺序进行计算。

采用递归下降分析的方法。

parser.c

#include <stdio.h>
#include <stdlib.h>
#include "token.h"

/* Size in bytes of the buffer main() reads each input line into. */
#define LINE_BUF_SIZE (1024)

/* One-token pushback buffer: the token most recently handed to unget_token(). */
static Token st_look_ahead_token;
/* Non-zero while st_look_ahead_token holds a token not yet re-read. */
static int st_look_ahead_token_exists;

/*
 * Fetch the parser's next token into *token: the pushed-back token when
 * unget_token() has stored one (which empties the pushback slot),
 * otherwise a fresh token from the lexer.
 */
static void
my_get_token(Token *token)
{
    if (!st_look_ahead_token_exists) {
        get_token(token);
        return;
    }

    st_look_ahead_token_exists = 0;
    *token = st_look_ahead_token;
}

/*
 * Push *token back so that the next my_get_token() call returns it again.
 * There is a single pushback slot; a second push overwrites the first.
 */
static void
unget_token(Token *token)
{
    st_look_ahead_token_exists = 1;
    st_look_ahead_token = *token;
}

double parse_expression(void);

/*
 * Parse a primary expression: an optional leading '-', followed by either
 * a number or a parenthesized expression.
 *
 * Returns the operand's value, negated when the '-' was present.  If the
 * next token is neither a number nor '(', it is pushed back and the
 * operand counts as 0.0.  A '(' without its matching ')' prints an error
 * to stderr and exits with status 1.
 */
static double
parse_primary_expression()
{
    Token tok;
    double result = 0.0;
    int negate;

    /* Optional unary minus: consume it if present, otherwise put the
     * token back for the operand check below. */
    my_get_token(&tok);
    negate = (tok.kind == SUB_OPERATOR_TOKEN);
    if (!negate) {
        unget_token(&tok);
    }

    my_get_token(&tok);
    if (tok.kind == LEFT_PAREN_TOKEN) {
        result = parse_expression();
        my_get_token(&tok);
        if (tok.kind != RIGHT_PAREN_TOKEN) {
            fprintf(stderr, "missing ')' error.\n");
            exit(1);
        }
    } else if (tok.kind == NUMBER_TOKEN) {
        result = tok.value;
    } else {
        unget_token(&tok);
    }

    return negate ? -result : result;
}

/*
 * Parse a term: primary expressions joined by '*' or '/', evaluated from
 * left to right.  The first token that is not one of these operators is
 * pushed back for the caller.  Returns the value of the term.
 */
static double
parse_term()
{
    Token op;
    double product;
    double operand;

    product = parse_primary_expression();
    my_get_token(&op);
    while (op.kind == MUL_OPERATOR_TOKEN || op.kind == DIV_OPERATOR_TOKEN) {
        operand = parse_primary_expression();
        if (op.kind == DIV_OPERATOR_TOKEN) {
            product /= operand;
        } else {
            product *= operand;
        }
        my_get_token(&op);
    }
    /* Not a multiplicative operator: leave it for the caller. */
    unget_token(&op);

    return product;
}

/*
 * Parse an expression: terms joined by '+' or '-', evaluated from left to
 * right.  Because each operand is read with parse_term(), '*' and '/' are
 * applied before '+' and '-'.  The first token that is not an additive
 * operator is pushed back for the caller.  Returns the expression's value.
 */
double
parse_expression()
{
    double v1;
    double v2;
    Token token;

    v1 = parse_term();
    for (;;) {
        my_get_token(&token);
        if (token.kind != ADD_OPERATOR_TOKEN 
            && token.kind != SUB_OPERATOR_TOKEN) {
            unget_token(&token);
            break;
        }
        v2 = parse_term();
        /* token.kind is ADD or SUB here (checked above), so no further
         * case is needed. */
        if (token.kind == ADD_OPERATOR_TOKEN) {
            v1 += v2;
        } else {
            v1 -= v2;
        }
    }
    return v1;
}

/*
 * Evaluate the line most recently passed to set_line().
 * Clears the pushback slot first so a token left over from a previous
 * line cannot be read again, then returns the value of the expression.
 */
double
parse_line(void)
{
    st_look_ahead_token_exists = 0;
    return parse_expression();
}

/*
 * Entry point of the hand-written calculator: read stdin line by line,
 * evaluate each line as an expression and print the result prefixed with
 * ">>".  Stops when fgets() returns NULL.  The command-line arguments
 * are not used.
 */
int
main(int argc, char **argv)
{
    char line[LINE_BUF_SIZE];
    double result;

    for (;;) {
        if (fgets(line, LINE_BUF_SIZE, stdin) == NULL) {
            break;
        }
        set_line(line);
        result = parse_line();
        printf(">>%f\n", result);
    }

    return 0;
}

编译生成exe

gcc -o mycalc -Wall -Wswitch-enum -ansi parser.c lexicalanalyzer.c

运行mycalc.exe

visual studio实现

一个java开发

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录