Calculator3
Description
This version of the calculator is substantially more complex than previous versions. Major changes include control constructs such as if-else and while. In addition, a syntax tree is constructed during parsing. After parsing, we walk the syntax tree to produce output. Two versions of the tree walk routine are supplied:
• an interpreter that executes statements during the tree walk, and
• a compiler that generates code for a hypothetical stack-based machine.
The include file contains declarations for the syntax tree and symbol table. The symbol table, sym, allows for single-character variable names. A node in the syntax tree may hold a constant (conNodeType), an identifier (idNodeType), or an internal node with an operator (oprNodeType). A union encapsulates all three variants, and nodeType.type is used to determine which structure we have.
The lex input file contains patterns for VARIABLE and INTEGER tokens. In addition, tokens are defined for 2-character operators such as EQ and NE. Single-character operators are simply returned as themselves.
The yacc input file defines YYSTYPE, the type of yylval, as
%union {
int iValue; /* integer value */
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
};
This causes the following to be generated in y.tab.h:
typedef union {
int iValue; /* integer value */
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
} YYSTYPE;
extern YYSTYPE yylval;
Constants, variables, and nodes can be represented by yylval in the parser’s value stack. Notice the type definitions
%token <iValue> INTEGER
%type <nPtr> expr
This binds expr to nPtr, and INTEGER to iValue in the YYSTYPE union. This is required so that yacc can generate the correct code. For example, the rule
expr: INTEGER { $$ = con($1); }
should generate the following code. Note that yyvsp[0] addresses the top of the value stack, or the value associated with INTEGER.
yyval.nPtr = con(yyvsp[0].iValue);
The unary minus operator is given higher priority than binary operators as follows:
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
The %nonassoc indicates no associativity is implied. It is frequently used in conjunction with %prec to specify precedence of a rule. Thus, we have
expr: '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); }
indicating that the precedence of the rule is the same as the precedence of token UMINUS. And, as defined above, UMINUS has higher precedence than the other operators. A similar technique is used to remove ambiguity associated with the if-else statement (see If-Else Ambiguity).
The syntax tree is constructed bottom-up, allocating the leaf nodes when variables and integers are reduced. When operators are encountered, a node is allocated and pointers to previously allocated nodes are entered as operands.
After the tree is built, function ex is called to do a depth-first walk of the syntax tree. A depth-first walk visits nodes in the order that they were originally allocated. This results in operators being applied in the order that they were encountered during parsing. Three versions of ex are included: an interpretive version, a compiler version, and a version that generates a syntax tree.
Yacc Input
%{
/* C prologue: copied verbatim into the generated parser. */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "calc3.h"

/* prototypes */
nodeType *opr(int oper, int nops, ...);
nodeType *id(int i);
nodeType *con(int value);
void freeNode(nodeType *p);
int ex(nodeType *p);
int yylex(void);
void yyerror(char *s);

int sym[26];                    /* symbol table */
%}

/* Semantic value type (YYSTYPE) carried on the parser's value stack. */
%union {
    int iValue;                 /* integer value */
    char sIndex;                /* symbol table index */
    nodeType *nPtr;             /* node pointer */
};

%token <iValue> INTEGER
%token <sIndex> VARIABLE
%token WHILE IF PRINT

/* IFX is never returned by the lexer; it only names a precedence level */
/* (used with %prec on the else-less IF rule) that sits below ELSE.     */
%nonassoc IFX
%nonassoc ELSE

/* Operator precedence, lowest first; UMINUS binds tightest. */
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS

%type <nPtr> stmt expr stmt_list
%%
/* A program is a sequence of statements; exit once input is exhausted. */
program:
        function                { exit(0); }
        ;

/* Each complete statement is executed and its tree released immediately. */
function:
          function stmt         { ex($2); freeNode($2); }
        | /* NULL */
        ;

stmt:
          ';'                            { /* empty statement; the casts keep the
                                              variadic arguments typed as pointers */
                                           $$ = opr(';', 2, (nodeType *)NULL, (nodeType *)NULL); }
        | expr ';'                       { $$ = $1; }
        | PRINT expr ';'                 { $$ = opr(PRINT, 1, $2); }
        | VARIABLE '=' expr ';'          { $$ = opr('=', 2, id($1), $3); }
        | WHILE '(' expr ')' stmt        { $$ = opr(WHILE, 2, $3, $5); }
        | IF '(' expr ')' stmt %prec IFX { $$ = opr(IF, 2, $3, $5); }
        | IF '(' expr ')' stmt ELSE stmt { $$ = opr(IF, 3, $3, $5, $7); }
        | '{' stmt_list '}'              { $$ = $2; }
        ;

/* Statement lists are chained left to right with ';' operator nodes. */
stmt_list:
          stmt                  { $$ = $1; }
        | stmt_list stmt        { $$ = opr(';', 2, $1, $2); }
        ;

expr:
          INTEGER               { $$ = con($1); }
        | VARIABLE              { $$ = id($1); }
        | '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); }
        | expr '+' expr         { $$ = opr('+', 2, $1, $3); }
        | expr '-' expr         { $$ = opr('-', 2, $1, $3); }
        | expr '*' expr         { $$ = opr('*', 2, $1, $3); }
        | expr '/' expr         { $$ = opr('/', 2, $1, $3); }
        | expr '<' expr         { $$ = opr('<', 2, $1, $3); }
        | expr '>' expr         { $$ = opr('>', 2, $1, $3); }
        | expr GE expr          { $$ = opr(GE, 2, $1, $3); }
        | expr LE expr          { $$ = opr(LE, 2, $1, $3); }
        | expr NE expr          { $$ = opr(NE, 2, $1, $3); }
        | expr EQ expr          { $$ = opr(EQ, 2, $1, $3); }
        | '(' expr ')'          { $$ = $2; }
        ;
%%
/*
 * Byte offset of the variant member `con` from the start of a nodeType,
 * i.e. the size of the part of the node that precedes the variant data.
 * The expansion refers to a variable named `p`, so it can only be used
 * where such a pointer is in scope.
 * NOTE(review): the allocators use this before `p` has been assigned;
 * offsetof(nodeType, con) would avoid reading `p` at all -- confirm.
 */
#define SIZEOF_NODETYPE ((char *)&p->con - (char *)p)
nodeType * con( int value) ... {
nodeType *p;
size_t nodeSize;
/**//* allocate node */
nodeSize = SIZEOF_NODETYPE + sizeof(conNodeType);
if ((p = malloc(nodeSize)) == NULL)
yyerror("out of memory");
/**//* copy information */
p->type = typeCon;
p->con.value = value;
return p;
}
nodeType * id( int i) ... {
nodeType *p;
size_t nodeSize;
/**//* allocate node */
nodeSize = SIZEOF_NODETYPE + sizeof(idNodeType);
if ((p = malloc(nodeSize)) == NULL)
yyerror("out of memory");
/**//* copy information */
p->type = typeId;
p->id.i = i;
return p;
}
nodeType * opr( int oper, int nops, ...) ... {
va_list ap;
nodeType *p;
size_t nodeSize;
int i;
/**//* allocate node */
nodeSize = SIZEOF_NODETYPE + sizeof(oprNodeType) +
(nops - 1) * sizeof(nodeType*);
if ((p = malloc(nodeSize)) == NULL)
yyerror("out of memory");
/**//* copy information */
p->type = typeOpr;
p->opr.oper = oper;
p->opr.nops = nops;
va_start(ap, nops);
for (i = 0; i < nops; i++)
p->opr.op[i] = va_arg(ap, nodeType*);
va_end(ap);
return p;
}
/*
 * freeNode - release a syntax tree.
 *
 * p: root of the tree; NULL is accepted and ignored.
 *
 * Operator nodes free all of their operands recursively before the
 * node itself is freed.
 */
void freeNode(nodeType *p) {
    int i;

    if (!p) return;
    if (p->type == typeOpr) {
        for (i = 0; i < p->opr.nops; i++)
            freeNode(p->opr.op[i]);
    }
    free(p);
}
/*
 * yyerror - report a diagnostic message.
 *
 * s: message text; written to standard output followed by a space.
 * The function only prints and then returns to its caller.
 */
void yyerror(char *s) {
    fprintf(stdout, "%s ", s);
}
/*
 * main - run the parser over standard input.
 *
 * A successful parse ends through exit(0) in the `program` rule, so
 * control normally comes back here only when yyparse() gave up; its
 * status is passed on so that failure is not reported as success.
 */
int main(void) {
    return yyparse();
}
Description
This version of the calculator is substantially more complex than previous versions. Major changes include control constructs such as if-else and while. In addition, a syntax tree is constructed during parsing. After parsing, we walk the syntax tree to produce output. Two versions of the tree walk routine are supplied:
• an interpreter that executes statements during the tree walk, and
• a compiler that generates code for a hypothetical stack-based machine.
The include file contains declarations for the syntax tree and symbol table. The symbol table, sym, allows for single-character variable names. A node in the syntax tree may hold a constant (conNodeType), an identifier (idNodeType), or an internal node with an operator (oprNodeType). A union encapsulates all three variants, and nodeType.type is used to determine which structure we have.
The lex input file contains patterns for VARIABLE and INTEGER tokens. In addition, tokens are defined for 2-character operators such as EQ and NE. Single-character operators are simply returned as themselves.
The yacc input file defines YYSTYPE, the type of yylval, as
%union {
int iValue; /* integer value */
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
};
This causes the following to be generated in y.tab.h:
typedef union {
int iValue; /* integer value */
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
} YYSTYPE;
extern YYSTYPE yylval;
Constants, variables, and nodes can be represented by yylval in the parser’s value stack. Notice the type definitions
%token <iValue> INTEGER
%type <nPtr> expr
This binds expr to nPtr, and INTEGER to iValue in the YYSTYPE union. This is required so that yacc can generate the correct code. For example, the rule
expr: INTEGER { $$ = con($1); }
should generate the following code. Note that yyvsp[0] addresses the top of the value stack, or the value associated with INTEGER.
yyval.nPtr = con(yyvsp[0].iValue);
The unary minus operator is given higher priority than binary operators as follows:
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
The %nonassoc indicates no associativity is implied. It is frequently used in conjunction with %prec to specify precedence of a rule. Thus, we have
expr: '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); }
indicating that the precedence of the rule is the same as the precedence of token UMINUS. And, as defined above, UMINUS has higher precedence than the other operators. A similar technique is used to remove ambiguity associated with the if-else statement (see If-Else Ambiguity).
The syntax tree is constructed bottom-up, allocating the leaf nodes when variables and integers are reduced. When operators are encountered, a node is allocated and pointers to previously allocated nodes are entered as operands.
After the tree is built, function ex is called to do a depth-first walk of the syntax tree. A depth-first walk visits nodes in the order that they were originally allocated. This results in operators being applied in the order that they were encountered during parsing. Three versions of ex are included: an interpretive version, a compiler version, and a version that generates a syntax tree.
Include File
/* Discriminator telling which variant a nodeType currently holds. */
typedef enum { typeCon, typeId, typeOpr } nodeEnum;
/* constants */
typedef struct {
    int value;                  /* value of constant */
} conNodeType;
/* identifiers */
typedef struct {
    int i;                      /* subscript to sym array */
} idNodeType;
/* operators */
typedef struct {
    int oper;                   /* operator */
    int nops;                   /* number of operands */
    /* Declared with one slot; nodes with more operands are allocated */
    /* larger, so op[] is indexed up to nops - 1.                     */
    struct nodeTypeTag *op[1];  /* operands (expandable) */
} oprNodeType;
typedef struct nodeTypeTag ... {
nodeEnum type; /**//* type of node */
/**//* union must be last entry in nodeType */
/**//* because operNodeType may dynamically increase */
union ...{
conNodeType con; /**//* constants */
idNodeType id; /**//* identifiers */
oprNodeType opr; /**//* operators */
};
} nodeType;
extern int sym[ 26 ];
/* Discriminator telling which variant a nodeType currently holds. */
typedef enum { typeCon, typeId, typeOpr } nodeEnum;
/* constants */
typedef struct {
    int value;                  /* value of constant */
} conNodeType;
/* identifiers */
typedef struct {
    int i;                      /* subscript to sym array */
} idNodeType;
/* operators */
typedef struct {
    int oper;                   /* operator */
    int nops;                   /* number of operands */
    /* Declared with one slot; nodes with more operands are allocated */
    /* larger, so op[] is indexed up to nops - 1.                     */
    struct nodeTypeTag *op[1];  /* operands (expandable) */
} oprNodeType;
typedef struct nodeTypeTag ... {
nodeEnum type; /**//* type of node */
/**//* union must be last entry in nodeType */
/**//* because operNodeType may dynamically increase */
union ...{
conNodeType con; /**//* constants */
idNodeType id; /**//* identifiers */
oprNodeType opr; /**//* operators */
};
} nodeType;
extern int sym[ 26 ];
Lex Input
%{
/* C declarations copied verbatim into the generated scanner. */
#include <stdlib.h>
#include "calc3.h"      /* nodeType, which the YYSTYPE union in y.tab.h uses */
#include "y.tab.h"
void yyerror(char *);
%}
%%
[a-z]       {
                /* single-letter variable: 'a'..'z' maps to index 0..25 */
                yylval.sIndex = *yytext - 'a';
                return VARIABLE;
            }

[0-9]+      {
                yylval.iValue = atoi(yytext);
                return INTEGER;
            }

[-()<>=+*/;{}.] {
                /* single-character operators are returned as themselves */
                return *yytext;
            }

">="            return GE;
"<="            return LE;
"=="            return EQ;
"!="            return NE;
"while"         return WHILE;
"if"            return IF;
"else"          return ELSE;
"print"         return PRINT;

[ \t\n]+        ;       /* ignore whitespace */

.               yyerror("Unknown character");
%%
/*
 * yywrap - called by the scanner when it reaches end of input.
 * Always returns 1.
 */
int yywrap(void) {
    return 1;
}
%{
/* C declarations copied verbatim into the generated scanner. */
#include <stdlib.h>
#include "calc3.h"      /* nodeType, which the YYSTYPE union in y.tab.h uses */
#include "y.tab.h"
void yyerror(char *);
%}
%%
[a-z]       {
                /* single-letter variable: 'a'..'z' maps to index 0..25 */
                yylval.sIndex = *yytext - 'a';
                return VARIABLE;
            }

[0-9]+      {
                yylval.iValue = atoi(yytext);
                return INTEGER;
            }

[-()<>=+*/;{}.] {
                /* single-character operators are returned as themselves */
                return *yytext;
            }

">="            return GE;
"<="            return LE;
"=="            return EQ;
"!="            return NE;
"while"         return WHILE;
"if"            return IF;
"else"          return ELSE;
"print"         return PRINT;

[ \t\n]+        ;       /* ignore whitespace */

.               yyerror("Unknown character");
%%
/*
 * yywrap - called by the scanner when it reaches end of input.
 * Always returns 1.
 */
int yywrap(void) {
    return 1;
}
Yacc Input
%{
/* C prologue: copied verbatim into the generated parser. */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "calc3.h"

/* prototypes */
nodeType *opr(int oper, int nops, ...);
nodeType *id(int i);
nodeType *con(int value);
void freeNode(nodeType *p);
int ex(nodeType *p);
int yylex(void);
void yyerror(char *s);

int sym[26];                    /* symbol table */
%}

/* Semantic value type (YYSTYPE) carried on the parser's value stack. */
%union {
    int iValue;                 /* integer value */
    char sIndex;                /* symbol table index */
    nodeType *nPtr;             /* node pointer */
};

%token <iValue> INTEGER
%token <sIndex> VARIABLE
%token WHILE IF PRINT

/* IFX is never returned by the lexer; it only names a precedence level */
/* (used with %prec on the else-less IF rule) that sits below ELSE.     */
%nonassoc IFX
%nonassoc ELSE

/* Operator precedence, lowest first; UMINUS binds tightest. */
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS

%type <nPtr> stmt expr stmt_list
%%
/* A program is a sequence of statements; exit once input is exhausted. */
program:
        function                { exit(0); }
        ;

/* Each complete statement is executed and its tree released immediately. */
function:
          function stmt         { ex($2); freeNode($2); }
        | /* NULL */
        ;

stmt:
          ';'                            { /* empty statement; the casts keep the
                                              variadic arguments typed as pointers */
                                           $$ = opr(';', 2, (nodeType *)NULL, (nodeType *)NULL); }
        | expr ';'                       { $$ = $1; }
        | PRINT expr ';'                 { $$ = opr(PRINT, 1, $2); }
        | VARIABLE '=' expr ';'          { $$ = opr('=', 2, id($1), $3); }
        | WHILE '(' expr ')' stmt        { $$ = opr(WHILE, 2, $3, $5); }
        | IF '(' expr ')' stmt %prec IFX { $$ = opr(IF, 2, $3, $5); }
        | IF '(' expr ')' stmt ELSE stmt { $$ = opr(IF, 3, $3, $5, $7); }
        | '{' stmt_list '}'              { $$ = $2; }
        ;

/* Statement lists are chained left to right with ';' operator nodes. */
stmt_list:
          stmt                  { $$ = $1; }
        | stmt_list stmt        { $$ = opr(';', 2, $1, $2); }
        ;

expr:
          INTEGER               { $$ = con($1); }
        | VARIABLE              { $$ = id($1); }
        | '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); }
        | expr '+' expr         { $$ = opr('+', 2, $1, $3); }
        | expr '-' expr         { $$ = opr('-', 2, $1, $3); }
        | expr '*' expr         { $$ = opr('*', 2, $1, $3); }
        | expr '/' expr         { $$ = opr('/', 2, $1, $3); }
        | expr '<' expr         { $$ = opr('<', 2, $1, $3); }
        | expr '>' expr         { $$ = opr('>', 2, $1, $3); }
        | expr GE expr          { $$ = opr(GE, 2, $1, $3); }
        | expr LE expr          { $$ = opr(LE, 2, $1, $3); }
        | expr NE expr          { $$ = opr(NE, 2, $1, $3); }
        | expr EQ expr          { $$ = opr(EQ, 2, $1, $3); }
        | '(' expr ')'          { $$ = $2; }
        ;
%%
/*
 * Byte offset of the variant member `con` from the start of a nodeType,
 * i.e. the size of the part of the node that precedes the variant data.
 * The expansion refers to a variable named `p`, so it can only be used
 * where such a pointer is in scope.
 * NOTE(review): the allocators use this before `p` has been assigned;
 * offsetof(nodeType, con) would avoid reading `p` at all -- confirm.
 */
#define SIZEOF_NODETYPE ((char *)&p->con - (char *)p)
nodeType * con( int value) ... {
nodeType *p;
size_t nodeSize;
/**//* allocate node */
nodeSize = SIZEOF_NODETYPE + sizeof(conNodeType);
if ((p = malloc(nodeSize)) == NULL)
yyerror("out of memory");
/**//* copy information */
p->type = typeCon;
p->con.value = value;
return p;
}
nodeType * id( int i) ... {
nodeType *p;
size_t nodeSize;
/**//* allocate node */
nodeSize = SIZEOF_NODETYPE + sizeof(idNodeType);
if ((p = malloc(nodeSize)) == NULL)
yyerror("out of memory");
/**//* copy information */
p->type = typeId;
p->id.i = i;
return p;
}
nodeType * opr( int oper, int nops, ...) ... {
va_list ap;
nodeType *p;
size_t nodeSize;
int i;
/**//* allocate node */
nodeSize = SIZEOF_NODETYPE + sizeof(oprNodeType) +
(nops - 1) * sizeof(nodeType*);
if ((p = malloc(nodeSize)) == NULL)
yyerror("out of memory");
/**//* copy information */
p->type = typeOpr;
p->opr.oper = oper;
p->opr.nops = nops;
va_start(ap, nops);
for (i = 0; i < nops; i++)
p->opr.op[i] = va_arg(ap, nodeType*);
va_end(ap);
return p;
}
/*
 * freeNode - release a syntax tree.
 *
 * p: root of the tree; NULL is accepted and ignored.
 *
 * Operator nodes free all of their operands recursively before the
 * node itself is freed.
 */
void freeNode(nodeType *p) {
    int i;

    if (!p) return;
    if (p->type == typeOpr) {
        for (i = 0; i < p->opr.nops; i++)
            freeNode(p->opr.op[i]);
    }
    free(p);
}
/*
 * yyerror - report a diagnostic message.
 *
 * s: message text; written to standard output followed by a space.
 * The function only prints and then returns to its caller.
 */
void yyerror(char *s) {
    fprintf(stdout, "%s ", s);
}
/*
 * main - run the parser over standard input.
 *
 * A successful parse ends through exit(0) in the `program` rule, so
 * control normally comes back here only when yyparse() gave up; its
 * status is passed on so that failure is not reported as success.
 */
int main(void) {
    return yyparse();
}
Interpreter
#include < stdio.h >
#include " calc3.h "
#include " y.tab.h "
/*
 * ex - interpret a syntax tree by walking it recursively.
 *
 * p: root of the (sub)tree; NULL evaluates to 0.
 *
 * Returns the value of the node: the constant, the variable's entry in
 * sym, or the result of the operator. WHILE, IF and PRINT return 0, ';'
 * returns the value of its second operand, and '=' returns the value
 * that was stored. Division by zero is reported on stderr and yields 0
 * instead of invoking undefined behaviour.
 */
int ex(nodeType *p) {
    if (!p) return 0;
    switch (p->type) {
    case typeCon:       return p->con.value;
    case typeId:        return sym[p->id.i];
    case typeOpr:
        switch (p->opr.oper) {
        case WHILE:     while (ex(p->opr.op[0]))
                            ex(p->opr.op[1]);
                        return 0;
        case IF:        if (ex(p->opr.op[0]))
                            ex(p->opr.op[1]);
                        else if (p->opr.nops > 2)   /* else branch is optional */
                            ex(p->opr.op[2]);
                        return 0;
        case PRINT:     printf("%d ", ex(p->opr.op[0])); return 0;
        case ';':       ex(p->opr.op[0]);
                        return ex(p->opr.op[1]);
        case '=':       return sym[p->opr.op[0]->id.i] =
                            ex(p->opr.op[1]);
        case UMINUS:    return -ex(p->opr.op[0]);
        case '+':       return ex(p->opr.op[0]) + ex(p->opr.op[1]);
        case '-':       return ex(p->opr.op[0]) - ex(p->opr.op[1]);
        case '*':       return ex(p->opr.op[0]) * ex(p->opr.op[1]);
        case '/': {
            int lhs = ex(p->opr.op[0]);
            int rhs = ex(p->opr.op[1]);

            if (rhs == 0) {
                fprintf(stderr, "division by zero\n");
                return 0;
            }
            return lhs / rhs;
        }
        case '<':       return ex(p->opr.op[0]) < ex(p->opr.op[1]);
        case '>':       return ex(p->opr.op[0]) > ex(p->opr.op[1]);
        case GE:        return ex(p->opr.op[0]) >= ex(p->opr.op[1]);
        case LE:        return ex(p->opr.op[0]) <= ex(p->opr.op[1]);
        case NE:        return ex(p->opr.op[0]) != ex(p->opr.op[1]);
        case EQ:        return ex(p->opr.op[0]) == ex(p->opr.op[1]);
        }
    }
    return 0;
}
#include < stdio.h >
#include " calc3.h "
#include " y.tab.h "
/*
 * ex - interpret a syntax tree by walking it recursively.
 *
 * p: root of the (sub)tree; NULL evaluates to 0.
 *
 * Returns the value of the node: the constant, the variable's entry in
 * sym, or the result of the operator. WHILE, IF and PRINT return 0, ';'
 * returns the value of its second operand, and '=' returns the value
 * that was stored. Division by zero is reported on stderr and yields 0
 * instead of invoking undefined behaviour.
 */
int ex(nodeType *p) {
    if (!p) return 0;
    switch (p->type) {
    case typeCon:       return p->con.value;
    case typeId:        return sym[p->id.i];
    case typeOpr:
        switch (p->opr.oper) {
        case WHILE:     while (ex(p->opr.op[0]))
                            ex(p->opr.op[1]);
                        return 0;
        case IF:        if (ex(p->opr.op[0]))
                            ex(p->opr.op[1]);
                        else if (p->opr.nops > 2)   /* else branch is optional */
                            ex(p->opr.op[2]);
                        return 0;
        case PRINT:     printf("%d ", ex(p->opr.op[0])); return 0;
        case ';':       ex(p->opr.op[0]);
                        return ex(p->opr.op[1]);
        case '=':       return sym[p->opr.op[0]->id.i] =
                            ex(p->opr.op[1]);
        case UMINUS:    return -ex(p->opr.op[0]);
        case '+':       return ex(p->opr.op[0]) + ex(p->opr.op[1]);
        case '-':       return ex(p->opr.op[0]) - ex(p->opr.op[1]);
        case '*':       return ex(p->opr.op[0]) * ex(p->opr.op[1]);
        case '/': {
            int lhs = ex(p->opr.op[0]);
            int rhs = ex(p->opr.op[1]);

            if (rhs == 0) {
                fprintf(stderr, "division by zero\n");
                return 0;
            }
            return lhs / rhs;
        }
        case '<':       return ex(p->opr.op[0]) < ex(p->opr.op[1]);
        case '>':       return ex(p->opr.op[0]) > ex(p->opr.op[1]);
        case GE:        return ex(p->opr.op[0]) >= ex(p->opr.op[1]);
        case LE:        return ex(p->opr.op[0]) <= ex(p->opr.op[1]);
        case NE:        return ex(p->opr.op[0]) != ex(p->opr.op[1]);
        case EQ:        return ex(p->opr.op[0]) == ex(p->opr.op[1]);
        }
    }
    return 0;
}