学习了编译原理的词法分析(scanning)和语法分析(parsing)之后,写了一个简单的算术表达式解析器:输入为算术表达式,输出为计算结果,支持加减乘除、括号和浮点数运算。
首先画出DFA,根据状态间的转移求出getToken函数。
然后求出EBNF:
<exp> -> <term> { <addop> <term> }
<addop> -> + | -
<term> -> <factor> { <mulop> <factor> }
<mulop> -> * | /
<factor> -> ( <exp> ) | number
并实现exp、term、factor函数。
源代码:
首先画出DFA,根据状态间的转移求出getToken函数。
然后求出EBNF:
<exp> -> <term> { <addop> <term> }
<addop> -> + | -
<term> -> <factor> { <mulop> <factor> }
<mulop> -> * | /
<factor> -> ( <exp> ) | number
并实现exp、term、factor函数。
源代码:
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Size in bytes of the global input buffer that holds one expression.
#define BUFLEN 1024
// Maximum number of characters stored for a single token; the token
// array is declared one byte larger to leave room for the terminator.
#define MAXTOKENLEN 256
// Kinds of token produced by getToken().
typedef enum
{
    // End of the input buffer.
    ENDFILE,
    // Malformed input (unexpected character or badly formed number).
    ERROR,
    // One of the symbols + - * / ( )
    SYMBOL,
    // Numeric literal.
    NUM,
} TokenType;
// States of the scanner's DFA.
typedef enum
{
    START, // initial state, nothing consumed yet
    INUM,  // inside the digits of a number
    DOT,   // just consumed a '.', a digit must follow
    END    // token complete, scanning stops
} StateType;
// Index of the next unread character in buffer.
int bufIndex;
// Input buffer holding the expression currently being evaluated.
char buffer[BUFLEN]; // input buffer
// Text of the most recently scanned token.
char tokenString[MAXTOKENLEN + 1 ]; // current token
// Forward declarations of the mutually recursive parsing functions.
double exp();
double term();
double factor();
// 获取缓冲区下一个字符
char getNextChar()
... {
int len = strlen(buffer);
if(bufIndex < len)
return buffer[bufIndex++];
else
return EOF;
}
// 缓冲区回退一个字符
void ungetNextChar()
... {bufIndex--;}
// 获取缓冲区下一个token,并返回token的类型
TokenType getToken()
... {
int tokenIndex = 0;
TokenType currentType;
StateType state = START;
bool save;
while(state != END)
...{
char c = getNextChar();
save = true;
switch(state)
...{
case START:
...{
if(isdigit(c))
state = INUM;
else if(c == '+' || c == '-' || c == '*' || c == '/' ||
c == '(' || c == ')')
...{
currentType = SYMBOL;
state = END;
}
else if(c == EOF)
...{
currentType = ENDFILE;
state = END;
}
else
...{
currentType = ERROR;
state = END;
}
break;
}
case INUM:
...{
if(c == '.')
state = DOT;
else if(c == EOF)
...{
currentType = NUM;
state = END;
}
else if(!isdigit(c))
...{
ungetNextChar();
save = false;
currentType = NUM;
state = END;
}
break;
}
case DOT:
...{
if(isdigit(c))
state = INUM;
else
...{
currentType = ERROR;
state = END;
}
break;
}
}
if(save && tokenIndex < MAXTOKENLEN)
tokenString[tokenIndex++] = c;
if(state == END)
tokenString[tokenIndex] = '/0';
}
return currentType;
}
// Report a syntax error on standard output and terminate the program
// with exit status 1. This function does not return.
void error(void)
{
    printf("syntax error! ");
    exit(1);
}
// parsing中的match函数
void match( const char * expectedToken)
... {
if(strcmp(expectedToken, tokenString) == 0)
getToken();
else
error();
}
double exp()
... {
double tmp = term();
while(tokenString[0] == '+' || tokenString[0] == '-')
...{
switch(tokenString[0])
...{
case '+':
...{
match("+");
tmp += term();
break;
}
case '-':
...{
match("-");
tmp -= term();
break;
}
}
}
return tmp;
}
double term()
... {
double tmp = factor();
while(tokenString[0] == '*' || tokenString[0] == '/')
...{
switch(tokenString[0])
...{
case '*':
...{
match("*");
tmp *= factor();
break;
}
case '/':
...{
match("/");
tmp /= factor();
break;
}
}
}
return tmp;
}
double factor()
... {
double tmp;
if(tokenString[0] == '(')
...{
match("(");
tmp = exp();
match(")");
}
else if(isdigit(tokenString[0]))
...{
tmp = atof(tokenString);
getToken();
}
else
error();
return tmp;
}
int main()
... {
while(scanf("%s", buffer) != EOF)
...{
bufIndex = 0;
getToken();
printf("%s = ", buffer);
printf("%lf ", exp());
}
return 0;
}
#include < ctype.h >
#include < stdlib.h >
#include < string .h >
// Size in bytes of the global input buffer that holds one expression.
#define BUFLEN 1024
// Maximum number of characters stored for a single token; the token
// array is declared one byte larger to leave room for the terminator.
#define MAXTOKENLEN 256
// Kinds of token produced by getToken().
typedef enum
{
    // End of the input buffer.
    ENDFILE,
    // Malformed input (unexpected character or badly formed number).
    ERROR,
    // One of the symbols + - * / ( )
    SYMBOL,
    // Numeric literal.
    NUM,
} TokenType;
// States of the scanner's DFA.
typedef enum
{
    START, // initial state, nothing consumed yet
    INUM,  // inside the digits of a number
    DOT,   // just consumed a '.', a digit must follow
    END    // token complete, scanning stops
} StateType;
// Index of the next unread character in buffer.
int bufIndex;
// Input buffer holding the expression currently being evaluated.
char buffer[BUFLEN]; // input buffer
// Text of the most recently scanned token.
char tokenString[MAXTOKENLEN + 1 ]; // current token
// Forward declarations of the mutually recursive parsing functions.
double exp();
double term();
double factor();
// 获取缓冲区下一个字符
char getNextChar()
... {
int len = strlen(buffer);
if(bufIndex < len)
return buffer[bufIndex++];
else
return EOF;
}
// 缓冲区回退一个字符
void ungetNextChar()
... {bufIndex--;}
// 获取缓冲区下一个token,并返回token的类型
TokenType getToken()
... {
int tokenIndex = 0;
TokenType currentType;
StateType state = START;
bool save;
while(state != END)
...{
char c = getNextChar();
save = true;
switch(state)
...{
case START:
...{
if(isdigit(c))
state = INUM;
else if(c == '+' || c == '-' || c == '*' || c == '/' ||
c == '(' || c == ')')
...{
currentType = SYMBOL;
state = END;
}
else if(c == EOF)
...{
currentType = ENDFILE;
state = END;
}
else
...{
currentType = ERROR;
state = END;
}
break;
}
case INUM:
...{
if(c == '.')
state = DOT;
else if(c == EOF)
...{
currentType = NUM;
state = END;
}
else if(!isdigit(c))
...{
ungetNextChar();
save = false;
currentType = NUM;
state = END;
}
break;
}
case DOT:
...{
if(isdigit(c))
state = INUM;
else
...{
currentType = ERROR;
state = END;
}
break;
}
}
if(save && tokenIndex < MAXTOKENLEN)
tokenString[tokenIndex++] = c;
if(state == END)
tokenString[tokenIndex] = '/0';
}
return currentType;
}
// Report a syntax error on standard output and terminate the program
// with exit status 1. This function does not return.
void error(void)
{
    printf("syntax error! ");
    exit(1);
}
// parsing中的match函数
void match( const char * expectedToken)
... {
if(strcmp(expectedToken, tokenString) == 0)
getToken();
else
error();
}
double exp()
... {
double tmp = term();
while(tokenString[0] == '+' || tokenString[0] == '-')
...{
switch(tokenString[0])
...{
case '+':
...{
match("+");
tmp += term();
break;
}
case '-':
...{
match("-");
tmp -= term();
break;
}
}
}
return tmp;
}
double term()
... {
double tmp = factor();
while(tokenString[0] == '*' || tokenString[0] == '/')
...{
switch(tokenString[0])
...{
case '*':
...{
match("*");
tmp *= factor();
break;
}
case '/':
...{
match("/");
tmp /= factor();
break;
}
}
}
return tmp;
}
double factor()
... {
double tmp;
if(tokenString[0] == '(')
...{
match("(");
tmp = exp();
match(")");
}
else if(isdigit(tokenString[0]))
...{
tmp = atof(tokenString);
getToken();
}
else
error();
return tmp;
}
int main()
... {
while(scanf("%s", buffer) != EOF)
...{
bufIndex = 0;
getToken();
printf("%s = ", buffer);
printf("%lf ", exp());
}
return 0;
}
运行结果
输入:0.265/(58-6.36*1.25)+3.16-((2.356/69.01)-3.6)
输出:0.265/(58-6.36*1.25)+3.16-((2.356/69.01)-3.6) = 6.731155