用递归下降的语法分析器实现简单的计算器:
看了这篇博客之后,总算搞明白了以前学编译原理时没搞懂的 LL 文法的意义。下面用这种方法实现一个简单的计算器程序:
定义语法
为了体现递归下降法的简洁之处,计算表达式采用 Lisp 语言的前缀写法来书写,这样就不必把中缀表达式转换成后缀表达式,也就不会破坏递归下降的格局。Lisp 的写法用括号显式确定了运算次序,不依赖运算符优先级:(1+2*5)*(3+4) 在 Lisp 中会被写成 (* (+ 1 (* 5 2)) (+ 3 4))
其BNF范式如下:
Operator = "+"
Operator = "-"
Operator = "*"
Operator = "/"
Expression = <0,1,2,3,4,..,9>
Expression = "(" Operator Expression Expression ")"
Expression = "(" Expression ")"
Operator = "-"
Operator = "*"
Operator = "/"
Expression = <0,1,2,3,4,..,9>
Expression = "(" Operator Expression Expression ")"
Expression = "(" Expression ")"
实现
#include <string>
#include <memory>
/// Outcome of parsing and evaluating one expression.
///
/// A default-constructed value has a zero result, an empty error message and
/// a null position pointer.
struct Expression
{
    /// Evaluated numeric value.
    float result = 0.0f;

    /// Error description; an empty string means no error was recorded.
    std::string Error;

    /// Non-owning pointer to the input position associated with an error;
    /// null when no position has been recorded.
    char* Start = nullptr;

    Expression() = default;
};
/// Recursive-descent evaluator for prefix (Lisp-style) arithmetic expressions.
///
/// All of the work happens in the constructor: it copies the input text,
/// evaluates it and prints the value or the error to std::cout.
class Lexer
{
public:
    /// Evaluates `str` and prints the outcome.
    Lexer(const std::string& str);

private:
    // --- scanning helpers ---------------------------------------------------

    /// Advances the cursor past space characters.
    void SkipWhitespace();

    /// Skips spaces, then consumes `text` if the input continues with it.
    /// Returns true when `text` was consumed.
    bool match(const char* text);

    /// Consumes one of + - * / and returns it, or returns 0 if none is next.
    char isOpreator();

    // --- grammar rules ------------------------------------------------------

    /// Parses a number or a parenthesised expression.
    Expression GetExpression();

    /// Parses a decimal number literal.
    Expression GetNumber();

    // --- state --------------------------------------------------------------

    /// Owned, NUL-terminated copy of the input text.
    std::unique_ptr<char[]> buffer;

    /// Index of the next unread character in `buffer`.
    size_t cur;

    /// Number of characters in `buffer`, terminator included.
    size_t size;
};
#include "Parser.h"
#include <cassert>
#include <cctype>
#include <cstring>
#include <iostream>
/// Copies `str` into an owned, NUL-terminated buffer, evaluates it as one
/// expression and prints either the value or the error details to std::cout.
///
/// @param str  Text of the expression to evaluate.
Lexer::Lexer(const std::string& str) : cur(), size()
{
    const size_t len = str.length();
    std::unique_ptr<char[]> ss(new char[len + 1]);
    // std::string::copy does not write a terminator, so add it explicitly.
    str.copy(ss.get(), len);
    ss[len] = 0;
    buffer = std::move(ss);
    size = len + 1; // the terminator is counted

    const Expression exp = GetExpression();
    if (exp.Error.empty()) {
        std::cout << exp.result << std::endl;
        return;
    }

    std::cout << "发生错误:" << std::endl;
    // An error may be reported without a recorded position; streaming a null
    // char* is undefined behaviour, so fall back to an empty string.
    std::cout << "位置: " << (exp.Start != nullptr ? exp.Start : "") << std::endl;
    std::cout << "信息: " << exp.Error << std::endl;
}
/// Advances `cur` past any run of space characters.
void Lexer::SkipWhitespace()
{
    // Test the bound first so buffer[cur] is never read once cur reaches size.
    while (cur < size && buffer[cur] == ' ') {
        ++cur;
    }
}
/// Skips leading spaces, then tries to consume `text` at the cursor.
///
/// @param text  NUL-terminated token to look for.
/// @return true if the input continued with `text` (the cursor is moved past
///         it); false otherwise (the cursor stays after the skipped spaces).
bool Lexer::match(const char* text)
{
    SkipWhitespace();

    const size_t tokenLength = strlen(text);
    const bool found = (strncmp(&buffer[cur], text, tokenLength) == 0);
    if (found) {
        cur += tokenLength;
    }
    return found;
}
/// Tries to consume one arithmetic operator at the cursor.
///
/// The candidates are tried in the order + - * /.
///
/// @return the operator character that was consumed, or 0 if none matched.
char Lexer::isOpreator()
{
    static const char* const kSymbols[] = { "+", "-", "*", "/" };
    const size_t count = sizeof(kSymbols) / sizeof(kSymbols[0]);

    for (size_t i = 0; i < count; ++i) {
        if (match(kSymbols[i])) {
            // The character just consumed is the operator itself.
            return buffer[cur - 1];
        }
    }
    return 0;
}
/// Parses and evaluates one expression at the cursor.
///
/// Accepted forms:
///   number
///   "(" operator expression expression ")"
///   "(" expression ")"
///
/// @return an Expression whose `Error` is empty on success (`result` holds
///         the value). On failure `Error` describes the problem and `Start`
///         points into the input buffer at the position it was detected.
Expression Lexer::GetExpression()
{
    // A bare number is a complete expression.
    const size_t begin = cur;
    Expression ret = GetNumber();
    if (ret.Error.empty()) {
        return ret;
    }

    // Rewind in case the failed number attempt advanced the cursor (for
    // example past a lone '-'); otherwise "-(+ 1 2)" would silently lose
    // its sign.
    cur = begin;

    if (!match("(")) {
        ret.Error = "此处出现未知字符";
        ret.Start = buffer.get() + cur;
        return ret;
    }

    const char op = isOpreator();
    if (op != 0) {
        // ( op expression expression )
        const Expression left = GetExpression();
        if (!left.Error.empty()) {
            return left;
        }
        const Expression right = GetExpression();
        if (!right.Error.empty()) {
            return right;
        }

        ret = Expression(); // discard the state of the failed number attempt
        switch (op) {
        case '+':
            ret.result = left.result + right.result;
            break;
        case '-':
            ret.result = left.result - right.result;
            break;
        case '*':
            ret.result = left.result * right.result;
            break;
        case '/':
            if (right.result == 0) {
                // Return immediately so the closing-parenthesis check below
                // cannot overwrite this error.
                ret.Error = "除数不能为0";
                ret.Start = buffer.get() + cur;
                return ret;
            }
            ret.result = left.result / right.result;
            break;
        default:
            assert(false && "should never get here");
            break;
        }
    } else {
        // ( expression )
        ret = GetExpression();
        if (!ret.Error.empty()) {
            return ret;
        }
    }

    if (!match(")")) {
        ret.Error = "此处缺少反括号)";
        // Always record a position so the error is never reported with a
        // null Start.
        ret.Start = buffer.get() + cur;
    }
    return ret;
}
/// Parses a decimal number at the cursor.
///
/// Accepted form: optional leading '-', then digits with at most one '.'
/// (for example "12", "-3", "1.003", ".5", "5."). At least one digit is
/// required.
///
/// @return on success, an Expression with `result` set and an empty `Error`;
///         the cursor is left after the number. On failure, `Error` is set,
///         `Start` points at the beginning of the rejected token, and the
///         cursor is restored to that position.
Expression Lexer::GetNumber()
{
    Expression ret;
    SkipWhitespace();
    const size_t begin = cur;

    // A sign is only meaningful as the very first character.
    bool negative = false;
    if (buffer[cur] == '-') {
        negative = true;
        ++cur;
    }

    bool sawDigit = false;
    bool sawPoint = false;
    bool malformed = false;
    float value = 0.0f;
    float scale = 1.0f; // 10^(number of fractional digits read so far)

    // The buffer is NUL-terminated, so the loop stops at the end of input.
    for (;; ++cur) {
        const char c = buffer[cur];
        // isdigit requires a value representable as unsigned char; a plain
        // char may be negative for non-ASCII bytes.
        if (isdigit(static_cast<unsigned char>(c))) {
            const float digit = static_cast<float>(c - '0');
            if (sawPoint) {
                scale *= 10.0f;
                value += digit / scale;
            } else {
                value = value * 10.0f + digit;
            }
            sawDigit = true;
        } else if (c == '.') {
            if (sawPoint) { // a second decimal point, e.g. "1.2.3"
                malformed = true;
                break;
            }
            sawPoint = true;
        } else {
            break;
        }
    }

    if (!sawDigit || malformed) {
        cur = begin; // consume nothing on failure
        ret.Error = "这里应该是数字";
        ret.Start = buffer.get() + cur;
        return ret;
    }

    ret.result = negative ? -value : value;
    return ret;
}
#include <iostream>
#include <string>
#include "Parser.h"
#pragma warning(disable:4996)
using namespace std;
/// Reads expressions line by line from input.txt and writes each result (or
/// error report) to output.txt by redirecting the standard streams.
///
/// @return 0 on success, 1 if either file could not be opened.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    // freopen returns a null pointer on failure; without these checks a
    // missing or unwritable file would make the program do nothing, silently.
    if (!freopen("input.txt", "rt", stdin)) {
        std::cerr << "无法打开输入文件 input.txt" << std::endl;
        return 1;
    }
    if (!freopen("output.txt", "wt", stdout)) {
        std::cerr << "无法打开输出文件 output.txt" << std::endl;
        return 1;
    }

    std::string input;
    while (getline(std::cin, input)) {
        // Constructing the Lexer evaluates the line and prints the outcome.
        Lexer lex(input);
    }
    return 0;
}
测试:
(* (+ 1.003 2) (+ 3 -4) )
(/ (+ 5 6) (- b3 5) )
(+ 3 5 )
输出:
-3.003
发生错误:
位置: b3 5) )
信息: 此处出现未知字符
8