模型抽取
一个模型 `Model<term, uterm>` 拥有两个元类型:`Term` 终结符类型和 `Uterm` 非终结符类型,对应于文法 $G(N,T,P,S)$ 中的概念,如下:
/// In-memory form of a grammar G(N, T, P, S) as loaded from a model file.
///
/// The class only stores the grammar; it does not build any parser or
/// analysis structures from it.
///
/// @tparam term_t   type used for terminal symbols.
/// @tparam uterm_t  type used for non-terminal symbols.
template<typename term_t = int32_t, typename uterm_t = int32_t>
class Model {
    // Processor is granted access to the model's internals.
    friend class Processor;

public:
    using string = std::string;
    using strvec = std::vector<string>;
    using symbol_t = Symbol<term_t, uterm_t>;
    using model = Model<term_t, uterm_t>;

    /// Symbol table keyed by name; corresponds to N ∪ T.
    std::map<string, symbol_t> sym_table;
    /// Production rules; corresponds to P.
    std::vector<Production<symbol_t>> prods;
    /// Start symbol; corresponds to S.
    symbol_t begin_symbol;
};  // the terminating ';' is required after a class definition
其中 `sym_table` 对应 $N\cup T$,`prods` 对应 $P$,`begin_symbol` 对应 $S$。
这个模型类只负责将文件内的模型读入到内存中,不负责具体的语法构建。
递归调用分析
递归调用分析写出的代码只针对具体一种语言,因此实用性不强。代码如下:
/// Hand-written recursive-descent parser for arithmetic expressions.
///
/// Grammar implemented by the parse* methods:
///   E  -> T ED
///   ED -> (add | sub) T ED | ε
///   T  -> F TD
///   TD -> (mul | div) F TD | ε
///   F  -> lbr E rbr | num
///
/// @tparam token_t     token type; must be default-constructible, comparable
///                     with the TokenTable constants and streamable to
///                     std::cout.
/// @tparam Source      token source; must support `source >> token`.
/// @tparam TokenTable  provides the constants eof, add, sub, mul, div, lbr,
///                     rbr and num.
template<typename token_t, class Source, class TokenTable>
class RecursiveAParser {
    using istream = Source;
    using result_t = Result<token_t, UTerm>;
    using node_t = ASTNode<token_t, UTerm>;

    istream &ref;                // token source; must outlive the parser
    token_t token;               // current lookahead token
    result_t *result = nullptr;  // result under construction; set only during parse()

public:
    RecursiveAParser(istream &ref): ref(ref) {
    }

    /// Parses one expression from the source and returns the result.
    ///
    /// The returned object is allocated with `new` and is never deleted by
    /// the parser, so the caller is responsible for releasing it.
    /// `code` is ResultCode::Ok on success and ResultCode::Error if any
    /// syntax error was reported, including unconsumed input after the
    /// top-level expression.
    result_t* parse() {
        result = new result_t();
        result->code = ResultCode::Ok;
        auto hdl = result;
        read();
        parseE(result->rt = result->alloc(UTerm::E, true));
        // The whole input must form exactly one expression: any token left
        // over (e.g. "1 2" or "1 )") is a syntax error. Skip the report when
        // an earlier error has already been recorded, to avoid a duplicate.
        if (token != TokenTable::eof && result->code == ResultCode::Ok) {
            error();
        }
        result = nullptr;
        return hdl;
    }

private:
    /// Advances the lookahead by reading the next token from the source.
    void read() {
        ref >> token;
    }

    /// Marks the current result as failed and prints the offending token.
    void error() {
        result->code = ResultCode::Error;
        std::cout << "error " << token << std::endl;
    }

    /// E -> T ED. The ED child is omitted when the lookahead is eof.
    void parseE(node_t* &rt) {
        parseT(rt->insert(result->alloc(UTerm::T, true)));
        if (token != TokenTable::eof) {
            parseED(rt->insert(result->alloc(UTerm::ED, true)));
        }
    }

    /// ED -> (add | sub) T ED | ε. Leaves the node without children on ε.
    void parseED(node_t* &rt) {
        if (token == TokenTable::add || token == TokenTable::sub) {
            rt->insert(result->alloc(token));
            read();
            parseT(rt->insert(result->alloc(UTerm::T, true)));
            parseED(rt->insert(result->alloc(UTerm::ED, true)));
        }
    }

    /// T -> F TD. The TD child is omitted when the lookahead is eof.
    void parseT(node_t* &rt) {
        parseF(rt->insert(result->alloc(UTerm::F, true)));
        if (token != TokenTable::eof) {
            parseTD(rt->insert(result->alloc(UTerm::TD, true)));
        }
    }

    /// TD -> (mul | div) F TD | ε. Leaves the node without children on ε.
    void parseTD(node_t* &rt) {
        if (token == TokenTable::mul || token == TokenTable::div) {
            rt->insert(result->alloc(token));
            read();
            parseF(rt->insert(result->alloc(UTerm::F, true)));
            parseTD(rt->insert(result->alloc(UTerm::TD, true)));
        }
    }

    /// F -> lbr E rbr | num. Reports an error on any other lookahead, or
    /// when the closing bracket is missing; the offending token is not
    /// consumed.
    void parseF(node_t* &rt) {
        if (token == TokenTable::lbr) {
            rt->insert(result->alloc(token));
            read();
            parseE(rt->insert(result->alloc(UTerm::E, true)));
            if (token == TokenTable::rbr) {
                rt->insert(result->alloc(token));
                read();
            } else {
                error();
            }
        } else if (token == TokenTable::num) {
            rt->insert(result->alloc(token));
            read();
        } else {
            error();
        }
    }
};
`istream` 是 lexer 的输出流,经过递归函数组织以后生成语法树。生成结果与树节点数据结构如下:
/// Outcome of a parse: the root of the syntax tree plus a status code.
///
/// @tparam term_t   type used for terminal symbols.
/// @tparam uterm_t  type used for non-terminal symbols.
template<typename term_t, typename uterm_t>
struct Result {
    using node_t = ASTNode<term_t, uterm_t>;

    node_t *rt;       // root node of the syntax tree
    ResultCode code;  // parse status
};  // the terminating ';' is required after a struct definition
template<typename term_t, typename uterm_t>
struct ASTNode {
using symbol_t = Symbol<term_t, uterm_t>;
symbol_t symbol;
std::vector<ASTNode*