/////////////////////////////////////////////////////////////////////////////
// Lexical analysis (词法分析) — lex.h
/////////////////////////////////////////////////////////////////////////////
#pragma once
#ifndef LEX_H
#define LEX_H

#include <fstream>
#include <string>
#include <cassert>

// Temporary value representations: assume `long` can hold any integer
// literal and `double` any floating-point literal; replace with
// arbitrary-precision types later if needed.
typedef long PreciseInteger;
typedef double PreciseFloat;

// Keywords of the lexed language; the enumerator value doubles as the
// keyword's numeric code.
enum Keyword
{
    KEYWORD_INVALID,   // "not a keyword" sentinel

    // data-type keywords
    KEYWORD_CHAR,
    KEYWORD_SHORT,
    KEYWORD_INT,
    KEYWORD_LONG,
    KEYWORD_FLOAT,
    KEYWORD_DOULBE,    // NOTE(review): historical misspelling of DOUBLE, kept so existing users don't break
    KEYWORD_SIGNED,
    KEYWORD_UNSIGNED,
    KEYWORD_ENUM,
    KEYWORD_UNION,
    KEYWORD_STRUCT,
    KEYWORD_VOID,

    // control-flow keywords
    // loops
    KEYWORD_FOR,
    KEYWORD_DO,
    KEYWORD_WHILE,
    KEYWORD_BREAK,
    KEYWORD_CONTINUE,
    // conditionals
    KEYWORD_IF,
    KEYWORD_ELSE,
    KEYWORD_GOTO,
    // switch
    KEYWORD_SWITCH,
    KEYWORD_CASE,
    KEYWORD_DEFAULT,
    // return
    KEYWORD_RETURN,

    // storage-class keywords
    KEYWORD_AUTO,
    KEYWORD_EXTERN,
    KEYWORD_REGISTER,
    KEYWORD_STATIC,

    // others
    KEYWORD_CONST,
    KEYWORD_SIZEOF,
    KEYWORD_TYPEDEF,
    KEYWORD_VOLATILE
};
const int KeywordNumber = KEYWORD_VOLATILE + 1;

// Operators; the enumerator value doubles as the operator's numeric code.
enum Operator
{
    OPERATOR_INVALID,
    OPERATOR_OROR,           // ||
    OPERATOR_ANDAND,         // &&
    OPERATOR_EQEQ,           // ==
    OPERATOR_NOTEQ,          // !=
    OPERATOR_LT,             // <
    OPERATOR_LE,             // <=
    OPERATOR_GT,             // >
    OPERATOR_GE,             // >=
    OPERATOR_PLUS,           // +
    OPERATOR_MINUS,          // -
    OPERATOR_OR,             // |
    OPERATOR_XOR,            // ^
    OPERATOR_MULT,           // *
    OPERATOR_DIV,            // /
    OPERATOR_MOD,            // %
    OPERATOR_LSHIFT,         // <<
    OPERATOR_RSHIFT,         // >>
    OPERATOR_AND,            // &
    OPERATOR_NOT,            // !
    OPERATOR_EQ,             // =
    OPERATOR_PLUSEQ,         // +=
    OPERATOR_MINUSEQ,        // -=
    OPERATOR_OREQ,           // |=
    OPERATOR_XOREQ,          // ^=
    OPERATOR_MULTEQ,         // *=
    OPERATOR_DIVEQ,          // /=
    OPERATOR_MODEQ,          // %=
    OPERATOR_LSHIFTEQ,       // <<=
    OPERATOR_RSHIFTEQ,       // >>=
    OPERATOR_ANDEQ,          // &=
    OPERATOR_PLUSPLUS,       // ++
    OPERATOR_MINUSMINUS,     // --
    // ?: is special — the ternary operator is lexed as two tokens
    OPERATOR_CONDITON_BEGIN, // ?
    OPERATOR_CONDITON_END,   // :
    OPERATOR_SEMICOLON,      // ;
    OPERATOR_DOT,            // .
    OPERATOR_RPOINTOR,       // ->
    OPERATOR_COMMA,          // ,
    OPERATOR_LPAREN,         // (
    OPERATOR_RPAREN,         // )
    OPERATOR_LCURLY,         // {
    OPERATOR_RCURLY,         // }
    OPERATOR_LSQUARE,        // [
    OPERATOR_RSQUARE         // ]
};
const int OperatorNumber = OPERATOR_RSQUARE + 1;

// A lexical token.  Instances are created only through the make_*_token
// factories, which guarantee the discriminated union stays consistent
// with the classification tag.
class Token
{
public:
    // Token categories.
    enum Classification
    {
        TOKEN_INVALID,    // unrecognized input
        TOKEN_EOF,        // end of file
        TOKEN_KEYWORD,    // language keyword
        TOKEN_IDENTIFIER, // identifier
        TOKEN_OPERATOR,   // operator / punctuator
        TOKEN_STRING,     // string literal
        TOKEN_INTEGER,    // integer literal
        TOKEN_FLOAT,      // floating-point literal
        TOKEN_CHAR,       // character literal
    };

public:
    Token(const Token& t);
    Token& operator=(const Token& t);
    bool operator==(const Token& t);

    // Factories — one per classification.
    static Token make_invalid_token(int location)
    {
        return Token(TOKEN_INVALID, location);
    }
    static Token make_eof_token(int location)
    {
        return Token(TOKEN_EOF, location);
    }
    static Token make_keyword_token(Keyword keyword, int location)
    {
        Token token(TOKEN_KEYWORD, location);
        token.value_.keyword_value = keyword;
        return token;
    }
    static Token make_identifier_token(std::string &value, int location)
    {
        Token token(TOKEN_IDENTIFIER, location);
        token.value_.identifier_value = new std::string(value);
        return token;
    }
    static Token make_operator_token(Operator op, int location)
    {
        Token token(TOKEN_OPERATOR, location);
        token.value_.operator_value = op;
        return token;
    }
    static Token make_string_token(std::string &value, int location)
    {
        Token token(TOKEN_STRING, location);
        token.value_.string_value = new std::string(value);
        return token;
    }
    static Token make_integer_token(PreciseInteger value, int location)
    {
        Token token(TOKEN_INTEGER, location);
        token.value_.integer_value = value;
        return token;
    }
    static Token make_float_token(PreciseFloat value, int location)
    {
        Token token(TOKEN_FLOAT, location);
        token.value_.float_value = value;
        return token;
    }
    static Token make_char_token(char value, int location)
    {
        Token token(TOKEN_CHAR, location);
        token.value_.char_value = value;
        return token;
    }

    // Accessors; each asserts the token actually is of the requested kind.
    Classification get_classification() const { return this->classification_; }
    int get_location() const { return this->source_location_; }
    Keyword get_keyword_value() const
    {
        assert(this->classification_ == TOKEN_KEYWORD);
        return this->value_.keyword_value;
    }
    std::string get_identifier_value() const
    {
        assert(this->classification_ == TOKEN_IDENTIFIER);
        return *(this->value_.identifier_value);
    }
    Operator get_operator_value() const
    {
        assert(this->classification_ == TOKEN_OPERATOR);
        return this->value_.operator_value;
    }
    std::string get_string_value() const
    {
        assert(this->classification_ == TOKEN_STRING);
        return *(this->value_.string_value);
    }
    PreciseInteger get_integer_value() const
    {
        assert(this->classification_ == TOKEN_INTEGER);
        return this->value_.integer_value;
    }
    PreciseFloat get_float_value() const
    {
        assert(this->classification_ == TOKEN_FLOAT);
        return this->value_.float_value;
    }
    char get_char_value() const
    {
        assert(this->classification_ == TOKEN_CHAR);
        return this->value_.char_value;
    }
    bool is_eof() const
    {
        return this->classification_ == TOKEN_EOF;
    }
    ~Token();

private:
    // Tokens are produced by the make_*_token factories only; this
    // constructor is private so classification_/value_ cannot diverge.
    Token(Classification classification, int location)
        : classification_(classification), source_location_(location)
    {
    }
    void clear();               // release owned heap strings
    void copy(const Token& t);  // deep-copy another token's contents

    Classification classification_;  // which kind of token this is
    unsigned int source_location_;   // position of the token in the source file
    union
    {
        Keyword keyword_value;         // keyword code
        Operator operator_value;       // operator code
        std::string* identifier_value; // owned; identifier spelling
        std::string* string_value;     // owned; string literal contents
        PreciseInteger integer_value;  // integer literal value
        PreciseFloat float_value;      // float literal value
        char char_value;               // character literal value
    } value_;
};

// The lexer: pulls characters from the input stream one line at a time
// and produces Tokens on demand through next_token().
class lex
{
public:
    lex(const char* input_file_name, std::fstream &input_file_);
    ~lex(void);
    Token next_token();

private:
    bool require_line();       // ensure linebuf_ has unread input; fetch the next line if needed
    void skip_c_comment();     // skip a /* ... */ comment (may span lines)
    void skip_cpp_comment();   // skip a // comment (rest of the line)
    Token gather_identifier_or_keyword(); // lex an identifier or keyword
    Token gather_number();     // lex an integer or float literal
    Token gather_character();  // lex a character literal
    Token gather_string();     // lex a string literal
    Keyword string_to_keyword(const std::string &str); // keyword lookup; KEYWORD_INVALID if none

private:
    const char* input_file_name_;  // file name (for diagnostics)
    std::fstream &input_file_;     // input stream
    std::string linebuf_;          // current line (with '\n' re-appended)
    size_t line_size_;             // length of linebuf_
    size_t line_offset_;           // read position within linebuf_
    size_t line_number_;           // 1-based number of the current line
    size_t location_;              // absolute read position in the file
};

#endif

/////////////////////////////////////////////////////////////////////////////
// lex.cpp
/////////////////////////////////////////////////////////////////////////////
// #include "lex.h"  // in the real project; the header is inlined above
#include <cstdlib>   // std::atol / std::atof (was missing)

// Mapping from keyword code to its spelling.
struct KeywordStringTable
{
    Keyword keyword;
    std::string name;
};

KeywordStringTable keyword_string_mapping[] =
{
    // data-type keywords
    { KEYWORD_CHAR,     "char" },
    { KEYWORD_SHORT,    "short" },
    { KEYWORD_INT,      "int" },
    { KEYWORD_LONG,     "long" },
    { KEYWORD_FLOAT,    "float" },
    { KEYWORD_DOULBE,   "double" },
    { KEYWORD_SIGNED,   "signed" },
    { KEYWORD_UNSIGNED, "unsigned" },
    { KEYWORD_ENUM,     "enum" },
    { KEYWORD_UNION,    "union" },  // was "unio": `union` could never be recognized
    { KEYWORD_STRUCT,   "struct" },
    { KEYWORD_VOID,     "void" },
    // control flow — loops
    { KEYWORD_FOR,      "for" },
    { KEYWORD_DO,       "do" },
    { KEYWORD_WHILE,    "while" },
    { KEYWORD_BREAK,    "break" },
    { KEYWORD_CONTINUE, "continue" },
    // conditionals
    { KEYWORD_IF,       "if" },
    { KEYWORD_ELSE,     "else" },
    { KEYWORD_GOTO,     "goto" },
    // switch
    { KEYWORD_SWITCH,   "switch" },
    { KEYWORD_CASE,     "case" },
    { KEYWORD_DEFAULT,  "default" },
    // return
    { KEYWORD_RETURN,   "return" },
    // storage class
    { KEYWORD_AUTO,     "auto" },
    { KEYWORD_EXTERN,   "extern" },
    { KEYWORD_REGISTER, "register" },
    { KEYWORD_STATIC,   "static" },
    // others
    { KEYWORD_CONST,    "const" },
    { KEYWORD_SIZEOF,   "sizeof" },
    { KEYWORD_TYPEDEF,  "typedef" },
    { KEYWORD_VOLATILE, "volatile" },
};
static size_t mapping_table_size = sizeof(keyword_string_mapping) / sizeof(KeywordStringTable);

Token::Token(const Token& t)
{
    copy(t);
}

Token& Token::operator=(const Token& t)
{
    if (&t != this)
    {
        this->clear();
        copy(t);
    }
    return *this;
}

bool Token::operator==(const Token& t)
{
    if (classification_ != t.classification_)
    {
        return false;
    }
    switch (classification_)
    {
    case TOKEN_KEYWORD:    return value_.keyword_value == t.value_.keyword_value;
    case TOKEN_IDENTIFIER: return *value_.identifier_value == *t.value_.identifier_value;
    case TOKEN_OPERATOR:   return value_.operator_value == t.value_.operator_value;
    case TOKEN_STRING:     return *value_.string_value == *t.value_.string_value;
    case TOKEN_INTEGER:    return value_.integer_value == t.value_.integer_value;
    case TOKEN_FLOAT:      return value_.float_value == t.value_.float_value;
    case TOKEN_CHAR:       return value_.char_value == t.value_.char_value;
    default:               return true; // INVALID / EOF carry no payload
    }
}

// Deep-copy t's contents; heap strings are duplicated so each Token owns
// its own storage.
void Token::copy(const Token& t)
{
    this->source_location_ = t.source_location_;
    this->classification_ = t.classification_;
    switch (this->classification_)
    {
    case TOKEN_INVALID:
    case TOKEN_EOF:
        break;
    case TOKEN_KEYWORD:
        this->value_.keyword_value = t.value_.keyword_value;
        break;
    case TOKEN_IDENTIFIER:
        this->value_.identifier_value = new std::string(*(t.value_.identifier_value));
        break;
    case TOKEN_OPERATOR:
        this->value_.operator_value = t.value_.operator_value;
        break;
    case TOKEN_STRING:
        this->value_.string_value = new std::string(*(t.value_.string_value));
        break;
    case TOKEN_INTEGER:
        this->value_.integer_value = t.value_.integer_value;
        break;
    case TOKEN_FLOAT:
        this->value_.float_value = t.value_.float_value;
        break;
    case TOKEN_CHAR:
        this->value_.char_value = t.value_.char_value;
        break;
    }
}

// Release owned resources (only the string-carrying kinds own heap memory).
void Token::clear()
{
    if (this->classification_ == TOKEN_IDENTIFIER)
    {
        delete this->value_.identifier_value;
    }
    else if (this->classification_ == TOKEN_STRING)
    {
        delete this->value_.string_value;
    }
}

Token::~Token()
{
    clear();
}

lex::lex(const char* input_file_name, std::fstream &input_file)
    : input_file_name_(input_file_name),
      input_file_(input_file),
      line_size_(0),
      line_offset_(0),
      line_number_(0),
      location_(0) // was left uninitialized: every reported token location was garbage
{
}

// Ensure linebuf_ holds unread input; pull the next line when the current
// one is exhausted.  Returns false at end of file.
bool lex::require_line()
{
    if (line_offset_ < line_size_)
    {
        return true;
    }
    else if (std::getline(input_file_, linebuf_))
    {
        line_offset_ = 0;
        ++line_number_;
        linebuf_ += '\n'; // getline strips the newline; restore it (was '/n', a multi-char literal)
        line_size_ = linebuf_.size();
        return true;
    }
    return false;
}

// Skip a /* ... */ comment; keeps pulling lines until the closing */ is found
// (an unterminated comment simply consumes the rest of the file).
void lex::skip_c_comment()
{
    do
    {
        while (line_offset_ < line_size_)
        {
            if ((line_offset_ + 1 < line_size_)   // bounds check was missing
                && (linebuf_[line_offset_] == '*')
                && (linebuf_[line_offset_ + 1] == '/'))
            {
                line_offset_ += 2;
                location_ += 2;
                return;
            }
            ++line_offset_;
            ++location_;
        }
    } while (require_line());
}

// Skip a // comment: it runs to the end of the current line.
void lex::skip_cpp_comment()
{
    location_ += (line_size_ - line_offset_);
    line_offset_ = line_size_;
}

// Lex a run of [A-Za-z0-9_] starting at the current offset, then decide
// whether it is a keyword or an identifier.
Token lex::gather_identifier_or_keyword()
{
    std::string::iterator word_begin = linebuf_.begin() + line_offset_;
    while (line_offset_ < line_size_)
    {
        if (((linebuf_[line_offset_] >= 'a') && (linebuf_[line_offset_] <= 'z'))
            || ((linebuf_[line_offset_] >= 'A') && (linebuf_[line_offset_] <= 'Z'))
            || ((linebuf_[line_offset_] >= '0') && (linebuf_[line_offset_] <= '9'))
            || (linebuf_[line_offset_] == '_'))
        {
            ++location_;
            ++line_offset_;
        }
        else
        {
            break;
        }
    }
    std::string::iterator word_end = linebuf_.begin() + line_offset_;
    std::string word(word_begin, word_end);
    Keyword keyword = string_to_keyword(word);
    if (keyword == KEYWORD_INVALID)
    {
        return Token::make_identifier_token(word, location_ - word.size());
    }
    else
    {
        return Token::make_keyword_token(keyword, location_ - word.size());
    }
}

// Lex an integer or floating-point literal.  Scientific notation is not
// handled yet.
Token lex::gather_number()
{
    enum Number { INT, FLOAT };
    Number number_type = INT;
    bool have_point = false;
    std::string::iterator number_begin = linebuf_.begin() + line_offset_;
    while (line_offset_ < line_size_)
    {
        if ((linebuf_[line_offset_] <= '9') && (linebuf_[line_offset_] >= '0'))
        {
            ++line_offset_;
            ++location_;
        }
        else if ((linebuf_[line_offset_] == '.') && (have_point == false))
        {
            // first '.' turns the literal into a float; a second one ends it
            have_point = true;
            number_type = FLOAT;
            ++line_offset_;
            ++location_;
        }
        else
        {
            break;
        }
    }
    std::string::iterator number_end = linebuf_.begin() + line_offset_;
    std::string str_number(number_begin, number_end);
    if (number_type == INT)
    {
        // atol, not atoi: PreciseInteger is long and atoi would truncate
        PreciseInteger integer_value = std::atol(str_number.c_str());
        return Token::make_integer_token(integer_value, location_ - str_number.size());
    }
    else if (number_type == FLOAT)
    {
        PreciseFloat float_value = std::atof(str_number.c_str());
        return Token::make_float_token(float_value, location_ - str_number.size());
    }
    return Token::make_invalid_token(location_);
}

// Lex a character literal.  Escape sequences are not handled yet, so the
// form is exactly 'c' for one plain ASCII character.
Token lex::gather_character()
{
    ++line_offset_; // step over the opening quote
    ++location_;
    if ((line_offset_ + 1 < line_size_)
        && (linebuf_[line_offset_] >= 0)              // plain ASCII only
        && (linebuf_[line_offset_ + 1] == '\''))      // was '/'': the closing quote never matched
    {
        line_offset_ += 2;
        location_ += 2;
        return Token::make_char_token(linebuf_[line_offset_ - 2], location_ - 3);
    }
    else
    {
        return Token::make_invalid_token(location_);
    }
}

// Lex a string literal.  Multi-line strings and escape sequences are not
// handled yet — just plain ASCII up to the closing quote on the same line.
Token lex::gather_string()
{
    ++line_offset_; // step over the opening quote
    ++location_;
    std::string::iterator string_begin = linebuf_.begin() + line_offset_;
    while (line_offset_ < line_size_)
    {
        if (linebuf_[line_offset_] == '"')
        {
            break;
        }
        ++line_offset_;
        ++location_;
    }
    if (line_offset_ == line_size_)
    {
        // hit end of line with no closing quote
        return Token::make_invalid_token(location_);
    }
    std::string::iterator string_end = linebuf_.begin() + line_offset_;
    std::string string_value(string_begin, string_end);
    ++line_offset_; // step over the closing quote
    ++location_;
    return Token::make_string_token(string_value, location_ - string_value.size() - 2);
}

// Linear keyword lookup; returns KEYWORD_INVALID when str is not a keyword.
Keyword lex::string_to_keyword(const std::string &str)
{
    for (size_t i = 0; i < mapping_table_size; ++i)
    {
        if (str == keyword_string_mapping[i].name)
            return keyword_string_mapping[i].keyword;
    }
    return KEYWORD_INVALID;
}

// Produce the next token from the input.  Whitespace and comments are
// skipped; EOF yields a TOKEN_EOF token.
//
// Note: linebuf_ always ends with '\n' (re-appended by require_line), so
// peeking at line_offset_ + 1 after a non-newline character stays in range.
Token lex::next_token()
{
    do
    {
        if (!require_line())
        {
            return Token::make_eof_token(++location_);
        }
        while (line_offset_ < line_size_)
        {
            char cc = linebuf_[line_offset_];
            switch (cc)
            {
            case '\t': case '\r': case ' ': // whitespace (was '/t'/'/r': multi-char literals that never matched)
                ++line_offset_;
                ++location_;
                while ((line_offset_ < line_size_)
                       && ((linebuf_[line_offset_] == '\t')
                           || (linebuf_[line_offset_] == '\r')
                           || (linebuf_[line_offset_] == ' ')))
                {
                    ++line_offset_;
                    ++location_;
                }
                break;
            case '\n': // end of line (was '/n')
                ++line_offset_;
                ++location_;
                break;
            case '/': // comment start, /= or /
                ++line_offset_;
                ++location_;
                if (linebuf_[line_offset_] == '/')
                {
                    ++line_offset_;
                    ++location_;
                    skip_cpp_comment();
                }
                else if (linebuf_[line_offset_] == '*')
                {
                    ++line_offset_;
                    ++location_;
                    skip_c_comment();
                }
                else if (linebuf_[line_offset_] == '=')
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_DIVEQ, location_ - 2);
                }
                else
                {
                    return Token::make_operator_token(OPERATOR_DIV, location_ - 1);
                }
                break;
            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
            case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
            case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
            case 'Y': case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
            case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
            case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
            case 's': case 't': case 'u': case 'v': case 'w': case 'x':
            case 'y': case 'z':
            case '_':
                return gather_identifier_or_keyword();
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                return gather_number();
            case '\'': // was '/'': character literals were never detected
                return gather_character();
            case '"':
                return gather_string();
            case '+': // ++, += or +
                if (linebuf_[line_offset_ + 1] == '+')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_PLUSPLUS, location_ - 2);
                }
                else if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_PLUSEQ, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_PLUS, location_ - 1);
                }
            case '-': // --, -=, -> or -
                if (linebuf_[line_offset_ + 1] == '-')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_MINUSMINUS, location_ - 2);
                }
                else if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_MINUSEQ, location_ - 2);
                }
                else if (linebuf_[line_offset_ + 1] == '>')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_RPOINTOR, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_MINUS, location_ - 1);
                }
            case '*': // *= or *
                if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_MULTEQ, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_MULT, location_ - 1);
                }
            case '%': // %= or %  (the two results were swapped in the old code)
                if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_MODEQ, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_MOD, location_ - 1);
                }
            case '=': // == or =
                if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_EQEQ, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_EQ, location_ - 1);
                }
            case '>': // >=, >>=, >> or >
                if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_GE, location_ - 2);
                }
                else if (linebuf_[line_offset_ + 1] == '>')
                {
                    if (((line_offset_ + 2) < line_size_) && (linebuf_[line_offset_ + 2] == '='))
                    {
                        line_offset_ += 3;
                        location_ += 3;
                        return Token::make_operator_token(OPERATOR_RSHIFTEQ, location_ - 3);
                    }
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_RSHIFT, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_GT, location_ - 1);
                }
            case '<': // <=, <<=, << or <
                if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_LE, location_ - 2);
                }
                else if (linebuf_[line_offset_ + 1] == '<') // was '>': << could never be lexed
                {
                    if (((line_offset_ + 2) < line_size_) && (linebuf_[line_offset_ + 2] == '='))
                    {
                        line_offset_ += 3;
                        location_ += 3;
                        return Token::make_operator_token(OPERATOR_LSHIFTEQ, location_ - 3);
                    }
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_LSHIFT, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_LT, location_ - 1);
                }
            case '|': // |=, || or |
                if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_OREQ, location_ - 2);
                }
                else if (linebuf_[line_offset_ + 1] == '|')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_OROR, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_OR, location_ - 1);
                }
            case '&': // &=, && or &
                if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_ANDEQ, location_ - 2);
                }
                else if (linebuf_[line_offset_ + 1] == '&')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_ANDAND, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_AND, location_ - 1);
                }
            case '!': // != or !
                if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_NOTEQ, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_NOT, location_ - 1);
                }
            case '^': // ^= or ^
                if (linebuf_[line_offset_ + 1] == '=')
                {
                    line_offset_ += 2;
                    location_ += 2;
                    return Token::make_operator_token(OPERATOR_XOREQ, location_ - 2);
                }
                else
                {
                    ++line_offset_;
                    ++location_;
                    return Token::make_operator_token(OPERATOR_XOR, location_ - 1);
                }
            case ';':
                ++line_offset_;
                ++location_;
                return Token::make_operator_token(OPERATOR_SEMICOLON, location_ - 1);
            case '(':
                ++line_offset_;
                ++location_;
                return Token::make_operator_token(OPERATOR_LPAREN, location_ - 1);
            case ')':
                ++line_offset_;
                ++location_;
                return Token::make_operator_token(OPERATOR_RPAREN, location_ - 1);
            case '}':
                ++line_offset_;
                ++location_;
                return Token::make_operator_token(OPERATOR_RCURLY, location_ - 1);
            case '{':
                ++line_offset_;
                ++location_;
                return Token::make_operator_token(OPERATOR_LCURLY, location_ - 1);
            case '[':
                ++line_offset_;
                ++location_;
                return Token::make_operator_token(OPERATOR_LSQUARE, location_ - 1);
            case ']':
                ++line_offset_;
                ++location_;
                return Token::make_operator_token(OPERATOR_RSQUARE, location_ - 1);
            case '.':
                ++line_offset_;
                ++location_;
                return Token::make_operator_token(OPERATOR_DOT, location_ - 1);
            case '?':
                ++line_offset_;
                ++location_;
                return Token::make_operator_token(OPERATOR_CONDITON_BEGIN, location_ - 1);
            case ':':
                ++line_offset_;
                ++location_;
                return Token::make_operator_token(OPERATOR_CONDITON_END, location_ - 1);
            default: // anything else is not part of the language
                ++line_offset_;
                ++location_;
                return Token::make_invalid_token(location_ - 1);
            }
        }
    } while (true);
}

lex::~lex(void)
{
}

/////////////////////////////////////////////////////////////////////////////
// LL(1) table construction and data structures — LL_ONE.h
// (placed before parse.h, which uses Symbol)
/////////////////////////////////////////////////////////////////////////////
#pragma once
#ifndef LL_ONE_H // include guard was missing
#define LL_ONE_H

// #include "lex.h" // inlined above
#include <string>
#include <vector>

// Nonterminal symbols of the expression grammar:
//   E  -> T E'          E' -> + T E' | e
//   T  -> F T'          T' -> * F T' | e
//   F  -> ( E ) | integer
// Defined before Symbol: the old code only forward-declared the enum,
// which is not valid standard C++ for a plain enum.
enum NonterminalSymbol
{
    Nonterminal_E,  // grammar start symbol
    Nonterminal_EX, // E'
    Nonterminal_T,
    Nonterminal_TX, // T'
    Nonterminal_F
};
unsigned const NonterminalNumber = Nonterminal_F + 1;

// A grammar symbol: a terminal (wrapping a Token), a nonterminal, or the
// special end-of-stack marker.
class Symbol
{
public:
    enum SymbolType { TERMINAL, NONTERMINAL, SPECIAL_TERMINAL };

public:
    // Default-construct as the special marker so type_ is never read
    // uninitialized (the destructor and operator= branch on it).
    Symbol() : type_(SPECIAL_TERMINAL) {}
    ~Symbol();
    Symbol(SymbolType t);
    Symbol(const Symbol& s);
    Symbol& operator=(const Symbol& symbol);

    Token get_token_value() const;
    NonterminalSymbol get_nonterminal_value() const;
    bool is_terminal() const { return type_ == TERMINAL; }
    bool is_special() const { return type_ == SPECIAL_TERMINAL; }
    bool is_nonterminal() const { return type_ == NONTERMINAL; }
    // Terminal symbols compare by token category (operator code, keyword
    // code; any two integers match); nonterminals compare by value.
    bool operator==(const Symbol &s) const;
    SymbolType get_symbol_type() const { return type_; }

    static Symbol make_terminal_symbol(const Token &t);
    static Symbol make_nonterminal_symbol(NonterminalSymbol s);
    static Symbol make_special_symbol();

private:
    // const Token&: callers pass temporaries (e.g. make_operator_token(...)),
    // which a non-const reference cannot legally bind to
    Symbol(const Token &t)
    {
        type_ = TERMINAL;
        value_.token_ = new Token(t);
    }
    Symbol(NonterminalSymbol s)
    {
        type_ = NONTERMINAL;
        value_.nonterminal_value = s;
    }

    union
    {
        NonterminalSymbol nonterminal_value; // valid when NONTERMINAL
        Token *token_;                       // owned; valid when TERMINAL
    } value_;
    SymbolType type_;
};

// Map a token to its column index in the LL(1) table.
int token_to_terminal_value(const Token &t);

// An LL(1) table cell: pointer to the production's right-hand side;
// NULL means "no production" (syntax error).
typedef std::vector<Symbol>* LL_one_element;
typedef std::vector<Symbol> LL_one_element_entry;

// Columns: one per keyword, one per operator, +2 for "integer literal"
// and "end of input".
extern LL_one_element LL_ONE_TABLE[NonterminalNumber][KeywordNumber + OperatorNumber + 2];
void init_LL_ONE_TABLE();

#endif

/////////////////////////////////////////////////////////////////////////////
// Syntax analysis, LL(1) (语法分析 使用LL(1)算法) — parse.h
/////////////////////////////////////////////////////////////////////////////
#pragma once
#ifndef PARSE_H
#define PARSE_H

// #include "lex.h"    // inlined above
// #include "LL_ONE.h" // inlined above
#include <fstream>
#include <string>
#include <stack>
#include <iostream>

// LL(1) predictive parser driven by LL_ONE_TABLE.
class Parse
{
public:
    Parse(lex *lexer, std::fstream &file);
    void program(); // run the parse over the whole input

private:
    lex *lex_;                        // token source
    std::fstream &file_;              // input file
    std::stack<Symbol> symbol_stack_; // parse stack
};

#endif

/////////////////////////////////////////////////////////////////////////////
// parse.cpp
/////////////////////////////////////////////////////////////////////////////
// #include "parse.h" // inlined above

Parse::Parse(lex *lexer, std::fstream &file)
    : lex_(lexer), file_(file)
{
    if (lexer == NULL)
    {
        // NOTE(review): this lex is owned by Parse but never deleted —
        // Parse needs a destructor (or unique_ptr) to release it
        lex_ = new lex("", file_);
    }
    symbol_stack_.push(Symbol::make_special_symbol());                  // bottom-of-stack marker
    symbol_stack_.push(Symbol::make_nonterminal_symbol(Nonterminal_E)); // grammar start symbol
    init_LL_ONE_TABLE();                                                // build the LL(1) table
}

void print_symbol(Symbol &s);

// LL(1) driver loop: pop a symbol; match terminals against the lookahead,
// expand nonterminals through the table; stop at the bottom marker.
void Parse::program()
{
    Token current_token_ = lex_->next_token(); // lookahead
    Symbol pop_symbol;                         // most recently popped stack symbol
    do
    {
        pop_symbol = symbol_stack_.top();
        std::cout << "弹出-";
        print_symbol(pop_symbol);
        symbol_stack_.pop();
        if (pop_symbol.is_terminal())
        {
            if (pop_symbol == Symbol::make_terminal_symbol(current_token_))
            {
                if (current_token_.get_classification() != Token::TOKEN_EOF)
                {
                    current_token_ = lex_->next_token(); // consume the matched terminal
                }
                else
                {
                    //exit(-1);
                    std::cout << "error" << std::endl;
                }
            }
            else
            {
                // was silently ignored: a stack terminal that does not match
                // the lookahead is a syntax error
                std::cout << "error" << std::endl;
            }
        }
        else
        {
            if (!pop_symbol.is_special())
            {
                LL_one_element element =
                    LL_ONE_TABLE[pop_symbol.get_nonterminal_value()][token_to_terminal_value(current_token_)];
                if (element != NULL)
                {
                    std::cout << std::endl;
                    // push the production right-to-left so it pops left-to-right
                    for (int i = (int)element->size() - 1; i >= 0; --i)
                    {
                        symbol_stack_.push(element->at(i));
                        std::cout << "插入-";
                        print_symbol(element->at(i));
                    }
                    std::cout << std::endl;
                }
                else
                {
                    //exit(-1);
                    std::cout << "error" << std::endl;
                }
            }
        }
    } while (!(pop_symbol.is_special()));
}

// Debug helper used only for inspecting the parse trace.
void print_symbol(Symbol &s)
{
    Symbol::SymbolType type = s.get_symbol_type();
    switch (type)
    {
    case Symbol::TERMINAL:
        std::cout << "终结符";
        if (s.get_token_value().get_classification() == Token::TOKEN_OPERATOR)
        {
            if (s.get_token_value().get_operator_value() == OPERATOR_PLUS)
            {
                std::cout << "+";
            }
            if (s.get_token_value().get_operator_value() == OPERATOR_MULT)
            {
                std::cout << "*";
            }
            if (s.get_token_value().get_operator_value() == OPERATOR_LPAREN)
            {
                std::cout << "(";
            }
            if (s.get_token_value().get_operator_value() == OPERATOR_RPAREN)
            {
                std::cout << ")";
            }
        }
        if (s.get_token_value().get_classification() == Token::TOKEN_INTEGER)
        {
            std::cout << "整数";
        }
        std::cout << std::endl;
        break;
    case Symbol::NONTERMINAL:
        std::cout << "非终结符";
        if (s.get_nonterminal_value() == Nonterminal_E)
        {
            std::cout << "E";
        }
        if (s.get_nonterminal_value() == Nonterminal_EX)
        {
            std::cout << "E'";
        }
        if (s.get_nonterminal_value() == Nonterminal_T)
        {
            std::cout << "T";
        }
        if (s.get_nonterminal_value() == Nonterminal_TX)
        {
            std::cout << "T'";
        }
        if (s.get_nonterminal_value() == Nonterminal_F)
        {
            std::cout << "F";
        }
        std::cout << std::endl;
        break;
    }
}

/////////////////////////////////////////////////////////////////////////////
// LL_ONE.cpp
/////////////////////////////////////////////////////////////////////////////
// #include "LL_ONE.h" // inlined above

Symbol::Symbol(SymbolType t)
{
    type_ = t;
}

Symbol::~Symbol()
{
    if (is_terminal())
    {
        delete value_.token_; // terminals own their Token
    }
}

Symbol::Symbol(const Symbol& s)
{
    if (s.is_terminal())
    {
        type_ = TERMINAL;
        value_.token_ = new Token(*(s.value_.token_)); // deep copy
    }
    else if (s.is_nonterminal())
    {
        type_ = NONTERMINAL;
        value_.nonterminal_value = s.value_.nonterminal_value;
    }
    else
    {
        type_ = SPECIAL_TERMINAL;
    }
}

Symbol Symbol::make_terminal_symbol(const Token &t)
{
    return Symbol(t);
}

Symbol Symbol::make_nonterminal_symbol(NonterminalSymbol s)
{
    return Symbol(s);
}

Symbol Symbol::make_special_symbol()
{
    return Symbol(SPECIAL_TERMINAL);
}

bool Symbol::operator==(const Symbol &s) const
{
    if (s.is_terminal() && this->is_terminal())
    {
        Token::Classification classification = this->get_token_value().get_classification();
        Token::Classification classification_s = s.get_token_value().get_classification();
        if (classification == classification_s)
        {
            switch (classification)
            {
            case Token::TOKEN_INTEGER:
                return true; // any two integer literals are the same terminal
            case Token::TOKEN_OPERATOR:
                return this->get_token_value().get_operator_value()
                       == s.get_token_value().get_operator_value();
            case Token::TOKEN_KEYWORD:
                return this->get_token_value().get_keyword_value()
                       == s.get_token_value().get_keyword_value();
            case Token::TOKEN_EOF:
                return true;
            default:
                break;
            }
            return false;
        }
        return false;
    }
    if (s.is_nonterminal() && this->is_nonterminal())
    {
        return s.get_nonterminal_value() == this->get_nonterminal_value();
    }
    return false;
}

Symbol& Symbol::operator=(const Symbol& symbol)
{
    if (&symbol != this)
    {
        if (this->is_terminal())
        {
            delete this->value_.token_; // release the old owned Token
        }
        if (symbol.is_terminal())
        {
            this->type_ = TERMINAL;
            this->value_.token_ = new Token(*(symbol.value_.token_));
        }
        else if (symbol.is_nonterminal())
        {
            this->type_ = NONTERMINAL;
            this->value_.nonterminal_value = symbol.value_.nonterminal_value;
        }
        else
        {
            this->type_ = SPECIAL_TERMINAL;
        }
    }
    return *this;
}

Token Symbol::get_token_value() const
{
    assert(type_ == TERMINAL);
    return *(value_.token_);
}

NonterminalSymbol Symbol::get_nonterminal_value() const
{
    assert(type_ == NONTERMINAL);
    return value_.nonterminal_value;
}

// Column index of a token in LL_ONE_TABLE: keywords first, then operators,
// then one column for integer literals and one for end-of-input.
int token_to_terminal_value(const Token &t)
{
    int value = 0;
    Token::Classification classification = t.get_classification();
    switch (classification)
    {
    case Token::TOKEN_KEYWORD:
        value += t.get_keyword_value();
        break;
    case Token::TOKEN_OPERATOR:
        value += KeywordNumber + t.get_operator_value();
        break;
    case Token::TOKEN_INTEGER:
        value += KeywordNumber + OperatorNumber;
        break;
    case Token::TOKEN_EOF:
        value += KeywordNumber + OperatorNumber + 1;
        break;
    }
    return value;
}

// The LL(1) table; +2 columns: one for integer literals, one for end-of-input.
LL_one_element LL_ONE_TABLE[NonterminalNumber][KeywordNumber + OperatorNumber + 2] = { NULL };

void init_LL_ONE_TABLE()
{
    // Guard against repeated calls (one per Parse object): without it the
    // static production vectors below would accumulate duplicate symbols.
    static bool initialized = false;
    if (initialized)
    {
        return;
    }
    initialized = true;

    // the empty production (epsilon)
    static LL_one_element_entry expr_empty;

    // E -> T E'
    static LL_one_element_entry expr_e;
    expr_e.push_back(Symbol::make_nonterminal_symbol(Nonterminal_T));
    expr_e.push_back(Symbol::make_nonterminal_symbol(Nonterminal_EX));

    // E' -> + T E'
    static LL_one_element_entry expr_ex;
    expr_ex.push_back(Symbol::make_terminal_symbol(Token::make_operator_token(OPERATOR_PLUS, 0)));
    expr_ex.push_back(Symbol::make_nonterminal_symbol(Nonterminal_T));
    expr_ex.push_back(Symbol::make_nonterminal_symbol(Nonterminal_EX));

    // T -> F T'
    static LL_one_element_entry expr_t;
    expr_t.push_back(Symbol::make_nonterminal_symbol(Nonterminal_F));
    expr_t.push_back(Symbol::make_nonterminal_symbol(Nonterminal_TX));

    // T' -> * F T'
    static LL_one_element_entry expr_tx;
    expr_tx.push_back(Symbol::make_terminal_symbol(Token::make_operator_token(OPERATOR_MULT, 0)));
    expr_tx.push_back(Symbol::make_nonterminal_symbol(Nonterminal_F));
    expr_tx.push_back(Symbol::make_nonterminal_symbol(Nonterminal_TX));

    // F -> integer
    static LL_one_element_entry expr_f_1;
    expr_f_1.push_back(Symbol::make_terminal_symbol(Token::make_integer_token(1, 0)));

    // F -> ( E )
    static LL_one_element_entry expr_f_2;
    expr_f_2.push_back(Symbol::make_terminal_symbol(Token::make_operator_token(OPERATOR_LPAREN, 0)));
    expr_f_2.push_back(Symbol::make_nonterminal_symbol(Nonterminal_E));
    expr_f_2.push_back(Symbol::make_terminal_symbol(Token::make_operator_token(OPERATOR_RPAREN, 0)));

    // fill in the table cells (FIRST/FOLLOW of the grammar above)
    LL_ONE_TABLE[Nonterminal_E][token_to_terminal_value(Token::make_integer_token(1, 0))] = &expr_e;
    LL_ONE_TABLE[Nonterminal_E][token_to_terminal_value(Token::make_operator_token(OPERATOR_LPAREN, 0))] = &expr_e;

    LL_ONE_TABLE[Nonterminal_EX][token_to_terminal_value(Token::make_operator_token(OPERATOR_PLUS, 0))] = &expr_ex;
    LL_ONE_TABLE[Nonterminal_EX][token_to_terminal_value(Token::make_operator_token(OPERATOR_RPAREN, 0))] = &expr_empty;
    LL_ONE_TABLE[Nonterminal_EX][KeywordNumber + OperatorNumber + 1] = &expr_empty; // end-of-input column

    LL_ONE_TABLE[Nonterminal_T][token_to_terminal_value(Token::make_integer_token(1, 0))] = &expr_t;
    LL_ONE_TABLE[Nonterminal_T][token_to_terminal_value(Token::make_operator_token(OPERATOR_LPAREN, 0))] = &expr_t;

    LL_ONE_TABLE[Nonterminal_TX][token_to_terminal_value(Token::make_operator_token(OPERATOR_PLUS, 0))] = &expr_empty;
    LL_ONE_TABLE[Nonterminal_TX][token_to_terminal_value(Token::make_operator_token(OPERATOR_MULT, 0))] = &expr_tx;
    LL_ONE_TABLE[Nonterminal_TX][token_to_terminal_value(Token::make_operator_token(OPERATOR_RPAREN, 0))] = &expr_empty;
    LL_ONE_TABLE[Nonterminal_TX][KeywordNumber + OperatorNumber + 1] = &expr_empty; // end-of-input column

    LL_ONE_TABLE[Nonterminal_F][token_to_terminal_value(Token::make_integer_token(1, 0))] = &expr_f_1;
    LL_ONE_TABLE[Nonterminal_F][token_to_terminal_value(Token::make_operator_token(OPERATOR_LPAREN, 0))] = &expr_f_2;
}