广义表具有良好的递归结构,利用递归下降分析法检查广义表字符串合法性的C++代码如下:
#include <iostream>
#include <string>
#include <utility>
#include <memory>
using namespace std;
// Kinds of token the lexer can produce.
enum class token_type
{
    left_bracket,   // '('
    right_bracket,  // ')'
    comma,          // ','
    id              // identifier text
};
// One lexical token: its kind plus, for identifiers, the identifier text.
class Token
{
public:
    token_type type;  // kind of this token
    string id;        // identifier text; left empty by the token_type constructor

    // Builds a token of kind `t` with an empty identifier text.
    Token(token_type t) : type(t) {}

    // Builds an identifier token (kind token_type::id) carrying the text `i`.
    Token(const string& i) : type(token_type::id), id(i) {}
};
// Outcome of a single lexer_parse call.
enum class lexer_abnormal
{
    end,           // nothing left to scan
    illegal_char,  // the character at the current index cannot start a token
    success        // a token was produced
};
pair<shared_ptr<Token>, lexer_abnormal> lexer_parse(const string& gen, string::size_type& i)
{
if (i == gen.size())
return { nullptr, lexer_abnormal::end };
switch (gen[i])
{
case '(': {++i; return { make_shared<Token>(Token(token_type::left_bracket)),lexer_abnormal::success }; }
case ')': {++i; return { make_shared<Token>(Token(token_type::right_bracket)),lexer_abnormal::success }; }
case ',': {++i; return { make_shared<Token>(Token(token_type::comma)),lexer_abnormal::success }; }
default:
{
if (gen[i] < 33 || gen[i] == 127)
{
return { nullptr, lexer_abnormal::illegal_char };
}
string result;
while (i != gen.size() && gen[i] >= 33 && gen[i] != 127 && gen[i] != '(' && gen[i] != ',' && gen[i] != ')')
{
result.append(1, gen[i]);
++i;
}
return { make_shared<Token>(Token(result)),lexer_abnormal::success };
}
}
}
shared_ptr<Token> Lex(const string& gen, string::size_type& i)
{
pair<shared_ptr<Token>, lexer_abnormal> t = lexer_parse(gen, i);
if (t.second == lexer_abnormal::illegal_char)
{
cout << "ERROR:下标" << i << "存在非法的字符" << endl;
exit(-1);
}
if (t.second == lexer_abnormal::end)
{
cout << "ERROR:字符串已扫描完毕,但解析过程尚未结束,不完整的表达式" << endl;
exit(-1);
}
return t.first;
}
bool parseSubList(const string& gen, string::size_type& i)
{
string::size_type start = i;
while (true)
{
string::size_type pos = i;
shared_ptr<Token> cur = Lex(gen, i);
if (cur->type == token_type::id)
{
shared_ptr<Token> next = Lex(gen, i);
if (next->type == token_type::left_bracket)
{
if (parseSubList(gen, i) == false)
return false;
shared_ptr<Token> after = Lex(gen, i);
if (after->type == token_type::right_bracket)
return true;
if (after->type == token_type::left_bracket || after->type == token_type::id)
{
cout << "ERROR:从下标" << pos << "开始的子表后遇见意外的结尾,预期为逗号或右括号" << endl;
return false;
}
}
else if (next->type == token_type::right_bracket)
{
return true;
}
}
else if (cur->type == token_type::left_bracket)
{
cout << "ERROR:下标" << pos << "处的子表开头缺少标识符" << endl;
return false;
}
else if (cur->type == token_type::right_bracket)
{
if (start != pos)
{
cout << "ERROR:缺少从下标" << start << "开头的子表最后一个表项" << endl;
return false;
}
return true;
}
else
{
cout << "ERROR:下标" << pos << "处的逗号分隔符前缺少表项" << endl;
return false;
}
}
}
// Checks that `gen`, read from index `i`, is one complete generalized list:
// an identifier, a '(', the list body accepted by parseSubList, and nothing
// after the closing ')'. Prints a diagnostic to cout and returns false
// otherwise.
bool parseGen(const string &gen, string::size_type &i)
{
    // The second token is only fetched when the first one is an identifier.
    const bool startsWithName = Lex(gen, i)->type == token_type::id;
    const bool opensList = startsWithName && Lex(gen, i)->type == token_type::left_bracket;
    if (!opensList)
    {
        cout << "ERROR:无效的广义表开头" << endl;
        return false;
    }

    if (!parseSubList(gen, i))
        return false;

    // The outermost list must span the whole string.
    if (i < gen.size())
    {
        cout << "ERROR:最外层广义表结尾后存在意外字符" << endl;
        return false;
    }
    return true;
}
// Demo driver: validates a hard-coded expression and prints the verdict.
int main()
{
    const string gen = "f()k()";
    string::size_type i = 0;

    const bool ok = parseGen(gen, i);
    cout << (ok ? "解析成功,广义表表达式正确!" : "ERROR:语法错误!") << endl;
    return 0;
}