系列入口:编程实战:类C语法的编译型脚本解释器(九)编译语句
本文介绍表达式的编译。
目录
一、代码概览
表达式的编译就是不断获取下一个标记(token,可能是标识符、数字、字符串或操作符),直到遇到不属于表达式的东西。
完整代码如下:
// Compiles the tokens starting at `pos` into an expression tree and returns its root.
//
// tokens      token stream being compiled
// vars        variable table; passed through to GetOperand() and to ToString() for diagnostics
// pExpression expression built so far: NULL for a fresh expression, or (on the recursive
//             call below) an operation node that already has its operator and left operand
// pos         in/out token index; advanced past whatever was consumed. When the expression
//             is terminated by an end token, pos is left ON that token.
// endch       passed to isExpressionEnd() to decide which tokens terminate the expression
// half        when true, return as soon as the current node has received its right operand
//             and the following operator does not bind tighter; pos is then stepped back
//             onto that operator so the caller can re-read it
//
// Returns the root of the tree. May return NULL when nothing could be read at all.
// Throws "内存不足" if allocation fails and reports syntax errors through
// CException::Throw / Throw.
//
// Precedence levels come from tokens.GetOperatorLevel(); a SMALLER level binds tighter.
Expression* GetExpression(CTokens& tokens, T_VARIABLE_S& vars, Expression* pExpression, size_t& pos, char const* endch, bool half = false)
{
    size_t start_pos = pos;
    size_t op_pos = 0;            // token index of the operator held in pOperator
    Token* pToken;
    Expression* pOperand = NULL;  // next operand, not yet attached to the tree
    Token* pOperator = NULL;      // next operator, not yet attached to the tree
    while (true)
    {
        if (NULL == pOperand && NULL == pOperator)
        {
            // Nothing pending: read the next operand/operator pair.
            tokens.MoveCurrentToken(pToken, pos);
            if (tokens.IsPosNotToken(pos))
            {
                return pExpression;
            }
            if (isExpressionEnd(tokens, pos, endch))
            {
                // End token reached. If nothing was built, this is an empty
                // statement and an empty expression node stands in for it.
                if (NULL == pExpression)
                {
                    pExpression = NewExpression();
                    // Check the allocation BEFORE touching the new node.
                    if (NULL == pExpression)throw "内存不足";
                    pExpression->source_start = tokens.TokenStart(pos);
                    pExpression->source_end = tokens.TokenStart(pos);
                }
                return pExpression;
            }
            // Next operand; there may be none when the next token is an operator.
            GetOperand(tokens, vars, pOperand, pos);
            // Next operator; remember where it was.
            if (GetOperator(tokens, pOperator, pos))op_pos = pos - 1;
        }
        if (NULL == pOperand && NULL == pOperator)
        {
            // Neither an operand nor an operator could be read: finished.
            return pExpression;
        }
        if (NULL == pExpression)
        {
            // Start a new expression.
            if (NULL == pOperator || isExpressionEnd(tokens, pos - 1, endch))
            {
                // A lone operand: it is the whole expression.
                if (isExpressionEnd(tokens, pos - 1, endch))
                {
                    --pos;  // the "operator" was the end token; leave pos on it
                }
                else if (!isExpressionEnd(tokens, pos, endch))
                {
                    // An operand followed by something that is neither an
                    // operator nor an end token.
                    cout << tokens.m_tokens[pos - 1].text << " " << endch << endl;
                    cout << pOperand->ToString(m_source, vars) << endl;
                    CException::Throw(__FILE__, __LINE__, m_source, tokens.TokenStart(pos), "期待操作符");
                }
                pExpression = pOperand;
                return pExpression;
            }
            else
            {
                pExpression = NewExpression();
                if (NULL == pExpression)throw "内存不足";
                pExpression->source_start = tokens.TokenStart(start_pos);
                pExpression->source_end = tokens.TokenStart(op_pos);
                pExpression->type = Expression::OPERATION;
                pExpression->op = pOperator->text;
                // No left operand means the operator is used as a prefix (unary) operator.
                if (NULL != pOperand)pExpression->AddLeftOperand(*pOperand);
            }
            pOperand = NULL;
            pOperator = NULL;
        }
        else
        {
            // Extend the existing expression.
            if (NULL == pOperator || isExpressionEnd(tokens, pos - 1, endch))
            {
                // No further operator: the operand completes the expression.
                if (isExpressionEnd(tokens, pos - 1, endch))--pos;
                pExpression->AddRightOperand(*pOperand);
                pExpression->source_end = tokens.TokenStart(pos);
                pOperand = NULL;
                return pExpression;
            }
            else
            {
                // level1: operator of the current node; level2: the operator just read.
                // A node without a left operand, or an operator without a preceding
                // operand, is treated as unary (level 2).
                long level1, level2;
                if (NULL == pExpression->pLeftOperand())level1 = 2;
                else if (!tokens.GetOperatorLevel(pExpression->op.c_str(), level1))Throw(__FILE__, __LINE__, pExpression->op + " 操作符优先级未知或此处不应该出现此操作符");
                if (NULL == pOperand)level2 = 2;
                else if (!tokens.GetOperatorLevel(pOperator->text.c_str(), level2))Throw(__FILE__, __LINE__, pOperator->text + " 操作符优先级未知或此处不应该出现此操作符");
                if (level1 < level2 || (level1 == level2 && tokens.IsOperatorLeftFirst(level1)))
                {
                    // The current node binds first: the operand is its right operand
                    // and the finished node becomes the left operand of the new operator.
                    // pOperand can be NULL here (see level2 above); guard like the
                    // left-operand case instead of dereferencing a null pointer.
                    if (NULL != pOperand)pExpression->AddRightOperand(*pOperand);
                    pExpression->source_end = tokens.TokenStart(pos - 1);
                    if (half)
                    {
                        --pos;  // step back onto the operator for the caller
                        return pExpression;
                    }
                    Expression* tmp = NewExpression();
                    if (NULL == tmp)throw "内存不足";
                    tmp->type = Expression::OPERATION;
                    tmp->source_start = pExpression->source_start;
                    // Source position of the operator, not its token index.
                    tmp->source_end = tokens.TokenStart(op_pos);
                    tmp->op = pOperator->text;
                    tmp->AddLeftOperand(*pExpression);
                    pExpression = tmp;
                    pOperand = NULL;
                    pOperator = NULL;
                }
                else
                {
                    // The new operator binds first: the operand becomes ITS left
                    // operand, the rest of that sub-expression is compiled recursively
                    // (half mode), and the result is the pending operand of this node.
                    Expression* tmp = NewExpression();
                    if (NULL == tmp)throw "内存不足";
                    tmp->type = Expression::OPERATION;
                    if (NULL == pOperand)tmp->source_start = tokens.TokenStart(op_pos);
                    else tmp->source_start = pOperand->source_start;
                    tmp->source_end = tokens.TokenStart(op_pos);
                    tmp->op = pOperator->text;
                    // A unary operator has no left operand; never dereference NULL.
                    if (NULL != pOperand)tmp->AddLeftOperand(*pOperand);
                    pOperator = NULL;
                    pOperand = GetExpression(tokens, vars, tmp, pos, endch, true);
                    if (GetOperator(tokens, pOperator, pos))op_pos = pos - 1;
                }
            }
        }
    }
}
参数char const* endch指出期待什么样的结尾,分为“}”和“)”两种情况;在下文的GetOperand()中,括号表达式传入的是“)”,函数参数传入的是“,)”。
二、代码分析
表达式总是可以解释为运算符和操作数,操作数本身是表达式,所以表达式可以分解为运算符和操作数的递归。一个长表达式最终被拆解为树形结构:
a*b+c*d拆解为:
      +
    /   \
  a*b   c*d
所以最重要的是处理括号和运算符的优先级。
运算符的优先级按照C语言的规范,tokens.GetOperatorLevel()获取运算符的优先级:
// Looks up the precedence level of the operator whose text is `op`.
//
// op         operator text as a NUL-terminated string
// ret_level  receives the level (1..15, smaller binds tighter), or -1 when
//            the operator is not in the table
//
// Returns true when the operator is known, false otherwise.
bool GetOperatorLevel(char const* op, long& ret_level)const
{
    struct LevelEntry
    {
        char const* text;
        long level;
    };
    // Levels follow the C precedence table; operators this script does not
    // support are simply absent.
    static LevelEntry const kLevels[] =
    {
        { "[", 1 }, { ".", 1 },
        { "!", 2 }, { "++", 2 }, { "--", 2 },
        { "*", 3 }, { "/", 3 }, { "%", 3 },
        { "+", 4 }, { "-", 4 },
        { "<", 6 }, { "<=", 6 }, { ">", 6 }, { ">=", 6 },
        { "==", 7 }, { "!=", 7 },
        { "&&", 11 },
        { "||", 12 },
        { "=", 14 }, { "+=", 14 }, { "-=", 14 }, { "*=", 14 }, { "/=", 14 }, { "%=", 14 },
        { ",", 15 },
    };

    ret_level = -1;
    for (size_t i = 0; i < sizeof(kLevels) / sizeof(kLevels[0]); ++i)
    {
        if (0 == strcmp(op, kLevels[i].text))
        {
            // Entries are unique, so the first match is the only match.
            ret_level = kLevels[i].level;
            break;
        }
    }
    return -1 != ret_level;
}
“?”没有出现是因为本脚本不支持“?:”。
代码取得下一个运算符,与之前的比较,如果下一个运算符优先级高就递归获取表达式。
函数GetOperand()用于获取操作数,操作数也是表达式,但是操作数是最简单的表达式,是常数、变量或者函数调用,以及括号括起来的表达式。代码如下:
// Reads one operand starting at token index `pos`: a function or plugin call,
// a variable, a numeric or string constant, or a parenthesised expression.
//
// tokens    token stream being compiled
// vars      variable table; passed through to GetExpression()
// pOperand  in/out: receives the new operand. It is set to NULL when pos is past
//           the last token and left untouched when the current token cannot start
//           an operand (an operator other than "(", or a delimiter).
// pos       in/out token index; advanced past the operand that was read
//
// Returns true when pOperand is non-NULL on exit.
// Throws "内存不足" if allocation fails and reports syntax errors through
// CException::Throw.
bool GetOperand(CTokens& tokens, T_VARIABLE_S& vars, Expression*& pOperand, size_t& pos)
{
    Token* pToken;
    Variable variable;
    tokens.MoveCurrentToken(pToken, pos);
    if (tokens.IsPosNotToken(pos))
    {
        pOperand = NULL;
    }
    else
    {
        switch (pToken->type)
        {
        case Token::IDENTIFIER:
            if (!tokens.IsPosNotToken(pos + 1) && tokens.IsOperator(pos + 1, "("))
            {
                // identifier followed by "(": a call. Script functions are looked
                // up first, plugins second.
                CScript* tmp_pFunction = FindFunction(pToken->text);
                if (NULL != tmp_pFunction)
                {
                    pOperand = NewExpression();
                    if (NULL == pOperand)throw "内存不足";
                    pOperand->type = Expression::FUNCTION;
                    pOperand->pFunction = tmp_pFunction;
                }
                else
                {
                    if (NULL == CPluginMap::GetPlugin(pToken->text))CException::Throw(__FILE__, __LINE__, m_source, tokens.TokenStart(pos), "未定义的函数或插件");
                    pOperand = NewExpression();
                    if (NULL == pOperand)throw "内存不足";
                    pOperand->type = Expression::PLUGIN;
                    pOperand->pPlugin.plugin_name = pToken->text;
                }
                pOperand->source_start = tokens.TokenStart(pos);
                // Skip the name and the opening parenthesis.
                tokens.MoveNextToken(pToken, pos);
                tokens.MoveNextToken(pToken, pos);
                // Argument list: expressions separated by "," up to ")".
                while (true)
                {
                    if (tokens.IsPosNotToken(pos))CException::Throw(__FILE__, __LINE__, m_source, tokens.TokenStart(pos), "未期待的脚本结束,期待右括号");
                    if (tokens.IsOperator(pos, ")"))
                    {
                        tokens.MoveNextToken(pToken, pos);
                        break;
                    }
                    Expression* pParam = GetExpression(tokens, vars, NULL, pos, ",)");
                    // GetExpression() returns NULL when it could not read anything
                    // (for example a delimiter where an argument is expected).
                    // Report it instead of dereferencing a null pointer.
                    if (NULL == pParam)CException::Throw(__FILE__, __LINE__, m_source, tokens.TokenStart(pos), "期待参数表达式");
                    pOperand->ParamList.push_back(*pParam);
                    if (tokens.IsOperator(pos, ","))
                    {
                        tokens.MoveNextToken(pToken, pos);
                        // "f(a,)" — a comma must be followed by another argument.
                        if (tokens.IsOperator(pos, ")"))CException::Throw(__FILE__, __LINE__, m_source, tokens.TokenStart(pos), "未期待的右括号");
                    }
                }
                pOperand->source_end = tokens.TokenStart(pos);
            }
            else
            {
                // Plain identifier: a variable reference.
                pOperand = NewExpression();
                if (NULL == pOperand)throw "内存不足";
                pOperand->VariableName = pToken->text;
                pOperand->type = Expression::VARIABLE;
                pOperand->source_start = tokens.TokenStart(pos);
                ++pos;
                pOperand->source_end = tokens.TokenStart(pos);
            }
            break;
        case Token::NUMBER:
            pOperand = NewExpression();
            if (NULL == pOperand)throw "内存不足";
            if (!tokens.NumberToVariable(pToken->text.c_str(), variable))CException::Throw(__FILE__, __LINE__, m_source, tokens.TokenStart(pos), "NumberToVariable错误");
            pOperand->m_variable = variable;
            pOperand->m_variable.isconst = true;
            pOperand->type = Expression::CONSTANT;
            pOperand->source_start = tokens.TokenStart(pos);
            ++pos;
            pOperand->source_end = tokens.TokenStart(pos);
            break;
        case Token::STRING:
            // Allocate through NewExpression() like every other branch rather
            // than with a raw `new`.
            pOperand = NewExpression();
            if (NULL == pOperand)throw "内存不足";
            variable = tokens.TokenToString(pToken->text.c_str());
            pOperand->m_variable = variable;
            pOperand->m_variable.isconst = true;
            pOperand->type = Expression::CONSTANT;
            pOperand->source_start = tokens.TokenStart(pos);
            ++pos;
            pOperand->source_end = tokens.TokenStart(pos);
            break;
        case Token::OPERATOR:
            // Only "(" starts an operand; any other operator is left for the
            // caller to read with GetOperator().
            if (tokens.IsOperator(pos, "("))
            {
                tokens.MoveNextToken(pToken, pos);
                pOperand = GetExpression(tokens, vars, NULL, pos, ")");
                tokens.MoveCurrentToken(pToken, pos);
                if (!tokens.IsOperator(pos, ")"))CException::Throw(__FILE__, __LINE__, m_source, tokens.TokenStart(pos), "期待右括号");
                tokens.MoveNextToken(pToken, pos);
            }
            break;
        case Token::KEYWORD:
            CException::Throw(__FILE__, __LINE__, m_source, tokens.TokenStart(pos), "此处不期待关键字");
            break;
        case Token::DELIMITER:
            // A delimiter is not an operand; pOperand stays as the caller passed it.
            break;
        default:
            CException::Throw(__FILE__, __LINE__, m_source, tokens.TokenStart(pos), "未支持的标记类型");
            break;
        }
    }
    return NULL != pOperand;
}
结束
好累啊,感觉比当初写脚本还累,而且好多东西又看不懂了,还是注释写少了。
我说我会发全部的代码出来,不过还没想好发在哪里,因为我是想建个库的。
(这里是结束,而且是整个系列的结束)