由于语法分析模块由多个不同的分析器协同完成,当一个分析器中发现语法错误,或者仅仅是该识别的非终结符识别完成,即将返回时,该分析器会将从这里开始的终结符都扔给委托它的分析器去处理。这会导致一个问题,那就是错误会从当前分析器扩散到调用栈中的其它分析器。比如输入:
if ( x == 1 2 ) { // other codes ...
在没有任何保护措施的情况下,表达式分析器遇到“2”时,会认为表达式识别结束,然后返回一个表达式节点,表征
x == 1
然后将“2”传入LR分析器。接着LR分析器一看也傻眼了,怎么来了个数字,于是也开始报错。而这一切的根源也许仅仅是因为多键入了一个空格。
为了尽可能地防止错误扩散,需要改造分析器,使它们能够容忍错误。就表达式分析而言,如果发现有连续两个因子出现,或者连续两个不应连续出现的运算符(可以连续出现的情况如 1 * -1),那么仍然继续表达式的识别。换句话说,当需要因子时遇到了运算符,或者需要运算符时遇到了因子,这两种非致命错误出现时,要用一种手法迷惑分析器,让它认为还可以继续分析下去。在这里给出一种很简单的方法,就是在终结符流中插入一个伪造的终结符:
/* An operator was expected but a factor arrived.
   In scope: struct Token* token */
struct Token fakeOp = {token->line, PLUS, NULL, "+"};
// report the error here
self->consumeToken(self, &fakeOp);   /* feed the forged operator first */
self->consumeToken(self, token);     /* then feed the real token again */

/* A factor was expected but an operator arrived.
   In scope: struct Token* token */
struct Token fakeNum = {token->line, INTEGER, NULL, "0"};
// report the error here
self->consumeToken(self, &fakeNum);  /* feed the forged number first */
self->consumeToken(self, token);     /* then feed the real token again */
首先伪造一个终结符传入分析器,这时分析器就认了这家伙;然后再将原来由词法分析模块传入的真正的终结符重新传入一次,这样就达到了目的。
此外,表达式分析还有一个大敌,就是左括号太多。这一点其实也不需要什么办法去容错,只用报个错,然后把滞留在符号栈里的那些左括号忽略掉就行了。
最后是OperationAnalyser加入容错机制后需要修改的wrapname(consumeFactor)和wrapname(consumeOperator)两个函数。它们会用到的宏包括:
/*
 * Token-type classification helpers.
 *
 * Both macros are range tests over enum constants, so they depend on the
 * relative order of those constants: INTEGER..REAL must be one contiguous
 * run, and so must PLUS..OR.  Keep them next to the enum and update them
 * whenever the enum is reordered.
 *
 * NOTE: the argument is evaluated more than once; do not pass an
 * expression with side effects.
 */

/* Non-zero when x is IDENT or lies in the closed range [INTEGER, REAL]. */
#define isFirstFactor(x) \
    ( ((x) == IDENT) || ( ((x) >= INTEGER) && ((x) <= REAL) ) )

/* Non-zero when x lies in the closed range [PLUS, OR]. */
#define isFirstOperator(x) \
    ( ((x) >= PLUS) && ((x) <= OR) )
最好将它们放到AcceptType枚举的附近,因为它们跟AcceptType枚举中某些常量的顺序关系非常密切,如果AcceptType有修改的需要,那么就得对应地修改这两个宏。
这里是修改后的函数
static ErrMsg wantFactor = "Incorrect expression: should have been a factor", wantOperator = "Incorrect expression: should have been a operator", excessLParent = "Excessive opening parenthese."; static void wrapname(consumeFactor)(struct OperationAnalyser* self, struct Token* token) { if(NOT == token->type) { self->opStack->push(self->opStack, newOperator(token->type, PRIORITY[token->type], unaryOperate)); self->needFactor = 1; } else if(MINUS == token->type || PLUS == token->type) { self->opStack->push(self->opStack, newOperator(token->type, 0, unaryOperate)); self->needFactor = 1; } else if(IDENT == token->type) { struct SyntaxAnalyser* analyser = newVariableAnalyser(); analyserStack->push(analyserStack, analyser); analyser->consumeToken(analyser, token); } else if(INTEGER == token->type) { self->numStack->push(self->numStack, newIntegerNode(atoi(token->image))); self->needFactor = 0; } else if(REAL == token->type) { self->numStack->push(self->numStack, newRealNode(atof(token->image))); self->needFactor = 0; } else if(LPARENT == token->type) { self->opStack->push(self->opStack, newOperator(token->type, 0x7fffffff, nullOperate)); self->needFactor = 1; } else { struct AbstractSyntaxNode* ret = (struct AbstractSyntaxNode*) (self->numStack->peek(self->numStack)); if(NULL == ret && 1 == self->opStack->height(self->opStack)) { self->numStack->pop(self->numStack); // 弹出 ret wrapname(cleanup)(self); struct SyntaxAnalyser* analyser = (struct SyntaxAnalyser*) (analyserStack->peek(analyserStack)); analyser->consumeNonTerminal(analyser, ret); analyser = (struct SyntaxAnalyser*) (analyserStack->peek(analyserStack)); analyser->consumeToken(analyser, token); } else { /* 容错处理 */ struct Token fakeNum = {token->line, INTEGER, NULL, "0"}; fprintf(stderr, "Before `%s' ", NULL == token->image ? "End of the file." 
: token->image); fprintf(stderr, "Error @ line %d\n" " %s\n", token->line, wantFactor); self->consumeToken(self, &fakeNum); self->consumeToken(self, token); } } } static void wrapname(consumeOperator)(struct OperationAnalyser* self, struct Token* token) { int priority = PRIORITY[token->type]; if(0 < priority && priority < PRIORITY[LPARENT]) { /* token 是运算符 */ int push = 0; struct Operator* topOp = (struct Operator*) (self->opStack->peek(self->opStack)); push |= (priority < topOp->priority); push |= (priority == topOp->priority && topOp->rightCombination); while(!push) { topOp = (struct Operator*)(self->opStack->pop(self->opStack)); topOp->operate(topOp, self->numStack); topOp = (struct Operator*)(self->opStack->peek(self->opStack)); push |= (priority < topOp->priority); push |= (priority == topOp->priority && topOp->rightCombination); } self->opStack->push(self->opStack, newOperator(token->type, priority, OPER_FUNCS[token->type])); self->needFactor = 1; } else if(RPARENT == token->type) { struct Operator* topOp = (struct Operator*) (self->opStack->pop(self->opStack)); while(nullOperate != topOp->operate) { topOp->operate(topOp, self->numStack); topOp = (struct Operator*)(self->opStack->pop(self->opStack)); } topOp->operate(topOp, self->numStack); self->needFactor = 0; if(0 == self->opStack->height(self->opStack)) { struct AbstractSyntaxNode* ret = (struct AbstractSyntaxNode*) (self->numStack->pop(self->numStack)); wrapname(cleanup)(self); struct SyntaxAnalyser* analyser = (struct SyntaxAnalyser*) (analyserStack->peek(analyserStack)); analyser->consumeNonTerminal(analyser, ret); analyser = (struct SyntaxAnalyser*) (analyserStack->peek(analyserStack)); analyser->consumeToken(analyser, token); return; } } else if (isFirstFactor(token->type)) { /* 容错处理 */ struct Token fakeOp = {token->line, PLUS, NULL, "+"}; fprintf(stderr, "Before `%s' ", NULL == token->image ? "End of the file." 
: token->image); fprintf(stderr, "Error @ line %d\n" " %s\n", token->line, e); self->consumeToken(self, &fakeOp); self->consumeToken(self, token); } else { struct AbstractSyntaxNode* ret; struct Operator* topOp = (struct Operator*) (self->opStack->pop(self->opStack)); while(LPARENT != topOp->op) { topOp->operate(topOp, self->numStack); topOp = (struct Operator*)(self->opStack->pop(self->opStack)); } topOp->operate(topOp, NULL); // 左括号 ret = (struct AbstractSyntaxNode*)(self->numStack->pop(self->numStack)); if(0 != self->opStack->height(self->opStack)) { fprintf(stderr, "Error @ line %d\n" " %s\n", token->line, excessLParent); } wrapname(cleanup)(self); struct SyntaxAnalyser* analyser = (struct SyntaxAnalyser*) (analyserStack->peek(analyserStack)); analyser->consumeNonTerminal(analyser, ret); analyser = (struct SyntaxAnalyser*)(analyserStack->peek(analyserStack)); analyser->consumeToken(analyser, token); } }
这里的错误报告部分写得有点不太好,都是硬邦邦的fprintf,以后需要将它们抽取出来专门处理。