基于Predictive Parsing的ABNF语法分析器(十二)——alternation、concatenation、group和option

今天一鼓作气再写多点东西吧,这个题目差不多接近尾声了。来看看alternation、concatenation、group和option的解析代码:

/*
    This file is one of the components of a Context-free Grammar Parser Generator,
    which accepts a piece of text as the input, and generates a parser
    for the inputted context-free grammar.
    Copyright (C) 2013, Junbiao Pan (Email: panjunbiao@gmail.com)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

//		        alternation    =  concatenation
//		                          *(*c-wsp "/" *c-wsp concatenation)
	/**
	 * Parses an ABNF {@code alternation}: one concatenation followed by zero or
	 * more further concatenations, each introduced by optional c-wsp, a
	 * {@code '/'} and optional c-wsp.
	 *
	 * <p>The parser is predictive and cannot backtrack: once a space, a
	 * {@code ';'} or a {@code '/'} is seen after a concatenation, another
	 * alternative is required.
	 *
	 * @return an alternation containing each parsed concatenation
	 * @throws IOException if reading the input fails
	 * @throws MatchException if the input does not match the rule
	 */
	protected Alternation alternation() throws IOException, MatchException {
		Alternation parsed = new Alternation();
		// Every alternation holds at least one alternative, and each alternative
		// is a concatenation.
		parsed.addConcatenation(concatenation());

		// From the second alternative on, each one is introduced by optional
		// whitespace/comments and a '/'. Seeing a space, ';' or '/' therefore
		// commits the parser to another alternative; if whitespace is not followed
		// by '/', or '/' is not followed by an alternative, the only option is to
		// fail, because nothing before the whitespace or slash can be re-read.
		for (;;) {
			if (!match(is.peek(), new int[] { 0x20, ';', '/'})) {
				return parsed;
			}
			// Consume any spaces or comments in front of the slash.
			// NOTE(review): concatenation() keeps consuming while the next character
			// is a space or ';', so this loop looks unreachable in practice - confirm.
			while (match(is.peek(), 0x20) || match(is.peek(), ';')) {
				c_wsp();
			}
			// A slash is mandatory at this point; anything else is a match failure.
			assertMatch(is.peek(), '/');
			is.read();
			// Spaces or comments may follow the slash.
			while (match(is.peek(), 0x20) || match(is.peek(), ';')) {
				c_wsp();
			}
			// The next alternative is again a concatenation.
			parsed.addConcatenation(concatenation());
		}
	}

//		        concatenation  =  repetition *(1*c-wsp repetition)
	/**
	 * Parses an ABNF {@code concatenation}: one repetition followed by zero or
	 * more repetitions, each separated from the previous one by at least one
	 * c-wsp.
	 *
	 * <p>Known limitation (kept from the original implementation): a space or
	 * {@code ';'} after a repetition is always taken to introduce another
	 * repetition, although other grammar elements may legally follow whitespace.
	 *
	 * @return a concatenation containing each parsed repetition
	 * @throws IOException if reading the input fails
	 * @throws MatchException if the input does not match the rule
	 */
	protected Concatenation concatenation() throws IOException, MatchException {
		Concatenation sequence = new Concatenation();
		// A concatenation consists of at least one repetition.
		sequence.addRepetition(repetition());

		for (;;) {
			// A space or a comment start is read as "another repetition follows".
			boolean separatorAhead = match(is.peek(), 0x20) || match(is.peek(), ';');
			if (!separatorAhead) {
				break;
			}
			// Consume the whole run of separating whitespace/comments.
			while (match(is.peek(), 0x20) || match(is.peek(), ';')) {
				c_wsp();
			}
			sequence.addRepetition(repetition());
		}
		return sequence;
	}

//	        group          =  "(" *c-wsp alternation *c-wsp ")"
	/**
	 * Parses an ABNF {@code group}: {@code "("}, optional c-wsp, an alternation,
	 * optional c-wsp, {@code ")"}.
	 *
	 * @return a group wrapping the parsed alternation
	 * @throws IOException if reading the input fails
	 * @throws MatchException if the input does not match the rule
	 */
	protected Group group() throws IOException, MatchException {
		// Characters that trigger a call to c_wsp(): space, ';' and CR.
		final int[] cWspLead = {0x20, ';', 0x0D};

		// A group is introduced by a left parenthesis.
		assertMatch(is.peek(), '(');
		is.read();

		// Optional whitespace/comments after the parenthesis.
		while (match(is.peek(), cWspLead)) {
			c_wsp();
		}

		// The group contains exactly one alternation.
		Alternation inner = alternation();

		// Optional whitespace/comments in front of the closing parenthesis.
		while (match(is.peek(), cWspLead)) {
			c_wsp();
		}

		// A group is terminated by a right parenthesis.
		assertMatch(is.peek(), ')');
		is.read();
		return new Group(inner);
	}

//		        option         =  "[" *c-wsp alternation *c-wsp "]"
//      option与group类似,差别在于是方括号而不是圆括号。
	/**
	 * Parses an ABNF {@code option}: {@code "["}, optional c-wsp, an alternation,
	 * optional c-wsp, {@code "]"}. The shape mirrors a group, with square
	 * brackets instead of parentheses.
	 *
	 * @return an option wrapping the parsed alternation
	 * @throws IOException if reading the input fails
	 * @throws MatchException if the input does not match the rule
	 */
	protected Option option() throws IOException, MatchException {
		// Characters that trigger a call to c_wsp(): space, ';' and CR.
		final int[] cWspLead = {0x20, ';', 0x0D};

		// Opening bracket.
		assertMatch(is.peek(), '[');
		is.read();

		// Optional whitespace/comments after the opening bracket.
		while (match(is.peek(), cWspLead)) {
			c_wsp();
		}

		// The bracketed content is a single alternation.
		Alternation content = alternation();

		// Optional whitespace/comments in front of the closing bracket.
		while (match(is.peek(), cWspLead)) {
			c_wsp();
		}

		// Closing bracket.
		assertMatch(is.peek(), ']');
		is.read();
		return new Option(content);
	}

测试用例方面,其实没啥好说的。在测试alternation的时候,主要是将一个比较复杂的concatenation赋值给input,然后测试input + "/" + input形式的候选项的情况。

/*
    This file is one of the components of a Context-free Grammar Parser Generator,
    which accepts a piece of text as the input, and generates a parser
    for the inputted context-free grammar.
    Copyright (C) 2013, Junbiao Pan (Email: panjunbiao@gmail.com)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

    //		        alternation    =  concatenation
//		                          *(*c-wsp "/" *c-wsp concatenation)
    /**
     * Exercises {@code AbnfParser.alternation()} with inputs of the form
     * {@code X "/" X}, where {@code X} is a concatenation, plus one input that
     * the parser is currently expected to reject.
     */
    @Test
    public void testAlternation() throws Exception {
        Tester<Alternation> tester = new Tester<Alternation>() {
            @Override
            public Alternation test(AbnfParser parser) throws MatchException, IOException {
                return parser.alternation();
            }
        };

        // The numeric argument is forwarded to Assertion.assertMatch; it appears to
        // be the 1-based position just past the last character of X "/" X - confirm
        // against Assertion.
        assertDoubledAlternation(tester, "A B C %xff \"abc\" <12-34>", 50);
        assertDoubledAlternation(tester, "A", 4);

        // TODO: whitespace around "/" is not supported currently, so this input is
        // expected to raise a MatchException. No expected Alternation is needed for
        // an exception case, so none is built here.
        Assertion.assertMatchException("A" + " / " + "A", tester, 3, 1);

        assertDoubledAlternation(tester, "(A B C D E)", 24);
        assertDoubledAlternation(tester, "[A B C D E]", 24);
        assertDoubledAlternation(tester, "*(A B C)", 18);
        assertDoubledAlternation(tester, "1*2(A B C)", 22);
    }

    /**
     * Asserts that parsing {@code input + "/" + input} yields an alternation of
     * two concatenations, each equal to what {@code concatenation()} produces
     * for {@code input} alone.
     *
     * @param tester      invokes the parser method under test
     * @param input       text of a single concatenation
     * @param expectedPos fourth argument handed to {@code Assertion.assertMatch}
     */
    private void assertDoubledAlternation(Tester<Alternation> tester, String input, int expectedPos)
            throws Exception {
        Alternation expected = new Alternation();
        expected.addConcatenation(AbnfParserFactory.newInstance(input).concatenation());
        expected.addConcatenation(AbnfParserFactory.newInstance(input).concatenation());
        Assertion.assertMatch(input + "/" + input, tester, expected, expectedPos, 1);
    }

    //		        concatenation  =  repetition *(1*c-wsp repetition)
    /**
     * Exercises {@code AbnfParser.concatenation()}: two inputs that must parse to
     * the same result as a fresh parser produces for them, and one input that is
     * currently expected to be rejected.
     */
    @Test
    public void testConcatenation() throws Exception {
        Tester<Concatenation> concatenationTester = new Tester<Concatenation>() {
            @Override
            public Concatenation test(AbnfParser parser) throws MatchException, IOException {
                return parser.concatenation();
            }
        };

        // A mix of rule names, repeat prefixes, numeric values, groups and options.
        final String mixed = "a b c *a 1*b 1*2c *3d %d88 %x11.22.33 %b00-1111 *(aa bb) *2[a b c] 5*(a/b)";
        Concatenation expectedMixed = AbnfParserFactory.newInstance(mixed).concatenation();
        Assertion.assertMatch(mixed, concatenationTester, expectedMixed, 75, 1);

        // Runs of several spaces between repetitions.
        final String widelySpaced = "a b  c  d    e";
        Concatenation expectedSpaced = AbnfParserFactory.newInstance(widelySpaced).concatenation();
        Assertion.assertMatch(widelySpaced, concatenationTester, expectedSpaced, 15, 1);

        // TODO: leading/trailing whitespace is not supported currently, so this
        // input is expected to raise a MatchException.
        final String padded = " a ";
        Assertion.assertMatchException(padded, concatenationTester, 1, 1);
    }

    //		        group          =  "(" *c-wsp alternation *c-wsp ")"
    /**
     * Exercises {@code AbnfParser.group()}: a tight {@code "(A/B)"} must parse
     * into a group wrapping the alternation {@code A/B}; a whitespace-padded
     * variant is currently expected to be rejected.
     */
    @Test
    public void testGroup() throws Exception {
        Tester<Group> groupTester = new Tester<Group>() {
            @Override
            public Group test(AbnfParser parser) throws MatchException, IOException {
                return parser.group();
            }
        };

        // Expected value: the alternation parsed from the group's content alone.
        Group expected = new Group(AbnfParserFactory.newInstance("A/B").alternation());
        Assertion.assertMatch("(A/B)", groupTester, expected, 6, 1);

        // TODO: whitespace padding inside the parentheses is not supported
        // currently, so a MatchException is expected.
        Assertion.assertMatchException("(  A/B  )", groupTester, 9, 1);
    }

    //		        option         =  "[" *c-wsp alternation *c-wsp "]"
    /**
     * Exercises {@code AbnfParser.option()}: a tight {@code "[A/B]"} must parse
     * into an option wrapping the alternation {@code A/B}; a whitespace-padded
     * variant is currently expected to be rejected.
     */
    @Test
    public void testOption() throws Exception {
        Tester<Option> optionTester = new Tester<Option>() {
            @Override
            public Option test(AbnfParser parser) throws MatchException, IOException {
                return parser.option();
            }
        };

        // Expected value: the alternation parsed from the option's content alone.
        Option expected = new Option(AbnfParserFactory.newInstance("A/B").alternation());
        Assertion.assertMatch("[A/B]", optionTester, expected, 6, 1);

        // TODO: whitespace padding inside the brackets is not supported currently,
        // so a MatchException is expected.
        Assertion.assertMatchException("[  A/B  ]", optionTester, 9, 1);
    }


LL(1)语法分析器是一种自顶向下语法分析器:它从左到右扫描输入、构造最左推导,并且只向前查看一个符号。其原理是基于预测分析表(predictive parsing table)的预测分析,既可以用表驱动的方式实现,也可以用递归下降分析(recursive descent parsing)的方式实现。它的主要步骤包括分析器构造、预测分析表生成和语法分析。 1. 分析器构造 LL(1)语法分析器需要先为文法构造一张预测分析表,该表记录了每个非终结符号在不同的输入符号下应当选用的产生式。预测分析表是通过对文法进行分析和处理(例如计算FIRST集和FOLLOW集)得到的。 2. 预测分析表生成 预测分析表中的每个表项由两个信息来索引:一个是非终结符号,另一个是终结符号。每个非空表项都对应一个产生式,该产生式的左部就是这个非终结符号。这样,当LL(1)语法分析器向前查看到一个终结符号时,就可以根据当前的非终结符号以及该终结符号,从预测分析表中找到对应的产生式。 3. 语法分析(表驱动与递归下降) 在表驱动的分析过程中,分析器维护一个文法符号栈:如果栈顶是终结符号,就把它与当前输入符号进行匹配,匹配成功后弹出栈顶并读入下一个输入符号;如果栈顶是非终结符号,就根据它和当前输入符号在预测分析表中进行查找,得到相应的产生式,然后弹出该非终结符号,并把产生式右部的符号按逆序压入栈中。递归下降分析则为每个非终结符号编写一个函数,用函数调用栈代替显式的符号栈。通过上述过程,分析器可以最终生成语法树或目标代码等。 总的来说,LL(1)语法分析器是一种自顶向下语法分析器,其原理是基于预测分析表的预测分析,可以用表驱动或递归下降的方式实现。通过构造预测分析表并据此进行分析,LL(1)语法分析器可以对输入的文本进行语法分析,并生成相应的语法树或目标代码等。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值