直接上代码吧,都写到注释里面了。
/*
This file is one of the components of a Context-free Grammar Parser Generator,
which accepts a piece of text as the input, and generates a parser
for the inputted context-free grammar.
Copyright (C) 2013, Junbiao Pan (Email: panjunbiao@gmail.com)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// c-wsp = WSP / (c-nl WSP)
/**
 * Parses the ABNF rule {@code c-wsp = WSP / (c-nl WSP)}.
 *
 * <p>The alternative is chosen with a single character of lookahead:
 * 0x20 and 0x09 select the plain {@code WSP} branch, while ';' and 0x0D
 * select the {@code c-nl WSP} branch. The two sets of starting characters
 * are disjoint, so one character is enough and no backtracking is attempted.
 *
 * @return the text produced by {@code WSP()}, or by {@code c_nl()} followed
 *         by {@code WSP()}
 * @throws IOException declared for the underlying input/sub-rule calls
 * @throws MatchException if the lookahead character starts neither
 *         alternative (may also propagate from the sub-rule parsers)
 */
protected String c_wsp() throws IOException, MatchException {
    final int lookahead = is.peek();

    // FIRST(WSP) = {0x20, 0x09}
    if (lookahead == 0x20 || lookahead == 0x09) {
        return WSP();
    }

    // FIRST(c-nl WSP) = {';', 0x0D}
    if (lookahead == ';' || lookahead == 0x0D) {
        return c_nl() + WSP();
    }

    // NOTE(review): the expected-set text below names only 0x20 and ';',
    // although 0x09 and 0x0D are accepted above as well — confirm whether
    // the abbreviation is intentional before changing the message.
    throw new MatchException("[0x20, ';']", is.peek(), is.getPos(), is.getLine());
}
//WSP = SP / HTAB
// VCHAR = %x21-7E
// comment = ";" *(WSP / VCHAR) CRLF
// c-nl = comment / CRLF
// c-wsp = WSP / (c-nl WSP)
/**
 * Exercises {@code AbnfParser.c_wsp()} for the rule
 * {@code c-wsp = WSP / (c-nl WSP)}.
 *
 * <p>The two trailing integers passed to the {@code Assertion} helpers are
 * presumably the expected stream position and line number after the call —
 * confirm against the {@code Assertion} class.
 */
@Test
public void testC_wsp() throws Exception {
    // Parameterized (not raw) anonymous class, so the assignment is type-checked.
    Tester<String> tester = new Tester<String>() {
        public String test(AbnfParser parser) throws MatchException, IOException {
            return parser.c_wsp();
        }
    };

    // A single SP or HTAB is a complete c-wsp.
    Assertion.assertMatch("" + (char)0x20, tester, 2, 1);
    Assertion.assertMatch("" + (char)0x09, tester, 2, 1);

    // TODO: cannot handle this case.
    // The parser does not backtrack. When c-wsp sees 0x0D as its first
    // character it commits to the c_nl() + WSP() branch. Here 0x0D is not
    // followed by 0x0A, so c_nl() cannot succeed, and because the read
    // position cannot be rewound to before c_nl(), an exception is the only
    // possible outcome.
    Assertion.assertMatchException("" + (char)0x0D + (char)0x09, tester, 1, 1);

    // Only the first of two spaces is consumed by one c-wsp.
    Assertion.assertMatch("" + (char)0x20 + (char)0x20, tester, 2, 1);

    // CRLF followed by a space.
    Assertion.assertMatch("" + (char)0x0D + (char)0x0A + " ", tester, 2, 2);
    // NOTE(review): identical to the assertion above — possibly a different
    // whitespace character (e.g. HTAB) was intended; confirm.
    Assertion.assertMatch("" + (char)0x0D + (char)0x0A + " ", tester, 2, 2);

    // Comment, CRLF, then a space.
    Assertion.assertMatch("; " + (char)0x0D + (char)0x0A + " ", tester, 2, 2);
    Assertion.assertMatch(";AbCd123\"" + (char)0x0D + (char)0x0A + " ", tester, 2, 2);

    // Input remaining after the c-wsp (a second CRLF) is left unread.
    Assertion.assertMatch("; " + (char)0x0D + (char)0x0A + " " + (char)0x0D + (char)0x0A, tester, 2, 2);
}
// comment = ";" *(WSP / VCHAR) CRLF
/**
 * Parses the ABNF rule {@code comment = ";" *(WSP / VCHAR) CRLF}.
 *
 * <p>The matched text is accumulated in a {@link StringBuilder} rather than
 * by repeated {@code String} concatenation, so a long comment does not cause
 * quadratic copying.
 *
 * @return the concatenation of the leading ';', every WSP/VCHAR result and
 *         the final CRLF result
 * @throws IOException declared for the underlying input/sub-rule calls
 * @throws MatchException declared for {@code assertMatch} and the sub-rule
 *         parsers (presumably raised when the input does not fit the rule)
 */
protected String comment() throws IOException, MatchException {
    // A comment must open with a semicolon.
    assertMatch(is.peek(), ';');
    int value = is.read();

    StringBuilder text = new StringBuilder();
    text.append((char) value);

    // Zero or more WSP / VCHAR. Their starting characters are disjoint
    // (0x20 and 0x09 versus 0x21-0x7E), so a single lookahead character picks
    // the branch; any other character ends the repetition.
    while (true) {
        if (match(is.peek(), 0x20) || match(is.peek(), 0x09)) {
            text.append(WSP());
        } else if (match(is.peek(), 0x21, 0x7E)) {
            text.append(VCHAR());
        } else {
            break;
        }
    }

    // The rule ends with a mandatory CRLF.
    text.append(CRLF());
    return text.toString();
}
// VCHAR = %x21-7E
// comment = ";" *(WSP / VCHAR) CRLF
/**
 * Exercises {@code AbnfParser.comment()} for the rule
 * {@code comment = ";" *(WSP / VCHAR) CRLF}.
 *
 * <p>The two trailing integers passed to the {@code Assertion} helpers are
 * presumably the expected stream position and line number — confirm against
 * the {@code Assertion} class.
 */
@Test
public void testComment() throws Exception {
    // Parameterized (not raw) anonymous class, so the assignment is type-checked.
    Tester<String> tester = new Tester<String>() {
        public String test(AbnfParser parser) throws MatchException, IOException {
            return parser.comment();
        }
    };

    // Empty comment body, a single VCHAR, and a single space, each ended by CRLF.
    Assertion.assertMatch(";" + (char)0x0D + (char)0x0A, tester, 1, 2);
    Assertion.assertMatch(";" + (char)0x7E + (char)0x0D + (char)0x0A, tester, 1, 2);
    Assertion.assertMatch(";" + " " + (char)0x0D + (char)0x0A, tester, 1, 2);

    // Missing CRLF after the semicolon.
    Assertion.assertMatchException(";", tester, 2, 1);
    // Does not start with a semicolon.
    Assertion.assertMatchException(" ", tester, 1, 1);

    // CR without a following LF: the parser does not backtrack, so once it
    // has reached the CRLF step it is expected to fail here.
    Assertion.assertMatchException(";" + (char) 0x0D, tester, 1, 1);
}
本系列文章索引:基于预测的ABNF文法分析器