我们来看看rulelist,它是整个ABNF文法的入口,也就是说,一个ABNF文法就是一个规则列表rulelist。一个rulelist由若干条规则rule组成,每条rule由规则名rulename、定义方式defined-as和元素elements构成。
先来看解析代码:
/*
This file is one of the components of a Context-free Grammar Parser Generator,
which accepts a piece of text as the input, and generates a parser
for the inputted context-free grammar.
Copyright (C) 2013, Junbiao Pan (Email: panjunbiao@gmail.com)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
public class AbnfParser {
// rulelist = 1*( rule / (*c-wsp c-nl) )
/**
 * Parses the ABNF entry production: a sequence of rules interleaved with
 * blank or comment-only lines.
 *
 * <p>A rule whose name has not been seen yet is appended to the result. A rule
 * whose name is already known is merged into the earlier definition by
 * appending its concatenations, unless both definitions use plain "=", which
 * is reported as a collision.
 *
 * @return the parsed rules, in order of first appearance
 * @throws IOException declared for the underlying input and helper parsers
 * @throws MatchException declared for the helper parsers this method calls
 * @throws CollisionException if a rule name is defined twice with "="
 */
protected List<Rule> rulelist() throws IOException, MatchException, CollisionException {
    Map<RuleName, Rule> ruleMap = new HashMap<RuleName, Rule>();
    List<Rule> ruleList = new ArrayList<Rule>();
    // Keep going while the lookahead can start a rule (ALPHA), a c-wsp (SP / HTAB)
    // or a c-nl (";" comment or CR). HTAB is accepted here because defined_as()
    // already treats it as a c-wsp starter.
    while (match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A)
            || match(is.peek(), 0x20) || match(is.peek(), 0x09)
            || match(is.peek(), ';') || match(is.peek(), 0x0D)) {
        if (match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A)) {
            // A letter starts a rule.
            Rule rule = rule();
            Rule defined = ruleMap.get(rule.getRuleName());
            if (null == defined) {
                // First definition of this name: record it.
                ruleMap.put(rule.getRuleName(), rule);
                ruleList.add(rule);
            } else {
                // Two plain "=" definitions of the same name are a redefinition.
                if ("=".equals(rule.getDefinedAs()) && "=".equals(defined.getDefinedAs())) {
                    throw new CollisionException(rule.getRuleName().toString() + " is redefined.", is.getPos(), is.getLine());
                }
                // Otherwise at least one side is incremental ("=/"): merge them.
                // If the new rule is the plain "=" one, the merged rule becomes "=".
                if ("=".equals(rule.getDefinedAs())) {
                    defined.setDefinedAs("=");
                }
                defined.getElements().getAlternation().getConcatenations().addAll(rule.getElements().getAlternation().getConcatenations());
            }
        } else {
            // (*c-wsp c-nl): skip leading whitespace only, then require the line end.
            // ";" and CR must NOT be consumed in this loop: once the loop exits the
            // lookahead is no longer ";" or CR, so the mandatory c_nl() below would
            // have nothing left to match. The "c-nl WSP" form of c-wsp is still
            // accepted, because the c-nl is consumed here and the following WSP is
            // picked up by the next iteration of the outer loop.
            while (match(is.peek(), 0x20) || match(is.peek(), 0x09)) {
                c_wsp();
            }
            c_nl();
        }
    }
    return ruleList;
}
// rulename = ALPHA *(ALPHA / DIGIT / "-")
/**
 * Parses a rule name: one letter followed by any number of letters, digits
 * or hyphens.
 *
 * <pre>
 * ALPHA = %x41-5A / %x61-7A   ; A-Z / a-z
 * DIGIT = %x30-39             ; 0-9
 * </pre>
 *
 * @return a {@code RuleName} built from {@code prefix} (declared outside this
 *         method) and the characters consumed
 * @throws IOException declared for the underlying input
 * @throws MatchException if the next character is not a letter
 */
protected RuleName rulename() throws IOException, MatchException {
    // The first character must be a letter.
    if (!(match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A))) {
        throw new MatchException("'A'-'Z'/'a'-'z'", is.peek(), is.getPos(), is.getLine());
    }
    // Accumulate in a builder rather than re-allocating a String per character.
    StringBuilder rulename = new StringBuilder();
    rulename.append((char)is.read());
    // Subsequent characters may be letters, digits or '-'.
    while (match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A)
            || match(is.peek(), 0x30, 0x39) || match(is.peek(), '-')) {
        rulename.append((char)is.read());
    }
    return new RuleName(prefix, rulename.toString());
}
// defined-as = *c-wsp ("=" / "=/") *c-wsp
/**
 * Parses the operator between a rule name and its elements, together with
 * any c-wsp surrounding it.
 *
 * @return "=" for a basic definition, or "=/" for an incremental one
 * @throws IOException declared for the underlying input
 * @throws MatchException declared for the helpers called here; presumably
 *         raised by assertMatch when the operator does not start with '='
 */
protected String defined_as() throws IOException, MatchException {
    // Consume c-wsp in front of the operator (SP, HTAB, ";" comment or CR start one).
    for (;;) {
        boolean atCwsp = match(is.peek(), 0x20) || match(is.peek(), 0x09) || match(is.peek(), ';') || match(is.peek(), (char)0x0D);
        if (!atCwsp) {
            break;
        }
        c_wsp();
    }

    // The operator always begins with '='.
    assertMatch(is.peek(), '=');
    String operator = String.valueOf((char)is.read());

    // A '/' right after it makes this an incremental definition.
    if (match(is.peek(), '/')) {
        operator = operator + (char)is.read();
    }

    // Consume c-wsp after the operator.
    for (;;) {
        boolean atCwsp = match(is.peek(), 0x20) || match(is.peek(), 0x09) || match(is.peek(), ';') || match(is.peek(), (char)0x0D);
        if (!atCwsp) {
            break;
        }
        c_wsp();
    }
    return operator;
}
// elements = alternation *c-wsp
protected Elements elements() throws IOException, MatchException {
//