在网上找了很久Antlr文法转语法图的工具也没有找到,只能自己写一个了,效果还不错~
先看下效果,语法定义如下:
ref_cursor_type_definition
: TYPE type IS REF CURSOR (RETURN ((db_table_or_view | cursor | cursor_variable) '%' ROWTYPE
| record '%' TYPE
| record_type
| ref_cursor_type))? ';'
;
转换成语法图的效果(很漂亮吧~):
实现方案
画语法图的工具可以用python第三方包 railroad-diagrams
pip install railroad-diagrams
解析Antlr文法可以用 antlr4-python3-runtime
pip install antlr4-python3-runtime
python中使用antlr需要做一些环境准备,参考Python中使用Antlr的环境准备
基本思路
- 编写Antlr的Antlr文法(有点绕,就是编写ANTLR的G4文件)
- 生成词法分析器和语法分析器
- 自定义访问器,生成语法文件中每个规则对应的Diagram对象
- 从访问器中获取每个规则的Diagram对象,转成SVG
代码
前提
对railroad-diagrams的源码进行简单的修改,实现关键字使用矩形框包裹,非关键字使用圆角矩形包裹,修改位置如下:
class Terminal(DiagramItem):
    ...

    def format(self, x, y, width):
        ...
        # This replaces the upstream statement that always drew a rounded box:
        #   DiagramItem('rect', {..., 'rx': 10, 'ry': 10}).addTo(self)
        # Text starting with an uppercase letter (keywords) gets a square
        # box; everything else keeps the rounded corners.
        # `leftGap` comes from the elided part of the upstream method.
        rect_attrs = {'x': x + leftGap, 'y': y - 11, 'width': self.width,
                      'height': self.up + self.down}
        # Slice instead of indexing so empty text does not raise IndexError.
        if not self.text[:1].isupper():
            rect_attrs.update({'rx': 10, 'ry': 10})
        DiagramItem('rect', rect_attrs).addTo(self)
1、antlr语言的antlr文法 Antlr4.g4
基于此文件生成对应的语法分析器和词法分析器,请使用访问器模式
grammar Antlr4 ;
// Entry rule: grammar header, optional imports, then one or more rules.
// EOF anchors the match to the end of input, so trailing text that does not
// parse is reported instead of being silently ignored.
ant4File
: gramDefine gramImport* ant4Rule + EOF
;
// Grammar header: the 'grammar' keyword, the grammar's name, then ';'
gramDefine
: 'grammar' gramName SEMI
;
// Import statement: the 'import' keyword, a single imported name, then ';'
gramImport
: 'import' importName SEMI
;
// One rule definition: rule name, ':', the rule body, then ';'
ant4Rule
: RuleName COLON ruleDefine SEMI
;
// Rule body: one or more alternatives separated by '|'
ruleDefine
: ruleBranch (PIPE ruleBranch)*
;
// One alternative: a sequence of one or more elements
ruleBranch
: branchElement +
;
// Element of an alternative: a token name, a quoted literal, a rule name or a
// parenthesised group, optionally followed by a '*', '+' or '?' suffix
branchElement
: (TokenName | TokenUname | RuleName | ruleGroup) ruleSymbol?
;
// A parenthesised group of alternatives separated by '|'.
// The former outer '( ... )+' accepted exactly the same input (a ruleBranch is
// already a repeated sequence of elements) but made the rule ambiguous.
ruleGroup
: LPAREN ruleBranch (PIPE ruleBranch)* RPAREN
;
// Suffix operator attached to an element: '*', '+' or '?'
ruleSymbol
: STAR | PLUS | QUES
;
// Name of the grammar
gramName
: TokenName // grammar names follow the token-name pattern: they start with an uppercase letter
;
// Name of an imported grammar
importName
: TokenName | RuleName // may start with either an uppercase or a lowercase letter
;
// Punctuation and operator tokens
PIPE: '|';
STAR: '*';
PLUS: '+';
QUES: '?';
LPAREN: '(';
RPAREN: ')';
COLON: ':';
SEMI: ';';
// Literal token written inline in single quotes (it has no named lexer rule).
// A backslash escapes the following character, so literals such as '\'' and
// '\\' lex as a single token; an unescaped quote or a line break ends the literal.
TokenUname
: '\'' ( '\\' . | ~['\\\r\n] )* '\''
;
// Rule name: a lowercase letter followed by letters, digits or underscores
RuleName
: [a-z] [a-zA-Z0-9_]*
;
// Token name: an uppercase letter followed by letters, digits or underscores
TokenName
: [A-Z] [a-zA-Z0-9_]*
;
// Generic identifier that may also start with an underscore.
// No parser rule in this grammar references it.
ID
: [a-zA-Z_] [a-zA-Z0-9_]*
;
// Whitespace and comments
// Runs of spaces, tabs and line breaks are discarded
WS
: [ \t\r\n] + -> skip
;
// Block comment; sent to the hidden channel
COMMENT
: '/*' .*? '*/' -> channel(HIDDEN)
;
// Line comment running to the end of the line or the end of input; sent to the hidden channel
LINE_COMMENT
: '//' .*? (('\r'? '\n') | EOF) -> channel(HIDDEN)
;
2、开发访问器 myvisitor.py
# coding: utf-8
# @Time : 2022/1/4 19:09
# @Author : wangwei
# @FileName: myvisitor.py
from antlr_parse.Antlr4Visitor import Antlr4Visitor
from antlr_parse.Antlr4Parser import Antlr4Parser
from railroad import Diagram, Choice, Sequence, Optional, OneOrMore, ZeroOrMore, Comment
class AntlrVisitor(Antlr4Visitor):
    """Visitor that turns one ``ant4Rule`` parse tree into a railroad diagram.

    After a rule has been visited, ``rule_parse`` holds two entries:
    ``'ruleName'`` (the rule's name) and ``'ruleDefine'`` (the ``Diagram``
    built from the rule body).
    """

    def __init__(self):
        super().__init__()
        # Per-instance result; a class-level dict would be shared (and
        # overwritten) by every visitor instance.
        self.rule_parse = {}

    def visitAnt4Rule(self, ctx: Antlr4Parser.Ant4RuleContext):
        """Record the rule's name and the diagram of its body in ``rule_parse``."""
        self.rule_parse['ruleName'] = ctx.RuleName().getText()
        self.rule_parse['ruleDefine'] = self.visit(ctx.ruleDefine())

    def visitRuleDefine(self, ctx: Antlr4Parser.RuleDefineContext):
        """Return a ``Diagram`` offering a choice between the rule's alternatives."""
        alternatives = [self.visit(branch) for branch in ctx.ruleBranch()]
        return Diagram(Choice(0, *alternatives))

    def visitRuleBranch(self, ctx: Antlr4Parser.RuleBranchContext):
        """Return a ``Sequence`` of the diagram items for one alternative.

        The shape ``item (sep item)*`` is collapsed into a single loop over
        ``item`` with ``sep`` shown as a comment on the return path.
        """
        items = []
        previous = None
        for element in ctx.branchElement():
            looped = self._as_separated_list(element, previous)
            if looped is not None:
                # The preceding item is folded into the loop, so drop it.
                items.pop()
                items.append(looped)
            else:
                items.append(self.visit(element))
            previous = element
        return Sequence(*items)

    def _as_separated_list(self, element, previous):
        """Return a ``OneOrMore`` for ``previous (sep previous)*``, else ``None``.

        ``previous`` is the element just before ``element`` in the same
        alternative, or ``None`` when ``element`` comes first; the first
        element can never match, which avoids looking up index -1.
        """
        group = element.ruleGroup()
        symbol = element.ruleSymbol()
        if previous is None or group is None or symbol is None:
            return None
        if symbol.getText() != "*":
            return None
        branches = group.ruleBranch()
        if len(branches) != 1:
            return None
        parts = branches[0].branchElement()
        if len(parts) != 2 or parts[1].getText() != previous.getText():
            return None
        separator = self.visit(parts[0])
        if not isinstance(separator, str):
            # The separator is shown via Comment, which is given plain text here.
            return None
        return OneOrMore(self.visit(parts[1]), Comment(separator))

    def visitBranchElement(self, ctx: Antlr4Parser.BranchElementContext):
        """Return the diagram item for one element, honouring its suffix.

        A plain name or literal is returned as a string; ``?``, ``*`` and
        ``+`` wrap the item in ``Optional``, ``ZeroOrMore`` and ``OneOrMore``.
        """
        symbol = ctx.ruleSymbol()
        suffix = symbol.getText() if symbol is not None else ""

        group = ctx.ruleGroup()
        if group is not None:
            choice = Choice(0, *self.visit(group))
            if suffix == "?":
                return Optional(choice, skip=True)
            if suffix == "*":
                # NOTE(review): every starred group is labelled "," on the
                # repeat path, whatever its real separator is - kept as-is.
                return ZeroOrMore(choice, Comment(","), skip=True)
            if suffix == "+":
                return OneOrMore(choice)
            return choice

        literal = ctx.TokenUname()
        if literal is not None:
            # Unquote only the literal itself; the old eval() of the whole
            # element text failed whenever a suffix followed the literal.
            text = self._literal_text(literal.getText())
        else:
            text = ctx.getText()
            if suffix:
                text = text[:-len(suffix)]

        if suffix == "?":
            return Optional(text, skip=True)
        if suffix == "*":
            return ZeroOrMore(text)
        if suffix == "+":
            return OneOrMore(text)
        return text

    @staticmethod
    def _literal_text(quoted):
        """Return the text of a single-quoted grammar literal without its quotes."""
        # Local import keeps this block self-contained.
        import ast
        try:
            # literal_eval decodes escapes like eval() did, without executing code.
            value = ast.literal_eval(quoted)
        except (ValueError, SyntaxError):
            return quoted[1:-1]
        return value if isinstance(value, str) else quoted[1:-1]

    def visitRuleGroup(self, ctx: Antlr4Parser.RuleGroupContext):
        """Return the list of diagram items, one per alternative in the group."""
        return [self.visit(branch) for branch in ctx.ruleBranch()]

    def visitRuleSymbol(self, ctx: Antlr4Parser.RuleSymbolContext):
        """Return the suffix operator as text."""
        return ctx.getText()
3、定义Antlr4File类,作为G4文件的入口
# coding: utf-8
# @Time : 2022/1/4 19:30
# @Author : wangwei
# @FileName: antlr4_file.py
import os
from antlr4 import *
from antlr_parse.Antlr4Parser import Antlr4Parser
from antlr_parse.Antlr4Lexer import Antlr4Lexer
from antlr_parse.myvisitor import AntlrVisitor
import sys
class BailSimpleLexer(Antlr4Lexer):
    """Lexer that ends the process instead of recovering from an error."""

    def __init__(self, input=None):
        super().__init__(input=input)

    def recover(self, re: RecognitionException):
        """Override ``recover``: exit with status 1 rather than continue."""
        raise SystemExit(1)
class Antlr4File:
    """An ANTLR grammar file parsed into its ``ant4Rule`` subtrees."""

    def __init__(self, file, encoding='utf-8'):
        """Parse ``file`` and keep its rule subtrees in ``ant4RuleTrees``.

        Args:
            file: path of the grammar file.
            encoding: text encoding used to read the file.

        Raises:
            FileNotFoundError: if ``file`` does not exist.
        """
        if not os.path.exists(file):
            # FileNotFoundError is an Exception subclass, so callers that
            # catch Exception keep working.
            raise FileNotFoundError("file not found: {}".format(file))
        self.antlrFile = file
        self.encoding = encoding
        self.ant4RuleTrees = self.get_ant4Rule_trees()

    def get_ant4Rule_trees(self):
        """Return the ``ant4Rule`` subtrees of the grammar file.

        Any exception raised while parsing is printed to stderr and the
        process exits with status 1.
        """
        input_ = FileStream(self.antlrFile, encoding=self.encoding)
        lexer = BailSimpleLexer(input_)
        tokens = CommonTokenStream(lexer)
        parser = Antlr4Parser(tokens)
        try:
            file_tree = parser.ant4File()
        except Exception as e:
            print(e, file=sys.stderr)
            sys.exit(1)
        return file_tree.ant4Rule()

    def parse_rule(self, ruleTree):
        """Visit one ``ant4Rule`` subtree and return the visitor's ``rule_parse`` dict."""
        visitor = AntlrVisitor()
        visitor.visit(ruleTree)
        return visitor.rule_parse
4、开发应用模块
# coding: utf-8
# @Time : 2022/1/4 20:29
# @Author : wangwei
# @FileName: gen_svg.py
from antlr_parse.antlr4_file import Antlr4File
"""对antlr4文法进行分解"""
def write_svg(s: str, path: "str | None" = None) -> None:
    """Append one chunk of SVG text to a file.

    Args:
        s: text to append.
        path: target file. When omitted, the module-level ``filename`` is
            used, which keeps the original one-argument call working.
    """
    target = filename if path is None else path
    # Explicit encoding so the output does not depend on the platform default.
    with open(target, "a", encoding="utf-8") as f:
        f.write(s)


if __name__ == '__main__':
    g4file = 'PLSQL.g4'  # grammar file to read
    # Names of the rules to render; leave empty to render every rule in the grammar.
    gen_rule_svgs = ["ref_cursor_type_definition"]
    ant4file = Antlr4File(g4file, 'utf-8')
    for ruletree in ant4file.ant4RuleTrees:
        rule = ant4file.parse_rule(ruletree)
        rule_name = rule.get("ruleName")
        if gen_rule_svgs and rule_name not in gen_rule_svgs:
            continue
        filename = "{}.svg".format(rule_name)
        # Opening with "w" truncates a previous file, and the diagram is
        # written through one handle instead of reopening per chunk.
        with open(filename, "w", encoding="utf-8") as svg_file:
            rule.get("ruleDefine").writeSvg(svg_file.write)
5、用来测试的G4文件PLSQL.g4
说明:不需要完整的语法文件,例如token不需要定义
grammar PLSQL ;
// Sample rule used to test the diagram generator
ref_cursor_type_definition
: TYPE type IS REF CURSOR (RETURN ((db_table_or_view | cursor | cursor_variable) '%' ROWTYPE
| record '%' TYPE
| record_type
| ref_cursor_type))? ';'
;
结帖~