Python实现Antlr文法转语法图(铁路图)

13 篇文章 6 订阅

在网上找了很久Antlr文法转语法图的工具也没有找到,只能自己写一个了,效果还不错~
先看下效果,语法定义如下:

ref_cursor_type_definition
    : TYPE type IS REF CURSOR (RETURN ((db_table_or_view | cursor | cursor_variable) '%' ROWTYPE
    | record '%' TYPE
    | record_type
    | ref_cursor_type))? ';'
    ;

转换成语法图的效果(很漂亮吧~):
语法图

实现方案

画语法图的工具可以用python第三方包 railroad-diagrams

pip install railroad-diagrams

解析Antlr文法可以用 antlr4-python3-runtime

pip install antlr4-python3-runtime

python中使用antlr需要做一些环境准备,参考Python中使用Antlr的环境准备

基本思路

  1. 编写描述Antlr文法自身的Antlr文法(有点绕,即用一个G4文件来描述G4文件的语法)
  2. 生成词法分析器和语法分析器
  3. 自定义访问器,生成语法文件中每个规则对应的Diagram对象
  4. 从访问器中获取每个规则的Diagram对象,转成SVG

代码

前提

对railroad-diagrams的源码进行简单的修改,实现关键字使用矩形框包裹,非关键字使用圆角矩形包裹,修改位置如下:

# Excerpt of the Terminal class from the railroad-diagrams source, showing only
# the lines to patch. Each `...` stands for upstream code that is left unchanged.
class Terminal(DiagramItem):
	...
	def format(self, x, y, width):
		...
		# Original upstream code, commented out (it always drew a rounded rectangle):
		# DiagramItem('rect', {'x': x + leftGap, 'y': y - 11, 'width': self.width,
		# 					 'height': self.up + self.down, 'rx': 10, 'ry': 10}).addTo(self)
		# Replacement: choose the box shape from the first character of the text.
		# NOTE(review): leftGap is presumably defined in the elided code above - confirm.
		# NOTE(review): self.text[0] raises IndexError for an empty text.
		if self.text[0].isupper():
			# Text starting with an upper-case letter: plain rectangle (no rx/ry).
			DiagramItem('rect', {'x': x + leftGap, 'y': y - 11, 'width': self.width,
						 		'height': self.up + self.down}).addTo(self)
		else:
			# Any other text: rectangle with rounded corners (rx/ry = 10).
			DiagramItem('rect', {'x': x + leftGap, 'y': y - 11, 'width': self.width,
								 'height': self.up + self.down, 'rx': 10, 'ry': 10}).addTo(self)

1、antlr语言的antlr文法 Antlr4.g4

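基于此文件生成对应的语法分析器和词法分析器;生成时请启用访问器模式(antlr4 工具的 -visitor 选项),例如:antlr4 -Dlanguage=Python3 -visitor Antlr4.g4,并将生成的文件放入 antlr_parse 包中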

// Parser rules of a simplified grammar for ANTLR grammar (.g4) files.
grammar Antlr4 ;

// Whole file: one grammar declaration, any number of imports, one or more rules.
ant4File
    : gramDefine gramImport* ant4Rule +
    ;

// Grammar declaration, e.g. `grammar Name ;`
gramDefine
    : 'grammar' gramName SEMI
    ;

// Import statement, e.g. `import name ;`
gramImport
    : 'import' importName SEMI
    ;

// One rule: `name : definition ;` (only names matching RuleName are accepted).
ant4Rule
    : RuleName COLON ruleDefine SEMI
    ;

// Rule body: one or more alternatives separated by `|`.
ruleDefine
    : ruleBranch (PIPE ruleBranch)*
    ;

// One alternative (branch) of a rule: a sequence of elements.
ruleBranch
    : branchElement +
    ;

// Branch element: a token, a quoted literal, a rule reference or a group,
// optionally followed by a repetition suffix.
branchElement
    : (TokenName | TokenUname | RuleName | ruleGroup) ruleSymbol?
    ;

// Parenthesized group of alternatives.
// NOTE(review): the outer `+` looks redundant because ruleBranch already
// repeats its elements; it appears to make the rule ambiguous - confirm.
ruleGroup
    : LPAREN (ruleBranch (PIPE ruleBranch)*) + RPAREN
    ;
// Repetition suffix: `*`, `+` or `?`.
ruleSymbol
    : STAR | PLUS | QUES
    ;

gramName
    : TokenName     // grammar names follow the token-name pattern: they start with an upper-case letter
    ;

importName
    : TokenName | RuleName     // may start with either an upper-case or a lower-case letter
    ;

// Punctuation tokens.
PIPE: '|';
STAR: '*';
PLUS: '+';
QUES: '?';
LPAREN: '(';
RPAREN: ')';
COLON: ':';
SEMI: ';';


// Inline literal: a single-quoted string used directly in a rule instead of a named token.
// The match is non-greedy and ends at the next single quote.
TokenUname
    : '\''.*?'\''
    ;

// Rule name: starts with a lower-case letter.
RuleName
    : [a-z]+ [a-zA-Z0-9_]*
    ;

// Token name: starts with an upper-case letter.
TokenName
    : [A-Z]+ [a-zA-Z0-9_]*
    ;

// NOTE(review): RuleName and TokenName are declared first and cover every
// identifier that starts with a letter, so ID can only win for names starting
// with `_`; no parser rule shown here references ID - confirm it is needed.
ID
    : [a-zA-Z_]+ [a-zA-Z0-9_]*
    ;

// Whitespace is skipped; comments are sent to the hidden channel.
WS
    : [ \t\r\n] + -> skip
    ;
COMMENT
    : '/*' .*? '*/' -> channel(HIDDEN)
    ;
LINE_COMMENT
    : '//' .*? (('\r'? '\n') | EOF) -> channel(HIDDEN)
    ;

2、开发访问器 myvisitor.py

# coding: utf-8
# @Time    : 2022/1/4 19:09
# @Author  : wangwei
# @FileName: myvisitor.py
from antlr_parse.Antlr4Visitor import Antlr4Visitor
from antlr_parse.Antlr4Parser import Antlr4Parser
from railroad import Diagram, Choice, Sequence, Optional, OneOrMore, ZeroOrMore, Comment


class AntlrVisitor(Antlr4Visitor):
    """Visitor that converts one ``ant4Rule`` parse tree into railroad items.

    After an ``ant4Rule`` node has been visited, ``rule_parse`` contains:

    * ``'ruleName'``   - the text of the rule name;
    * ``'ruleDefine'`` - the ``Diagram`` built from the rule body.
    """

    def __init__(self):
        super().__init__()
        # Per-instance result store. A class-level dict would be shared by
        # every visitor, so results of different rules would overwrite each
        # other in place.
        self.rule_parse = {}

    def visitAnt4Rule(self, ctx: Antlr4Parser.Ant4RuleContext):
        """Record the name and the diagram of a single rule in ``rule_parse``."""
        self.rule_parse['ruleName'] = ctx.RuleName().getText()
        self.rule_parse['ruleDefine'] = self.visit(ctx.ruleDefine())

    def visitRuleDefine(self, ctx: Antlr4Parser.RuleDefineContext):
        """Return a ``Diagram`` whose content is a choice between all branches."""
        branches = [self.visit(branch) for branch in ctx.ruleBranch()]
        return Diagram(Choice(0, *branches))

    def visitRuleBranch(self, ctx: Antlr4Parser.RuleBranchContext):
        """Return a ``Sequence`` of the elements of one alternative.

        The idiom ``item (sep item)*`` is folded into a single
        ``OneOrMore(item, Comment(sep))`` so that the separator is drawn on
        the repeat line instead of as a second, nested loop.
        """
        elements = ctx.branchElement()
        items = []
        for index, element in enumerate(elements):
            folded = self._fold_separated_list(elements, index)
            if folded is None:
                items.append(self.visit(element))
            else:
                # The previous item is the leading ``item`` of the idiom; it
                # is now represented by the loop itself.
                items.pop()
                items.append(folded)
        return Sequence(*items)

    def _fold_separated_list(self, elements, index):
        """Return the loop for ``item (sep item)*`` ending at *index*, else ``None``."""
        if index == 0:
            # There is no preceding element to compare with.
            return None
        element = elements[index]
        group = element.ruleGroup()
        symbol = element.ruleSymbol()
        if not group or not symbol or symbol.getText() != "*":
            return None
        branches = group.ruleBranch()
        if len(branches) != 1:
            return None
        parts = branches[0].branchElement()
        if len(parts) != 2:
            return None
        if parts[1].getText() != elements[index - 1].getText():
            return None
        separator = self.visit(parts[0])
        if not isinstance(separator, str):
            # Only a plain name or literal can be shown as a comment label.
            return None
        return OneOrMore(self.visit(parts[1]), Comment(separator))

    def visitBranchElement(self, ctx: Antlr4Parser.BranchElementContext):
        """Return the railroad item for one element and its optional suffix.

        A bare name or literal without a suffix is returned as a plain string.
        """
        # Local import keeps this class self-contained.
        import ast

        symbol = ctx.ruleSymbol()
        suffix = symbol.getText() if symbol else ""

        group = ctx.ruleGroup()
        if group:
            item = Choice(0, *self.visit(group))
            if suffix == "?":
                return Optional(item, skip=True)
            if suffix == "*":
                # NOTE(review): the "," label is drawn for every starred group,
                # whatever the group contains - confirm this is intended.
                return ZeroOrMore(item, Comment(","), skip=True)
            if suffix == "+":
                return OneOrMore(item)
            return item

        # The first child is the name or literal; the suffix, if any, follows.
        text = ctx.getChild(0).getText()
        if ctx.TokenUname():
            # Unquote the literal without executing it. Only the literal is
            # decoded, so a trailing suffix such as ','? no longer breaks it.
            try:
                text = ast.literal_eval(text)
            except (ValueError, SyntaxError):
                text = text[1:-1]

        if suffix == "?":
            return Optional(text, skip=True)
        if suffix == "*":
            return ZeroOrMore(text)
        if suffix == "+":
            return OneOrMore(text)
        return text

    def visitRuleGroup(self, ctx: Antlr4Parser.RuleGroupContext):
        """Return the list of visited branches of a parenthesized group."""
        return [self.visit(branch) for branch in ctx.ruleBranch()]

    def visitRuleSymbol(self, ctx: Antlr4Parser.RuleSymbolContext):
        """Return the suffix text (``*``, ``+`` or ``?``)."""
        return ctx.getText()

3、定义Antlr4File类,作为G4文件的入口

# coding: utf-8
# @Time    : 2022/1/4 19:30
# @Author  : wangwei
# @FileName: antlr4_file.py
import os
from antlr4 import *
from antlr_parse.Antlr4Parser import Antlr4Parser
from antlr_parse.Antlr4Lexer import Antlr4Lexer
from antlr_parse.myvisitor import AntlrVisitor
import sys


class BailSimpleLexer(Antlr4Lexer):
    """Lexer that ends the process instead of recovering from an error."""

    def __init__(self, input=None):
        super().__init__(input=input)

    def recover(self, re: RecognitionException):
        """Override ``recover``: stop with exit status 1 on an error."""
        raise SystemExit(1)


class Antlr4File:
    """An ANTLR grammar (.g4) file parsed into its ``ant4Rule`` trees.

    Attributes:
        antlrFile: path of the grammar file.
        encoding: encoding used to read the file.
        ant4RuleTrees: the ``ant4Rule`` parse trees found in the file.
    """

    def __init__(self, file, encoding='utf-8'):
        """Parse *file* right away.

        Raises:
            FileNotFoundError: if *file* is not an existing regular file.
        """
        # isfile() also rejects directories, which exists() would accept.
        if not os.path.isfile(file):
            raise FileNotFoundError("file not found: {}".format(file))
        self.antlrFile = file
        self.encoding = encoding
        self.ant4RuleTrees = self.get_ant4Rule_trees()

    def get_ant4Rule_trees(self):
        """Return all ``ant4Rule`` parse trees of the grammar file.

        If parsing raises, the error is printed to stderr and the process
        exits with status 1.
        """
        input_ = FileStream(self.antlrFile, encoding=self.encoding)
        lexer = BailSimpleLexer(input_)
        tokens = CommonTokenStream(lexer)
        parser = Antlr4Parser(tokens)
        try:
            file_tree = parser.ant4File()
        except Exception as e:
            print(e, file=sys.stderr)
            sys.exit(1)
        return file_tree.ant4Rule()

    def parse_rule(self, ruleTree):
        """Visit one ``ant4Rule`` tree and return the visitor's result dict.

        A copy is returned so that results of successive calls never alias
        the same dict object.
        """
        visitor = AntlrVisitor()
        visitor.visit(ruleTree)
        return dict(visitor.rule_parse)

4、开发应用模块

# coding: utf-8
# @Time    : 2022/1/4 20:29
# @Author  : wangwei
# @FileName: gen_svg.py
from antlr_parse.antlr4_file import Antlr4File

"""对antlr4文法进行分解"""

def write_svg(s: str, path=None):
    """Append the SVG fragment *s* to a file, encoded as UTF-8.

    Args:
        s: text to append.
        path: target file. When omitted, the module-level ``filename`` is
            used, which must have been set before the call.
    """
    target = filename if path is None else path
    # Explicit encoding: the platform default may not be able to encode
    # non-ASCII text contained in the diagram.
    with open(target, "a", encoding="utf-8") as f:
        f.write(s)

if __name__ == '__main__':
    g4file = 'PLSQL.g4'     # grammar file to read
    # Names of the rules to draw; leave empty to draw every rule in the file.
    gen_rule_svgs = ["ref_cursor_type_definition"]
    ant4file = Antlr4File(g4file, 'utf-8')
    ant4Rules = ant4file.ant4RuleTrees

    for ruletree in ant4Rules:
        # Check the name first so that rules that are not requested are
        # never converted into diagrams.
        rule_name = ruletree.RuleName().getText()
        if gen_rule_svgs and rule_name not in gen_rule_svgs:
            continue
        rule = ant4file.parse_rule(ruletree)
        filename = "{}.svg".format(rule_name)
        # "w" replaces any previous file; the file is opened once and its
        # write method receives every chunk that writeSvg emits.
        with open(filename, "w", encoding="utf-8") as svg_file:
            rule.get("ruleDefine").writeSvg(svg_file.write)

5、用来测试的G4文件PLSQL.g4

说明:不需要完整的语法文件,例如token不需要定义

// Sample grammar used to test the converter. It contains a single rule;
// the tokens it references are intentionally left undefined.
grammar PLSQL ;

ref_cursor_type_definition
    : TYPE type IS REF CURSOR (RETURN ((db_table_or_view | cursor | cursor_variable) '%' ROWTYPE
    | record '%' TYPE
    | record_type
    | ref_cursor_type))? ';'
    ;

结帖~

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值