2024年9月12日11:16:01----0.1.8
2024年9月12日13:46:02----0.1.9
目标
从一个java文件中提取出类名和方法名、import
java源文件
文件名是main.java,具体内容如下
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.ErrorNode;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
import org.antlr.v4.runtime.tree.TerminalNode;
import java.util.List;
/**
 * Demo driver for the generated Hello grammar: parses a brace-delimited sample
 * string, dumps the parse tree, and re-prints the input through
 * {@link myVisitor}.
 */
public class main {
    /**
     * Lexes and parses a fixed sample string, prints the tree in LISP style
     * followed by each direct child, then runs {@link myVisitor} over the tree.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Statement to analyse.
        String inputStr = "{42,43,{32,7+6,3+9,5,6+55},3,7+1,44}";
        System.out.println("input size is " + inputStr.length());
        // Wrap the string in an ANTLR CharStream.
        CharStream input = CharStreams.fromString(inputStr);
        // Tokenise the input.
        Lexer lexer = new HelloLexer(input);
        // Buffer that stores the tokens produced by the lexer.
        CommonTokenStream commonTokenStream = new CommonTokenStream(lexer);
        // Parser working on the buffered tokens.
        HelloParser helloParser = new HelloParser(commonTokenStream);
        // Start parsing at the `line` rule.
        ParseTree parseTree = helloParser.line();
        // Number of direct children of the root node.
        int childCount = parseTree.getChildCount();
        // Print the tree in LISP style.
        System.out.println(parseTree.toStringTree());
        // Print every direct child.
        for (int i = 0; i < childCount; i++) {
            System.out.println("child " + i + ":" + parseTree.getChild(i).toStringTree());
        }
        System.out.println(" ");
        myVisitor mv = new myVisitor();
        mv.visit(parseTree);
    }

    /**
     * Visitor that echoes the parsed input to standard output: plain integers
     * are printed in hexadecimal, {@code a+b} elements are printed as their
     * decimal sum.
     */
    public static class myVisitor extends HelloBaseVisitor<String> {
        /**
         * Prints the opening brace of a group and then visits its children.
         *
         * @param ctx the {@code line} context being visited
         * @return the result of {@link #visitChildren}
         */
        @Override
        public String visitLine(HelloParser.LineContext ctx) {
            System.out.print("{");
            return visitChildren(ctx);
        }

        /**
         * Prints a single integer element in hexadecimal, followed by the
         * separator chosen by {@link #separatorAfter}.
         *
         * <p>The element is printed exactly once; the previous version printed
         * the last element of every group twice.
         *
         * @param ctx the integer element being visited
         * @return the result of {@link #visitChildren}
         */
        @Override
        public String visitIntShow(HelloParser.IntShowContext ctx) {
            String hex = Integer.toHexString(Integer.parseInt(ctx.getText()));
            System.out.print(hex + separatorAfter(ctx, ctx.getStop()));
            return visitChildren(ctx);
        }

        /**
         * Visits the children of a nested-group element without printing
         * anything itself.
         *
         * @param ctx the nested-group element being visited
         * @return the result of {@link #visitChildren}
         */
        @Override
        public String visitLineShow(HelloParser.LineShowContext ctx) {
            return visitChildren(ctx);
        }

        /**
         * Prints the decimal sum of the two integers of an {@code a+b}
         * element, followed by the separator chosen by {@link #separatorAfter}.
         *
         * @param ctx the sum element being visited
         * @return always the placeholder string {@code "4444"}
         */
        @Override
        public String visitGetSUM(HelloParser.GetSUMContext ctx) {
            int sum = Integer.parseInt(ctx.INT(0).getText()) + Integer.parseInt(ctx.INT(1).getText());
            System.out.print(sum + separatorAfter(ctx, ctx.INT(1).getSymbol()));
            return "4444";
        }

        /**
         * Extracts the stop index from the string form of a token, e.g.
         * {@code "[@25,28:29='44',<3>,1:28]"} yields {@code 29}.
         *
         * <p>Kept for callers that only have the string form; when a
         * {@link Token} is available, {@code getStopIndex()} gives the same
         * value directly.
         *
         * @param ruleString string form of a token, as in the example above
         * @return the number between the first {@code ':'} and the first {@code '='}
         */
        public int getLastPosition(String ruleString) {
            String lastPositionStr = ruleString.split("=")[0].split(":")[1];
            return Integer.parseInt(lastPositionStr);
        }

        /**
         * Chooses the text printed after an element.
         *
         * <ul>
         *   <li>{@code ","} when the element does not end directly before the
         *       stop token of its parent context;</li>
         *   <li>{@code "}"} when it does and it also ends directly before the
         *       end of the root context's stop token (for the sample input this
         *       is index 34, the value that used to be hard-coded);</li>
         *   <li>{@code "},"} otherwise.</li>
         * </ul>
         *
         * <p>All comparisons use absolute character indexes, so they agree with
         * each other on multi-line input as well.
         *
         * @param ctx       the element context
         * @param lastToken the last token belonging to the element
         * @return the separator text to print after the element
         */
        private static String separatorAfter(ParserRuleContext ctx, Token lastToken) {
            int elementEnd = lastToken.getStopIndex();
            int groupEnd = ctx.getParent().getStop().getStartIndex() - 1;
            if (elementEnd != groupEnd) {
                return ",";
            }
            // Walk up to the root context to find where the whole input ends.
            ParserRuleContext root = ctx;
            while (root.getParent() != null) {
                root = root.getParent();
            }
            int inputEnd = root.getStop().getStopIndex() - 1;
            return elementEnd == inputEnd ? "}" : "},";
        }
    }
}
java的g4文件
这里使用的是Java.g4文件,下载地址如下
https://download.csdn.net/download/m0_60688978/89742093
https://github.com/antlr/codebuff/tree/master/grammars/org/antlr/codebuff
生成antlr代码
参考这篇文章就可以了
最终会生成如下java文件
D:\源码\kafka-2.1\antlr\gen\Java.interp
D:\源码\kafka-2.1\antlr\gen\Java.tokens
D:\源码\kafka-2.1\antlr\gen\JavaBaseListener.java
D:\源码\kafka-2.1\antlr\gen\JavaBaseVisitor.java
D:\源码\kafka-2.1\antlr\gen\JavaLexer.java
D:\源码\kafka-2.1\antlr\gen\JavaLexer.interp
D:\源码\kafka-2.1\antlr\gen\JavaLexer.tokens
D:\源码\kafka-2.1\antlr\gen\JavaListener.java
D:\源码\kafka-2.1\antlr\gen\JavaParser.java
D:\源码\kafka-2.1\antlr\gen\JavaVisitor.java
最终代码
里面有一个main方法,以及一个继承了JavaBaseListener类的自定义类,它重写了几个需要用到的方法。最后使用ParseTreeWalker的walk方法去触发遍历。
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Prints a skeleton (imports, class declarations, method signatures) of a
 * Java source file by walking its ANTLR parse tree with a listener.
 *
 * <p>Grammar reference:
 * https://github.com/antlr/codebuff/blob/master/grammars/org/antlr/codebuff/Java8.g4
 */
public class javamain {
    /** Source file that is parsed when no path is passed on the command line. */
    private static final String DEFAULT_SOURCE_PATH = "D:\\源码\\kafka-2.1\\antlr\\src\\main.java";

    /** Parser created by {@link #main}; {@link recognizeJava} reads its token stream. */
    public static JavaParser javaParser;

    /**
     * Parses a Java source file, prints its parse tree and each direct child
     * of the root, then walks the tree with {@link recognizeJava}.
     *
     * @param args optional; {@code args[0]} is the path of the file to parse,
     *             otherwise {@link #DEFAULT_SOURCE_PATH} is used
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String sourcePath = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE_PATH;
        // Read the whole file into an ANTLR CharStream.
        CharStream input;
        try (FileInputStream in = new FileInputStream(sourcePath)) {
            input = CharStreams.fromStream(in);
        }
        // Tokenise the input.
        Lexer lexer = new JavaLexer(input);
        // Buffer that stores the tokens produced by the lexer.
        CommonTokenStream commonTokenStream = new CommonTokenStream(lexer);
        // Parser working on the buffered tokens.
        javaParser = new JavaParser(commonTokenStream);
        // Start parsing at the `compilationUnit` rule.
        ParseTree parseTree = javaParser.compilationUnit();
        // Number of direct children of the root node.
        int childCount = parseTree.getChildCount();
        // Print the tree in LISP style.
        System.out.println(parseTree.toStringTree());
        // Print every direct child.
        for (int i = 0; i < childCount; i++) {
            System.out.println("child " + i + ":" + parseTree.getChild(i).toStringTree());
        }
        System.out.println(" ");
        recognizeJava cc = new recognizeJava();
        ParseTreeWalker walker = new ParseTreeWalker();
        walker.walk(cc, parseTree);
    }

    /**
     * Listener that prints import lines, modifiers, class headers and method
     * signatures as the walker enters the corresponding contexts.
     *
     * <p>Everything is written with {@code print} rather than {@code printf}:
     * the text comes from the parsed file and must not be interpreted as a
     * format string (a {@code %} in it would make {@code printf} throw).
     */
    public static class recognizeJava extends JavaBaseListener {
        /**
         * Prints the {@code class} keyword, the class name and an opening brace.
         *
         * @param ctx the class declaration being entered
         */
        @Override
        public void enterClassDeclaration(JavaParser.ClassDeclarationContext ctx) {
            System.out.print(ctx.CLASS() + " " + ctx.Identifier().getText() + "{\n");
        }

        /**
         * Prints the closing brace of a class.
         *
         * @param ctx the class declaration being left
         */
        @Override
        public void exitClassDeclaration(JavaParser.ClassDeclarationContext ctx) {
            System.out.print("}\n");
        }

        /**
         * Prints the return type, method name, parameter list and an opening
         * brace. When the declaration has no {@code typeSpec}, {@code void} is
         * printed as the return type.
         *
         * <p>The parameter list is taken from {@code getText()}, which joins
         * the token texts without the whitespace between them.
         *
         * @param ctx the method declaration being entered
         */
        @Override
        public void enterMethodDeclaration(JavaParser.MethodDeclarationContext ctx) {
            TokenStream tokens = javaParser.getTokenStream();
            String type = "void";
            if (ctx.typeSpec() != null) {
                type = tokens.getText(ctx.typeSpec());
            }
            System.out.print(type + " " + ctx.Identifier() + ctx.formalParameters().getText() + "{");
        }

        /**
         * Prints the closing brace of a method.
         *
         * @param ctx the method declaration being left
         */
        @Override
        public void exitMethodDeclaration(JavaParser.MethodDeclarationContext ctx) {
            System.out.print("}\n");
        }

        /**
         * Prints one modifier. The surrounding whitespace depends on the text
         * of the first token of the modifier's grandparent context.
         *
         * @param ctx the modifier being entered
         */
        @Override
        public void enterClassOrInterfaceModifier(JavaParser.ClassOrInterfaceModifierContext ctx) {
            String ownerStart = ctx.getParent().getParent().getStart().getText();
            String modifier = ctx.getText();
            if ("public".equals(ownerStart)) {
                // TODO: this hard-coded spacing heuristic should be improved.
                if ("static".equals(modifier)) {
                    System.out.print(modifier + " ");
                } else {
                    System.out.print(" " + modifier + " ");
                }
            } else if ("@".equals(ownerStart)) {
                System.out.print("\n " + modifier + " ");
            } else {
                System.out.print(modifier + " ");
            }
        }

        /**
         * Prints the {@code import} keyword followed by the qualified name.
         *
         * @param ctx the import declaration being entered
         */
        @Override
        public void enterImportDeclaration(JavaParser.ImportDeclarationContext ctx) {
            System.out.print(ctx.IMPORT().getText() + " " + ctx.qualifiedName().getText() + "\n");
        }
    }
}
调测结果
虽然不完美(例如参数类型和参数名之间的空格丢失了,这是因为ctx.getText()只是把语法树中各个token的文本直接拼接起来;import语句末尾的.*也没有输出),但是基本的内容已经能提取出来了,提取到的import、类和方法如下
import org.antlr.v4.runtime
import org.antlr.v4.runtime.tree.ErrorNode
import org.antlr.v4.runtime.tree.ParseTree
import org.antlr.v4.runtime.tree.ParseTreeWalker
import org.antlr.v4.runtime.tree.TerminalNode
import java.util.List
public class main{
public static void main(String[]args){}
public static class myVisitor{
@Override
public String visitLine(HelloParser.LineContextctx){}
@Override
public String visitIntShow(HelloParser.IntShowContextctx){}
@Override
public String visitLineShow(HelloParser.LineShowContextctx){}
@Override
public String visitGetSUM(HelloParser.GetSUMContextctx){}
public int getLastPosition(StringruleString){}
}
}
阶段性总结
- 再一次体会到Listener的遍历是从树根到叶子(深度优先)进行的,遍历到哪个语法规则的节点,就会执行自定义Listener中对应的enter/exit方法。比如classOrInterfaceModifier规则匹配public或static这样的修饰符,在遍历到public static时,enterClassOrInterfaceModifier这个方法就会被执行两次。
- 这篇中只重写了六个有实际逻辑的方法:enterImportDeclaration、enterClassOrInterfaceModifier、enterClassDeclaration、exitClassDeclaration、enterMethodDeclaration、exitMethodDeclaration。
- TokenStream tokens = javaParser.getTokenStream(),token内容打印出来就是源文件的内容。其他常用的方法如下
tokens.getTokenSource().getLine():获取词法分析器当前位置所在的行号。整个文件解析完之后,当前位置停在EOF处,所以你看到文件只有125行,这里查询出来却是126,就是因为把EOF标记所在的那一行也算进去了