说明:解解析sql语句工具析工具可以解析出sql语句 语法、库表、条件 下面示例只是解析 库表的示例,其他解析需要再分析解析规范
使用:
一、mvaen引用
<!--解析sql语句使用jar包 start-->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>2.0.0-SNAPSHOT</version>
<exclusions>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr-runtime</artifactId>
<version>3.4</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.2.0</version>
</dependency>
<!--解析sql语句使用jar包 end-->
二、引入解析类
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.Stack;
import com.alibaba.fastjson.JSONObject;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.TokenRewriteStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.*;
/**
* 目的:获取AST中的表,列,以及对其所做的操作,如SELECT,INSERT
* 重点:获取SELECT操作中的表和列的相关操作。其他操作这判断到表级别。
* 实现思路:对AST深度优先遍历,遇到操作的token则判断当前的操作,
* 遇到TOK_TAB或TOK_TABREF或TOK_ALTERTABLE或者则判断出当前操作的表,遇到子句则压栈当前处理,处理子句。
* 子句处理完,栈弹出。
*/
public class HiveParseUtil {
private final Log log = LogFactory.getLog(HiveParseUtil.class);
private final String UNKNOWN = "UNKNOWN";
private String defaultDbName;
private Set<String> tables = new HashSet<String>();
private Stack<String> tableNameStack = new Stack<String>();
private Stack<Oper> operStack = new Stack<Oper>();
private String nowQueryTable = "";//定义及处理不清晰,修改为query或from节点对应的table集合或许好点。目前正在查询处理的表可能不止一个。
private Oper oper;
private boolean joinClause = false;
private enum Oper {
SELECT, INSERT, DROP, TRUNCATE, LOAD, CREATE, ALTER, USE
}
public Set<String> parseIteral(ASTNode ast) {
Set<String> set = new HashSet<String>();//当前查询所对应到的表集合
prepareToParseCurrentNodeAndChilds(ast);
set.addAll(parseChildNodes(ast));
set.addAll(parseCurrentNode(ast, set));
endParseCurrentNode(ast);
return set;
}
private void endParseCurrentNode(ASTNode ast) {
if (ast.getToken() != null) {
switch (ast.getToken().getType()) {//join 从句结束,跳出join
case HiveParser.TOK_RIGHTOUTERJOIN:
case HiveParser.TOK_LEFTOUTERJOIN:
case HiveParser.TOK_JOIN:
joinClause = false;
break;
case HiveParser.TOK_QUERY:
case HiveParser.TOK_INSERT:
case HiveParser.TOK_SELECT:
nowQueryTable = tableNameStack.pop();
oper = operStack.pop();
break;
}
}
}
private Set<String> parseCurrentNode(ASTNode ast, Set<String> set) {
if (ast.getToken() != null) {
switch (ast.getToken().getType()) {
case HiveParser.TOK_TABLE_PARTITION:
if (ast.getChildCount() != 2) {
String table = BaseSemanticAnalyzer
.getUnescapedName((ASTNode) ast.getChild(0));
if (oper == Oper.SELECT) {
nowQueryTable = table;
}
tables.add(table + "\t" + oper);
}
break;
case HiveParser.TOK_TAB:// outputTable
String tableTab = BaseSemanticAnalyzer
.getUnescapedName((ASTNode) ast.getChild(0));
if (oper == Oper.SELECT) {
nowQueryTable = tableTab;
}
tables.add(tableTab + "\t" + oper);
break;
case HiveParser.TOK_TABREF:// inputTable
ASTNode tabTree = (ASTNode) ast.getChild(0);
String tableName = (tabTree.getChildCount() == 1) ? BaseSemanticAnalyzer
.getUnescapedName((ASTNode) tabTree.getChild(0))
: BaseSemanticAnalyzer
.getUnescapedName((ASTNode) tabTree.getChild(0))
+ "." + tabTree.getChild(1);
if (oper == Oper.SELECT) {
if (joinClause && !"".equals(nowQueryTable)) {
nowQueryTable += "&" &#