HiveParse sql语句解析

HiveParse 进行sql语句解析

解析sql中表名,查询字段名,where条件字段名

调用工具处理类


import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.hive.ql.parse.*;

import java.io.IOException;
import java.util.*;

/**
 * 目的:获取AST中的表,列,以及对其所做的操作,如SELECT,INSERT
 * 重点:获取SELECT操作中的表和列的相关操作。其他操作这判断到表级别。
 * 实现思路:对AST深度优先遍历,遇到操作的token则判断当前的操作,
 *                     遇到TOK_TAB或TOK_TABREF则判断出当前操作的表,遇到子句则压栈当前处理,处理子句。
 *                    子句处理完,栈弹出。
 *
 */
public class HiveParse {

    private  static final String UNKNOWN = "UNKNOWN";
    private Map<String, String> alias = new HashMap<String, String>();
    private Map<String, String> cols = new TreeMap<String, String>();
    private Map<String, String> wherecols = new TreeMap<String, String>();
    private Map<String, String> colAlais = new TreeMap<String, String>();
    private Set<String> tables = new HashSet<String>();
    private Stack<String> tableNameStack = new Stack<String>();
    private Stack<Oper> operStack = new Stack<Oper>();
    private String nowQueryTable = "";//定义及处理不清晰,修改为query或from节点对应的table集合或许好点。目前正在查询处理的表可能不止一个。
    private Oper oper ;
    private boolean joinClause = false;

    private enum Oper {
        SELECT, INSERT, DROP, TRUNCATE, LOAD, CREATETABLE, ALTER
    }
    public Set<String> parseIteral(ASTNode ast) {
        Set<String> set= new HashSet<String>();//当前查询所对应到的表集合
        prepareToParseCurrentNodeAndChilds(ast);
        set.addAll(parseChildNodes(ast));
        set.addAll(parseCurrentNode(ast ,set));
        endParseCurrentNode(ast);
        return set;
    }
    private void endParseCurrentNode(ASTNode ast){
        if (ast.getToken() != null) {
            switch (ast.getToken().getType()) {//join 从句结束,跳出join
                case HiveParser.TOK_RIGHTOUTERJOIN:
                case HiveParser.TOK_LEFTOUTERJOIN:
                case HiveParser.TOK_JOIN:
                    joinClause = false;
                    break;
                case HiveParser.TOK_QUERY:
                    break;
                case HiveParser.TOK_INSERT:
                case HiveParser.TOK_SELECT:
                    nowQueryTable = tableNameStack.pop();
                    oper = operStack.pop();
                    break;
            }
        }
    }
    private Set<String> parseCurrentNode(ASTNode ast, Set<String> set){
        if (ast.getToken() != null) {
            switch (ast.getToken().getType()) {
                case HiveParser.TOK_TABLE_PARTITION:
//            case HiveParser.TOK_TABNAME:
                    if (ast.getChildCount() != 2) {
                        String table = BaseSemanticAnalyzer
                                .getUnescapedName((ASTNode) ast.getChild(0));
                        if (oper == Oper.SELECT) {
                            nowQueryTable = table;
                        }
                        tables.add(table + "\t" + oper);
                    }
                    break;

                case HiveParser.TOK_TAB:// outputTable
                    String tableTab = BaseSemanticAnalyzer
                            .getUnescapedName((ASTNode) ast.getChild(0));
                    if (oper == Oper.SELECT) {
                        nowQueryTable = tableTab;
                    }
                    tables.add(tableTab + "\t" + oper);
                    break;
                case HiveParser.TOK_TABREF:// inputTable
                    ASTNode tabTree = (ASTNode) ast.getChild(0);
                    String tableName = (tabTree.getChildCount() == 1) ? BaseSemanticAnalyzer
                            .getUnescapedName((ASTNode) tabTree.getChild(0))
                            : BaseSemanticAnalyzer
                            .getUnescapedName((ASTNode) tabTree.getChild(0))
                            + "." + tabTree.getChild(1);
                    if (oper == Oper.SELECT) {
                        if(joinClause && !"".equals(nowQueryTable) ){
                            nowQueryTable += "&"+tableName;//
                        }else{
                            nowQueryTable = tableName;
                        }
                        set.add(tableName);
                    }
                    tables.add(tableName + "\t" + oper);
                    if (ast.getChild(1) != null) {
                        String alia = ast.getChild(1).getText().toLowerCase();
                        alias.put(alia, tableName);//sql6 p别名在tabref只对应为一个表的别名。
                    }
                    break;
                case HiveParser.TOK_TABLE_OR_COL:
                    if (ast.getParent().getType() != HiveParser.DOT) {
                        String col = ast.getChild(0).getText().toLowerCase();
                        if (alias.get(col) == null
                                && colAlais.get(nowQueryTable + "." + col) == null) {
                            if(nowQueryTable.indexOf("&") > 0){//sql23
                                cols.put(UNKNOWN + "." + col, "");
                            }else{
                                cols.put(nowQueryTable + "." + col, "");
                            }
                        }
                    }
                    break;
                case HiveParser.TOK_ALLCOLREF:
                    cols.put(nowQueryTable + ".*", "");
                    break;
                case HiveParser.TOK_SUBQUERY:
                    if (ast.getChildCount() == 2) {
                        String tableAlias = unescapeIdentifier(ast.getChild(1)
                                .getText());
                        String aliaReal = "";
                        for(String table : set){
                            aliaReal+=table+"&";
                        }
                        if(aliaReal.length() !=0){
                            aliaReal = aliaReal.substring(0, aliaReal.length()-1);
                        }
//                    alias.put(tableAlias, nowQueryTable);//sql22
                        alias.put(tableAlias, aliaReal);//sql6
//                    alias.put(tableAlias, "");// just store alias
                    }
                    break;

                case HiveParser.TOK_SELEXPR:
                    if (ast.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) {
                        String column = ast.getChild(0).getChild(0).getText()
                                .toLowerCase();
                        if(nowQueryTable.indexOf("&") > 0){
                            cols.put(UNKNOWN + "." + column, "");
                        }else if (colAlais.get(nowQueryTable + "." + column) == null) {
                            cols.put(nowQueryTable + "." + column, "");
                        }
                    } else if (ast.getChild(1) != null) {// TOK_SELEXPR (+
                        // (TOK_TABLE_OR_COL id)
                        // 1) dd
                        String columnAlia = ast.getChild(1).getText().toLowerCase();
                        colAlais.put(nowQueryTable + "." + columnAlia, "");
                    }
                    break;
                case HiveParser.DOT:
                    if (ast.getType() == HiveParser.DOT) {
                        if (ast.getChildCount() == 2) {
                            if (ast.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
                                    && ast.getChild(0).getChildCount() == 1
                                    && ast.getChild(1).getType() == HiveParser.Identifier) {
                                String alia = BaseSemanticAnalyzer
                                        .unescapeIdentifier(ast.getChild(0)
                                                .getChild(0).getText()
                                                .toLowerCase());
                                String column = BaseSemanticAnalyzer
                                        .unescapeIdentifier(ast.getChild(1)
                                                .getText().toLowerCase());
                                String realTable = null;
                                if (!tables.contains(alia + "\t" + oper)
                                        && alias.get(alia) == null) {// [b SELECT, a
                                    // SELECT]
                                    alias.put(alia, nowQueryTable);
                                }
                                if (tables.contains(alia + "\t" + oper)) {
                                    realTable = alia;
                                } else if (alias.get(alia) != null) {
                                    realTable = alias.get(alia);
                                }
                                if (realTable == null || realTable.length() == 0 || realTable.indexOf("&") > 0) {
                                    realTable = UNKNOWN;
                                }
                                cols.put(realTable + "." + column, "");

                            }
                        }
                    }
                    break;
                case HiveParser.TOK_WHERE:
                            int a = ast.getChildCount();
                            List<ASTNode> set1 =new ArrayList<>();
                            List<ASTNode> set0 =new ArrayList<>();
                            if(a==1){
                                    set0=  parseChildNodes1((ASTNode)ast.getChild(0),set0);
                            }else if(a==2){
                                    set0=  parseChildNodes1((ASTNode)ast.getChild(0),set0);
                                    set1=  parseChildNodes1((ASTNode)ast.getChild(1),set1);
                            }
                            for (int i = 0; i < set0.size(); i++) {
                                if (set0.get(i).getChild(0).getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
                                        && set0.get(i).getChild(0).getChild(0).getChildCount() == 1) {
                                    String alia = BaseSemanticAnalyzer
                                            .unescapeIdentifier(set0.get(i).getChild(0).getChild(0)
                                                    .getChild(0).getText()
                                                    .toLowerCase());
                                    String column = BaseSemanticAnalyzer
                                            .unescapeIdentifier(set0.get(i).getChild(0).getChild(1)
                                                    .getText().toLowerCase());
                                    String realTable = null;
                                    if (!tables.contains(alia + "\t" + oper)
                                            && alias.get(alia) == null) {// [b SELECT, a
                                        // SELECT]
                                        alias.put(alia, nowQueryTable);
                                    }
                                    if (tables.contains(alia + "\t" + oper)) {
                                        realTable = alia;
                                    } else if (alias.get(alia) != null) {
                                        realTable = alias.get(alia);
                                    }
                                    if (realTable == null || realTable.length() == 0 || realTable.indexOf("&") > 0) {
                                        realTable = UNKNOWN;
                                    }
                                    wherecols.put(realTable + "." + column, "");

                                }else if (set0.get(i).getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
                                        && set0.get(i).getChild(0).getChildCount() == 1) {
                                    String column = BaseSemanticAnalyzer
                                            .unescapeIdentifier(set0.get(i).getChild(0)
                                                    .getChild(0).getText()
                                                    .toLowerCase());

                                    wherecols.put(nowQueryTable + "." + column, "");

                                }

                            }
                    break;
                case HiveParser.TOK_ALTERTABLE_ADDPARTS:
                case HiveParser.TOK_ALTERTABLE_RENAME:
                case HiveParser.TOK_ALTERTABLE_ADDCOLS:
                    ASTNode alterTableName = (ASTNode) ast.getChild(0);
                    tables.add(alterTableName.getText() + "\t" + oper);
                    break;
            }
        }
        return set;
    }



    private  Set<String> parseChildNodes(ASTNode ast){
        Set<String> set= new HashSet<String>();
        int numCh = ast.getChildCount();
        if (numCh > 0) {
            for (int num = 0; num < numCh; num++) {
                ASTNode child = (ASTNode) ast.getChild(num);
                set.addAll(parseIteral(child));
            }
        }
        return set;
    }

    private List<ASTNode> parseChildNodes1(ASTNode ast,List<ASTNode> set){
        if (ast.getType() == HiveParser.KW_OR
			||ast.getType() == HiveParser.KW_AND) {
                int numCh = ast.getChildCount();
                for (int num = 0; num < numCh; num++) {
                    parseChildNodes1((ASTNode) ast.getChild(num),set);
                }


        }else if (ast.getType() == HiveParser.NOTEQUAL //判断条件  > < like in
			|| ast.getType() == HiveParser.EQUAL
			|| ast.getType() == HiveParser.LESSTHAN
			|| ast.getType() == HiveParser.LESSTHANOREQUALTO
			|| ast.getType() == HiveParser.GREATERTHAN
			|| ast.getType() == HiveParser.GREATERTHANOREQUALTO
			|| ast.getType() == HiveParser.KW_LIKE
			|| ast.getType() == HiveParser.DIVIDE
			|| ast.getType() == HiveParser.PLUS
			|| ast.getType() == HiveParser.MINUS
			|| ast.getType() == HiveParser.STAR
			|| ast.getType() == HiveParser.MOD
			|| ast.getType() == HiveParser.AMPERSAND
			|| ast.getType() == HiveParser.TILDE
			|| ast.getType() == HiveParser.BITWISEOR
			|| ast.getType() == HiveParser.BITWISEXOR) {
            set.add(ast);
        }
        return set;
    }


    private void prepareToParseCurrentNodeAndChilds(ASTNode ast){
        if (ast.getToken() != null) {
            switch (ast.getToken().getType()) {//join 从句开始
                case HiveParser.TOK_RIGHTOUTERJOIN:
                case HiveParser.TOK_LEFTOUTERJOIN:
                case HiveParser.TOK_JOIN:
                    joinClause = true;
                    break;
                case HiveParser.TOK_QUERY:
                    tableNameStack.push(nowQueryTable);
                    operStack.push(oper);
                    nowQueryTable = "";//sql22
                    oper = Oper.SELECT;
                    break;
                case HiveParser.TOK_INSERT:
                    tableNameStack.push(nowQueryTable);
                    operStack.push(oper);
                    oper = Oper.INSERT;
                    break;
                case HiveParser.TOK_SELECT:
                    tableNameStack.push(nowQueryTable);
                    operStack.push(oper);
//                    nowQueryTable = nowQueryTable
                    // nowQueryTable = "";//语法树join
                    // 注释语法树sql9, 语法树join对应的设置为""的注释逻辑不符
                    oper = Oper.SELECT;
                    break;
                case HiveParser.TOK_DROPTABLE:
                    oper = Oper.DROP;
                    break;
                case HiveParser.TOK_TRUNCATETABLE:
                    oper = Oper.TRUNCATE;
                    break;
                case HiveParser.TOK_LOAD:
                    oper = Oper.LOAD;
                    break;
                case HiveParser.TOK_CREATETABLE:
                    oper = Oper.CREATETABLE;
                    break;
            }
            if (ast.getToken() != null
                    && ast.getToken().getType() >= HiveParser.TOK_ALTERDATABASE_PROPERTIES
                    && ast.getToken().getType() <= HiveParser.TOK_ALTERVIEW_RENAME) {
                oper = Oper.ALTER;
            }
        }
    }
    public static String unescapeIdentifier(String val) {
        if (val == null) {
            return null;
        }
        if (val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`') {
            val = val.substring(1, val.length() - 1);
        }
        return val;
    }

    private void output(Map<String, String> map) {
        Iterator<String> it = map.keySet().iterator();
        while (it.hasNext()) {
            String key = it.next();
            System.out.println(key + "\t" + map.get(key));
        }
    }
    public JSONObject parse(ASTNode ast) {
        parseIteral(ast);
        System.out.println("***************表***************");
        for (String table : tables) {
            System.out.println(table);
        }
        System.out.println("***************列***************");
        output(cols);
        System.out.println("***************where列***************");
        output(wherecols);
        System.out.println("***************别名***************");
        output(alias);
        JSONObject json=new JSONObject();
        json.put("cols",cols);
        json.put("wherecols",wherecols);
        json.put("tables",tables);
        json.put("alias",alias);
        return json;
    }

}

调用

ParseDriver pd = new ParseDriver();
HiveParse hp = new HiveParse();
System.out.println(sql);
ASTNode ast = pd.parse(sql);
System.out.println(ast.toStringTree());
jsonsql = hp.parse(ast);
Map<String, Set<String>> tablewherecols= new HashMap<>();

Set<String> settable= (Set<String>) jsonsql.get("tables");

Map<String, String> wherecols= (Map<String, String>) jsonsql.get("wherecols");
for (String table : settable) {
String [] biao = table.split("\t");
String table1 = biao[0];
Set<String> tabziduan= new HashSet<>();
for (String key : wherecols.keySet()) {
System.out.println(key);
String [] ziduan = key.split("\\.");
if (key.contains(table1)&&ziduan.length==2){
tabziduan.add(ziduan[1]);
tablewherecols.put(table1,tabziduan);
}
}
}
System.out.println(JSONObject.toJSONString(tablewherecols));```

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

lxy000001

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值