Parsing HiveServer2 Logs in Java: Analyzing Hive SQL to Count Table Usage (Popularity)
First, read the hiveserver2 log line by line.
In the log, every SQL statement is preceded by the keyword Executing command,
so start by matching on that keyword:
if (str.contains("Executing command"))
Each matching line begins with a fixed header, which can be stripped with a regular expression:
String sqlRegex = ".*Executing command\\(queryId=[\\w-]*\\):";
String sql = str.replaceAll(sqlRegex, " ").trim();
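Putting the two steps together, a minimal reading loop might look like the sketch below. The class name and log path are placeholders, and the sketch assumes each statement fits on a single log line (multi-line statements would need extra handling):

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

public class LogSqlExtractor {
    private static final String SQL_REGEX = ".*Executing command\\(queryId=[\\w-]*\\):";

    // Collects every SQL statement logged behind an "Executing command" marker.
    public static List<String> extractSqls(String logPath) throws IOException {
        List<String> sqls = new ArrayList<>();
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(logPath))) {
            String str;
            while ((str = reader.readLine()) != null) {
                if (str.contains("Executing command")) {
                    // Strip the fixed line header so only the SQL text remains.
                    sqls.add(str.replaceAll(SQL_REGEX, " ").trim());
                }
            }
        }
        return sqls;
    }
}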
Below is the utility class that parses the SQL out of the log. First, import the dependencies:
<properties>
    <hive.version>1.1.0</hive.version>
    <hadoop.version>2.6.0</hadoop.version>
    <poi.version>4.1.2</poi.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>${poi.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>${poi.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml-schemas</artifactId>
        <version>${poi.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-scratchpad</artifactId>
        <version>${poi.version}</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>${hive.version}</version>
        <scope>compile</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.58</version>
    </dependency>
    <dependency>
        <groupId>commons-httpclient</groupId>
        <artifactId>commons-httpclient</artifactId>
        <version>3.1</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.36</version>
    </dependency>
</dependencies>
Dependencies such as the MySQL connector can be dropped if you do not need them; a few are only included because later posts in this series will use them, so I am listing them all here at once.
Below is the code for the Hive log parsing utility class:
import org.apache.hadoop.hive.ql.parse.*;
import java.util.*;
/**
* Purpose: extract the tables and columns in the AST, together with the operation applied to them, e.g. SELECT, INSERT.
* Focus: table and column handling for SELECT operations; other operations are only resolved down to the table level.
* Approach: depth-first traversal of the AST. When an operation token is met, record the current operation;
* when TOK_TAB or TOK_TABREF is met, resolve the table the current operation applies to; when a subquery is met,
* push the current state onto a stack, process the subquery, then pop the state back.
*
*/
public class HiveParseUtils {
private static final String UNKNOWN = "UNKNOWN";
private static Map<String, String> alias = new HashMap<>();
private static Map<String, String> cols = new TreeMap<>();
private static Map<String, String> colAlais = new TreeMap<>();
public static List<String> tables = new ArrayList<>();
private static Stack<String> tableNameStack = new Stack<>();
private static Stack<Oper> operStack = new Stack<>();
private static String nowQueryTable = "";// loosely defined: more than one table may be under processing at once; a per-query/FROM-node table set would be cleaner
private static Oper oper;
private static boolean joinClause = false;
private enum Oper {
SELECT, INSERT, DROP, TRUNCATE, LOAD, CREATETABLE, ALTER
}
public static Set<String> parseIteral(ASTNode ast) {
Set<String> set = new HashSet<>();// tables referenced by the current query
prepareToParseCurrentNodeAndChilds(ast);
set.addAll(parseChildNodes(ast));
set.addAll(parseCurrentNode(ast ,set));
endParseCurrentNode(ast);
return set;
}
private static void endParseCurrentNode(ASTNode ast){
if (ast.getToken() != null) {
switch (ast.getToken().getType()) {// a join clause ends here: clear the join flag
case HiveParser.TOK_RIGHTOUTERJOIN:
case HiveParser.TOK_LEFTOUTERJOIN:
case HiveParser.TOK_JOIN:
joinClause = false;
break;
case HiveParser.TOK_QUERY:
break;
case HiveParser.TOK_INSERT:
case HiveParser.TOK_SELECT:
nowQueryTable = tableNameStack.pop();
oper = operStack.pop();
break;
}
}
}
private static Set<String> parseCurrentNode(ASTNode ast, Set<String> set){
if (ast.getToken() != null) {
switch (ast.getToken().getType()) {
case HiveParser.TOK_TABLE_PARTITION:
if (ast.getChildCount() != 2) {
String table = BaseSemanticAnalyzer
.getUnescapedName((ASTNode) ast.getChild(0));
if (oper == Oper.SELECT) {
nowQueryTable = table;
}
tables.add(table + " " + oper);
}
break;
case HiveParser.TOK_TAB:// outputTable
String tableTab = BaseSemanticAnalyzer
.getUnescapedName((ASTNode) ast.getChild(0));
if (oper == Oper.SELECT) {
nowQueryTable = tableTab;
}
tables.add(tableTab + " " + oper);
break;
case HiveParser.TOK_TABREF:// inputTable
ASTNode tabTree = (ASTNode) ast.getChild(0);
String tableName = (tabTree.getChildCount() == 1) ? BaseSemanticAnalyzer
.getUnescapedName((ASTNode) tabTree.getChild(0))
: BaseSemanticAnalyzer
.getUnescapedName((ASTNode) tabTree.getChild(0))
+ "." + tabTree.getChild(1);
if (oper == Oper.SELECT) {
if(joinClause && !"".equals(nowQueryTable) ){
nowQueryTable += "&"+tableName;//
}else{
nowQueryTable = tableName;
}
set.add(tableName);
}
tables.add(tableName + " " + oper);
if (ast.getChild(1) != null) {
String alia = ast.getChild(1).getText().toLowerCase();
alias.put(alia, tableName);
}
break;
case HiveParser.TOK_TABLE_OR_COL:
if (ast.getParent().getType() != HiveParser.DOT) {
String col = ast.getChild(0).getText().toLowerCase();
if (alias.get(col) == null
&& colAlais.get(nowQueryTable + "." + col) == null) {
if(nowQueryTable.indexOf("&") > 0){//sql23
cols.put(UNKNOWN + "." + col, "");
}else{
cols.put(nowQueryTable + "." + col, "");
}
}
}
break;
case HiveParser.TOK_ALLCOLREF:
cols.put(nowQueryTable + ".*", "");
break;
case HiveParser.TOK_SUBQUERY:
if (ast.getChildCount() == 2) {
String tableAlias = unescapeIdentifier(ast.getChild(1)
.getText());
String aliaReal = "";
for(String table : set){
aliaReal+=table+"&";
}
if(aliaReal.length() !=0){
aliaReal = aliaReal.substring(0, aliaReal.length()-1);
}
alias.put(tableAlias, aliaReal);
}
break;
case HiveParser.TOK_SELEXPR:
if (ast.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) {
String column = ast.getChild(0).getChild(0).getText()
.toLowerCase();
if(nowQueryTable.indexOf("&") > 0){
cols.put(UNKNOWN + "." + column, "");
}else if (colAlais.get(nowQueryTable + "." + column) == null) {
cols.put(nowQueryTable + "." + column, "");
}
} else if (ast.getChild(1) != null) {
String columnAlia = ast.getChild(1).getText().toLowerCase();
colAlais.put(nowQueryTable + "." + columnAlia, "");
}
break;
case HiveParser.DOT:
if (ast.getType() == HiveParser.DOT) {
if (ast.getChildCount() == 2) {
if (ast.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& ast.getChild(0).getChildCount() == 1
&& ast.getChild(1).getType() == HiveParser.Identifier) {
String alia = BaseSemanticAnalyzer
.unescapeIdentifier(ast.getChild(0)
.getChild(0).getText()
.toLowerCase());
String column = BaseSemanticAnalyzer
.unescapeIdentifier(ast.getChild(1)
.getText().toLowerCase());
String realTable = null;
if (!tables.contains(alia + " " + oper)
&& alias.get(alia) == null) {// e.g. tables may hold entries like [b SELECT, a SELECT]
alias.put(alia, nowQueryTable);
}
if (tables.contains(alia + " " + oper)) {
realTable = alia;
} else if (alias.get(alia) != null) {
realTable = alias.get(alia);
}
if (realTable == null || realTable.length() == 0 || realTable.indexOf("&") > 0) {
realTable = UNKNOWN;
}
cols.put(realTable + "." + column, "");
}
}
}
break;
case HiveParser.TOK_ALTERTABLE_ADDPARTS:
case HiveParser.TOK_ALTERTABLE_RENAME:
case HiveParser.TOK_ALTERTABLE_ADDCOLS:
ASTNode alterTableName = (ASTNode) ast.getChild(0);
tables.add(alterTableName.getText() + " " + oper);// use the "name oper" format like the other branches so split(" ") works downstream
break;
}
}
return set;
}
private static Set<String> parseChildNodes(ASTNode ast){
Set<String> set= new HashSet<String>();
int numCh = ast.getChildCount();
if (numCh > 0) {
for (int num = 0; num < numCh; num++) {
ASTNode child = (ASTNode) ast.getChild(num);
set.addAll(parseIteral(child));
}
}
return set;
}
private static void prepareToParseCurrentNodeAndChilds(ASTNode ast){
if (ast.getToken() != null) {
switch (ast.getToken().getType()) {// a join clause starts here: set the join flag
case HiveParser.TOK_RIGHTOUTERJOIN:
case HiveParser.TOK_LEFTOUTERJOIN:
case HiveParser.TOK_JOIN:
joinClause = true;
break;
case HiveParser.TOK_QUERY:
tableNameStack.push(nowQueryTable);
operStack.push(oper);
nowQueryTable = "";//sql22
oper = Oper.SELECT;
break;
case HiveParser.TOK_INSERT:
tableNameStack.push(nowQueryTable);
operStack.push(oper);
oper = Oper.INSERT;
break;
case HiveParser.TOK_SELECT:
tableNameStack.push(nowQueryTable);
operStack.push(oper);
oper = Oper.SELECT;
break;
case HiveParser.TOK_DROPTABLE:
oper = Oper.DROP;
break;
case HiveParser.TOK_TRUNCATETABLE:
oper = Oper.TRUNCATE;
break;
case HiveParser.TOK_LOAD:
oper = Oper.LOAD;
break;
case HiveParser.TOK_CREATETABLE:
oper = Oper.CREATETABLE;
break;
}
if (ast.getToken() != null
&& ast.getToken().getType() >= HiveParser.TOK_ALTERDATABASE_PROPERTIES
&& ast.getToken().getType() <= HiveParser.TOK_ALTERVIEW_RENAME) {
oper = Oper.ALTER;
}
}
}
public static String unescapeIdentifier(String val) {
if (val == null) {
return null;
}
if (val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`') {
val = val.substring(1, val.length() - 1);
}
return val;
}
//print the collected columns and their aliases
private static void output(Map<String, String> map) {
Iterator<String> it = map.keySet().iterator();
while (it.hasNext()) {
String key = it.next();
System.out.println(key + " " + map.get(key));
}
}
public static List<String> splitSql(String sql) {
sql = sql.replaceAll("^\\s*|\\s*$", "");
char[] cs = sql.toCharArray();
int quotTimes = 0;
List<Integer> marksSplit = new ArrayList<>();
for (int i = 0; i < cs.length; i++) {
char c = cs[i];
if (c == '\'') {// track single quotes so semicolons inside string literals are ignored
quotTimes++;
}
if (c == ';' && quotTimes % 2 == 0) {// a semicolon outside quotes marks a statement boundary
marksSplit.add(i);
}
if (i == cs.length - 1 && c != ';') {// treat the end of input as a boundary when there is no trailing semicolon
marksSplit.add(i + 1);
}
}
List<String> sqls = new ArrayList<>();
if (!marksSplit.isEmpty()) {
for (int i = 0; i < marksSplit.size(); i++) {
if (i == 0) {
sqls.add(sql.substring(0, marksSplit.get(i)));
} else {
sqls.add(sql.substring(marksSplit.get(i - 1) + 1, marksSplit.get(i)));
}
}
} else {
sqls.add(sql);
}
return sqls;
}
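// Example (hypothetical input): splitSql("select 1; select ';' from t")
// returns ["select 1", " select ';' from t"]: the first semicolon splits
// the statements, while the quoted semicolon is ignored.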
public static Map<String, Hashtable<String, Integer>> sqlParse(String sql) throws ParseException {
ParseDriver pd = new ParseDriver();
List<String> sqlList = splitSql(sql);
Map<String, Hashtable<String, Integer>> map = new HashMap<>();
for(String s : sqlList) {
ASTNode ast = pd.parse(s);
parseIteral(ast);// fills the static "tables" list; reset HiveParseUtils.tables before parsing an unrelated statement
Hashtable<String, Integer> hashtable = new Hashtable<>();
for (String table : tables) {
String[] split = table.split(" ");
String tableName = split[0];
if(hashtable.containsKey(tableName)) {
hashtable.put(tableName, hashtable.get(tableName)+1);
}else {
hashtable.put(tableName, 1);
}
}
map.put("tables", hashtable);
}
return map;
}
/**
* Parse the sql and keep only the tables whose operation matches the given type
* @param sql sql text, possibly containing several semicolon-separated statements
* @param type the operation to filter on, e.g. "SELECT" or "INSERT"
* @return a map from the key "tables" to each matching table's usage count
* @throws ParseException
*/
public static Map<String, Hashtable<String, Integer>> sqlParseAssignType(String sql,String type) throws Exception {
ParseDriver pd = new ParseDriver();
List<String> sqlList = splitSql(sql);
Map<String, Hashtable<String, Integer>> map = new HashMap<>();
for(String s : sqlList) {
ASTNode ast = pd.parse(s);
parseIteral(ast);
Hashtable<String, Integer> hashtable = new Hashtable<>();
for (String table : tables) {
String[] split = table.split(" ");
String tableName = split[0];
String tableType = split[1];
if(tableType.equalsIgnoreCase(type)) { //only tables whose operation matches the given type are kept
if(hashtable.containsKey(tableName)) {
hashtable.put(tableName, hashtable.get(tableName)+1);
}else {
hashtable.put(tableName, 1);
}
}
}
map.put("tables", hashtable);
}
return map;
}
public static Map<String, Hashtable<String, Integer>> sqlParseAssignType(String sql) throws Exception {
ParseDriver pd = new ParseDriver();
List<String> sqlList = splitSql(sql);
Map<String, Hashtable<String, Integer>> map = new HashMap<>();
for(String s : sqlList) {
ASTNode ast = pd.parse(s);
parseIteral(ast);
Hashtable<String, Integer> hashtable = new Hashtable<>();
for (String table : tables) {
String[] split = table.split(" ");
String tableName = split[0];
if(hashtable.containsKey(tableName)) {
hashtable.put(tableName, hashtable.get(tableName)+1);
}else {
hashtable.put(tableName, 1);
}
}
map.put("tables", hashtable);
}
return map;
}
}
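Before wiring the class into the log reader, a quick sanity check on a single statement helps confirm it behaves as expected. The SQL below is made up for illustration, and a Hashtable's printed order is not guaranteed:

HiveParseUtils.tables = new ArrayList<>();
Map<String, Hashtable<String, Integer>> parsed = HiveParseUtils.sqlParseAssignType(
        "insert into table dst select a.id from src a join dim b on a.id = b.id", "SELECT");
System.out.println(parsed.get("tables"));// expected to contain src=1 and dim=1; dst is filtered out as an INSERT target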
Define a map to collect the parse results:
Map<String, Hashtable<String, Integer>> sqlParse=new HashMap<>();
Parse the sql. HiveParseUtils.tables is static, so reset it first to keep counts from earlier lines from leaking in:
HiveParseUtils.tables = new ArrayList<>();
sqlParse = HiveParseUtils.sqlParseAssignType(line);
Compute the popularity:
//extract the parsed counts
Map<String,Integer> result=new HashMap<>();
Hashtable<String, Integer> hashtable = sqlParse.get("tables");
//iterate: each key is a table name, each value is that table's usage count
for (Iterator itr = hashtable.keySet().iterator(); itr.hasNext(); ) {
String key = (String) itr.next();
String table = key.trim().toLowerCase();
if (key.contains(".")) {
//keep only the table name and drop the database name; adapt as needed
table = key.split("\\.")[1];
}
int value = hashtable.get(key);
//accumulate into result so counts for the same table add up across log lines
if (result.containsKey(table)) {
result.put(table, result.get(table) + value);
} else {
result.put(table, value);
}
}
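Finally, for reference, here is a minimal end-to-end sketch tying all the steps together; the log path is a placeholder, and counting only SELECT usage is one possible choice:

import java.io.BufferedReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;

public class TableHeat {
    public static void main(String[] args) throws Exception {
        String sqlRegex = ".*Executing command\\(queryId=[\\w-]*\\):";
        Map<String, Integer> result = new HashMap<>();
        try (BufferedReader reader = Files.newBufferedReader(Paths.get("/var/log/hive/hiveserver2.log"))) {
            String str;
            while ((str = reader.readLine()) != null) {
                if (!str.contains("Executing command")) {
                    continue;
                }
                String line = str.replaceAll(sqlRegex, " ").trim();
                HiveParseUtils.tables = new ArrayList<>();// reset the static accumulator per statement
                Map<String, Hashtable<String, Integer>> sqlParse;
                try {
                    sqlParse = HiveParseUtils.sqlParseAssignType(line, "SELECT");// count read usage only
                } catch (Exception e) {
                    continue;// skip statements the Hive parser cannot handle
                }
                Hashtable<String, Integer> hashtable = sqlParse.get("tables");
                for (String key : hashtable.keySet()) {
                    String table = key.contains(".") ? key.split("\\.")[1] : key.trim().toLowerCase();
                    int value = hashtable.get(key);
                    result.put(table, result.containsKey(table) ? result.get(table) + value : value);
                }
            }
        }
        //print each table with its usage count, i.e. its popularity
        for (Map.Entry<String, Integer> entry : result.entrySet()) {
            System.out.println(entry.getKey() + " " + entry.getValue());
        }
    }
}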
Done!!!