背景:最近我们在做缓存预热,需要知道SQL有哪些表,但又不想传递,因此考虑进行SQL解析,解析出都用到了哪些表,因此调研了三种SQL解析器。
话不多说,直接上结论:
功能 | Calcite | Druid | Jsqlparser |
---|---|---|---|
是否支持Doris语法 | 不支持 | 支持 | 支持 |
性能对比(不带shopId,单位ms) | – | 300+ | 100+ |
性能对比(带shopId,单位ms) | – | 300+ | 600+ |
总结:短SQL、Jsqlparser快;长SQL,Druid快。
结论:SQL先replaceAll替换掉shopId(耗时10ms),然后用Jsqlparser方式解析。
代码示例:
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.2.14</version>
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
<version>1.19.0</version>
</dependency>
<dependency>
<groupId>com.github.jsqlparser</groupId>
<artifactId>jsqlparser</artifactId>
<version>4.4</version>
</dependency>
public static void main(String[] args) throws SqlParseException, JSQLParserException {
String sql = "select temp.data_rank as data_rank\n" +
"from (SELECT t.food_show_name,\n" +
" rank() over ( ORDER BY t.real_profit DESC ) AS data_rank,\n" +
" if(t.theory_profit is null or t.theory_profit = 0, null,\n" +
" rank() over ( ORDER BY abs(t.real_profit - t.theory_profit / t.theory_profit))) AS rate_rank\n" +
" from (SELECT concat(food_name, if(unit is null, '', concat('/', unit))) as food_show_name,\n" +
" sum(food_real_amt) - sum(real_cost) AS real_profit,\n" +
" sum(food_real_amt) - sum(theory_cost) AS theory_profit\n" +
" FROM tbl_boss_supply_shop_food_cost_day\n" +
" WHERE group_id = 290764\n" +
" and shop_id in\n" +
" (@shopId@)\n" +
" and report_date >= '2022-10-01'\n" +
" and report_date <= '2022-10-31'\n" +
" and brand_id in (334910)\n" +
" and real_cost is not null\n" +
" and theory_cost is not null\n" +
" and food_real_amt is not null\n" +
" GROUP BY food_show_name) t) temp\n" +
"where temp.food_show_name = '生乳卷/盒'";
StringBuilder builder = new StringBuilder();
for (int i = 0; i < 3000; i++) {
builder.append(new Random().nextLong());
if(i != 2999)
builder.append(",");
}
long l4 = System.currentTimeMillis();
String sss = sql.replaceAll("@shopId@", builder.toString());
sss = sss.replaceAll(" shop_id in ([\\s\\S]*?)\n", " shop_id in (123)");
long l5 = System.currentTimeMillis();
System.out.println(l5 - l4);
// calcite,不支持doris,运行报错
// SqlNode sqlNode = parseStatement(sss);
// Set<String> strings = extractSourceTableInSelectSql(sqlNode, false);
// System.out.println(strings);
// Jsqlparser
long l = System.currentTimeMillis();
Statement statement = CCJSqlParserUtil.parse(sss);
Select selectStatement = (Select) statement;
TablesNamesFinder tablesNamesFinder = new TablesNamesFinder();
List<String> tableList = tablesNamesFinder.getTableList(selectStatement);
long l3 = System.currentTimeMillis();
System.out.println(l3 - l);
System.out.println(tableList);
// druid
long l1 = System.currentTimeMillis();
SQLStatement sqlStatements = SQLUtils.parseSingleMysqlStatement(sss);
MySqlSchemaStatVisitor visitor = new MySqlSchemaStatVisitor();
sqlStatements.accept(visitor);
Map<TableStat.Name, TableStat> tables = visitor.getTables();
long l2 = System.currentTimeMillis();
System.out.println(l2 - l1);
System.out.println(tables);
}
public static SqlNode parseStatement(String sql) {
SqlParser.Config config = SqlParser.configBuilder().setLex(Lex.MYSQL).setCaseSensitive(true).setConformance(SqlConformanceEnum.MYSQL_5).build();
SqlParser parser = SqlParser.create(sql, config);
try {
return parser.parseQuery();
} catch (Exception e) {
e.printStackTrace();
throw new UnsupportedOperationException("operation not allowed");
}
}
private static Set<String> extractSourceTableInSelectSql(SqlNode sqlNode, boolean fromOrJoin) {
if (sqlNode == null) {
return new HashSet<>();
}
final SqlKind sqlKind = sqlNode.getKind();
if (SqlKind.SELECT.equals(sqlKind)) {
SqlSelect selectNode = (SqlSelect) sqlNode;
Set<String> selectList = new HashSet<>(extractSourceTableInSelectSql(selectNode.getFrom(), true));
selectNode.getSelectList().getList().stream().filter(node -> node instanceof SqlCall)
.forEach(node -> selectList.addAll(extractSourceTableInSelectSql(node, false)));
selectList.addAll(extractSourceTableInSelectSql(selectNode.getWhere(), false));
selectList.addAll(extractSourceTableInSelectSql(selectNode.getHaving(), false));
return selectList;
}
if (SqlKind.JOIN.equals(sqlKind)) {
SqlJoin sqlJoin = (SqlJoin) sqlNode;
Set<String> joinList = new HashSet<>();
joinList.addAll(extractSourceTableInSelectSql(sqlJoin.getLeft(), true));
joinList.addAll(extractSourceTableInSelectSql(sqlJoin.getRight(), true));
return joinList;
}
if (SqlKind.AS.equals(sqlKind)) {
SqlCall sqlCall = (SqlCall) sqlNode;
return extractSourceTableInSelectSql(sqlCall.getOperandList().get(0), fromOrJoin);
}
if (SqlKind.IDENTIFIER.equals(sqlKind)) {
Set<String> identifierList = new HashSet<>();
if (fromOrJoin) {
SqlIdentifier sqlIdentifier = (SqlIdentifier) sqlNode;
identifierList.add(sqlIdentifier.toString());
}
return identifierList;
}
Set<String> defaultList = new HashSet<>();
if (sqlNode instanceof SqlCall) {
SqlCall call = (SqlCall) sqlNode;
call.getOperandList()
.forEach(node -> defaultList.addAll(extractSourceTableInSelectSql(node, false)));
return defaultList;
}
return null;
}