druid的工具的 sql语句分析器 SQLStatementParser
,很犀利,一直想一探究竟,今天正好有时间,来学习下。
先看下 SQLStatementParser的类结构:
protected SchemaRepository repository; protected SQLExprParser exprParser; protected boolean parseCompleteValues = true; protected int parseValuesSize = 3; protected SQLSelectListCache selectListCache = null; protected InsertColumnsCache insertColumnsCache = null;
比较关键的是SQLExprParser:这个表达式解析器,
SQLExprParser的类结构:
protected final Lexer lexer; protected DbType dbType;
上面是关键的类结构信息:Lexer 词法分析器
Lexer中包含了:
protected Token token; //当前要解析的token
public final String text //原始的sql语句
protected Keywords keywords = Keywords.DEFAULT_KEYWORDS; //默认的token关键字
Keywords 包含了:
private final Map<String, Token> keywords;
keywords类一初始化就会默认加载:
map.put("ALL", Token.ALL); map.put("ALTER", Token.ALTER); map.put("AND", Token.AND); map.put("ANY", Token.ANY); map.put("AS", Token.AS); map.put("ENABLE", Token.ENABLE); map.put("DISABLE", Token.DISABLE); map.put("ASC", Token.ASC); map.put("BETWEEN", Token.BETWEEN); map.put("BY", Token.BY); map.put("CASE", Token.CASE); map.put("CAST", Token.CAST); map.put("CHECK", Token.CHECK); map.put("CONSTRAINT", Token.CONSTRAINT); map.put("CREATE", Token.CREATE); map.put("DATABASE", Token.DATABASE); map.put("DEFAULT", Token.DEFAULT); map.put("COLUMN", Token.COLUMN); map.put("TABLESPACE", Token.TABLESPACE); map.put("PROCEDURE", Token.PROCEDURE); map.put("FUNCTION", Token.FUNCTION); map.put("DELETE", Token.DELETE); map.put("DESC", Token.DESC); map.put("DISTINCT", Token.DISTINCT); map.put("DROP", Token.DROP); map.put("ELSE", Token.ELSE); map.put("EXPLAIN", Token.EXPLAIN); map.put("EXCEPT", Token.EXCEPT); map.put("END", Token.END); map.put("ESCAPE", Token.ESCAPE); map.put("EXISTS", Token.EXISTS); map.put("FOR", Token.FOR); map.put("FOREIGN", Token.FOREIGN); map.put("FROM", Token.FROM); map.put("FULL", Token.FULL); map.put("GROUP", Token.GROUP); map.put("HAVING", Token.HAVING); map.put("IN", Token.IN); map.put("INDEX", Token.INDEX); map.put("INNER", Token.INNER); map.put("INSERT", Token.INSERT); map.put("INTERSECT", Token.INTERSECT); map.put("INTERVAL", Token.INTERVAL); map.put("INTO", Token.INTO); map.put("IS", Token.IS); map.put("JOIN", Token.JOIN); map.put("KEY", Token.KEY); map.put("LEFT", Token.LEFT); map.put("LIKE", Token.LIKE); map.put("LOCK", Token.LOCK); map.put("MINUS", Token.MINUS); map.put("NOT", Token.NOT); map.put("NULL", Token.NULL); map.put("ON", Token.ON); map.put("OR", Token.OR); map.put("ORDER", Token.ORDER); map.put("OUTER", Token.OUTER); map.put("PRIMARY", Token.PRIMARY); map.put("REFERENCES", Token.REFERENCES); map.put("RIGHT", Token.RIGHT); map.put("SCHEMA", Token.SCHEMA); map.put("SELECT", Token.SELECT); map.put("SET", Token.SET); map.put("SOME", Token.SOME); map.put("TABLE", Token.TABLE); map.put("THEN", Token.THEN); map.put("TRUNCATE", Token.TRUNCATE); map.put("UNION", Token.UNION); map.put("UNIQUE", Token.UNIQUE); map.put("UPDATE", Token.UPDATE); map.put("VALUES", Token.VALUES); map.put("VIEW", Token.VIEW); map.put("SEQUENCE", Token.SEQUENCE); map.put("TRIGGER", Token.TRIGGER); map.put("USER", Token.USER); map.put("WHEN", Token.WHEN); map.put("WHERE", Token.WHERE); map.put("XOR", Token.XOR); map.put("OVER", Token.OVER); map.put("TO", Token.TO); map.put("USE", Token.USE); map.put("REPLACE", Token.REPLACE); map.put("COMMENT", Token.COMMENT); map.put("COMPUTE", Token.COMPUTE); map.put("WITH", Token.WITH); map.put("GRANT", Token.GRANT); map.put("REVOKE", Token.REVOKE); // MySql procedure: add by zz map.put("WHILE", Token.WHILE); map.put("DO", Token.DO); map.put("DECLARE", Token.DECLARE); map.put("LOOP", Token.LOOP); map.put("LEAVE", Token.LEAVE); map.put("ITERATE", Token.ITERATE); map.put("REPEAT", Token.REPEAT); map.put("UNTIL", Token.UNTIL); map.put("OPEN", Token.OPEN); map.put("CLOSE", Token.CLOSE); map.put("CURSOR", Token.CURSOR); map.put("FETCH", Token.FETCH); map.put("OUT", Token.OUT); map.put("INOUT", Token.INOUT); map.put("LIMIT", Token.LIMIT); 上面的关键字被加载到keywords中,看到这我们大概明白了,整个解析sql语句的动作,就是按照上面内置的token,去分割这些sql语句。
下面来验证我们的猜想:
//首先创建一个解析器
SQLStatementParser parser = SQLParserUtils.createSQLStatementParser(sql, dbType);
//然后调用parseStatementList, 返回一个集合
List<SQLStatement> stmtList = parser.parseStatementList();
为什么返回一个集合对象,是因为select 有子查询的这种情况。
//下面这个是核心方法
public void parseStatementList(List<SQLStatement> statementList, int max, SQLObject parent) {
//该方法根据sql的第一个词,来判断sql语句的类型
switch (lexer.token) {
case SELECT: {
//下面是针对注释的解析,不重要
MySqlHintStatement hintStatement = null;
if (i == 1
&& statementList.size() > 0
&& statementList.get(statementList.size() - i) instanceof MySqlHintStatement) {
hintStatement = (MySqlHintStatement) statementList.get(statementList.size() - i);
} else if (i > 0 && dbType != DbType.odps && !semi) {
throw new ParserException("syntax error. " + lexer.info());
}
//这一段很重要核心方法
SQLStatement stmt = parseSelect();
stmt.setParent(parent);
if (hintStatement != null && stmt instanceof SQLStatementImpl) {
SQLStatementImpl stmtImpl = (SQLStatementImpl) stmt;
List<SQLCommentHint> hints = stmtImpl.getHeadHintsDirect();
if (hints == null) {
stmtImpl.setHeadHints(hintStatement.getHints());
} else {
hints.addAll(hintStatement.getHints());
}
statementList.set(statementList.size() - 1, stmt);
} else {
statementList.add(stmt);
}
semi = false;
continue;
}
}
}
我们来看下 parseSelect 里面的动作
public SQLStatement parseSelect() {
//我们来看下创建一个select语句的解析器
MySqlSelectParser selectParser = createSQLSelectParser();
//得到一个select语句的对象
SQLSelect select = selectParser.select();
if (selectParser.returningFlag) {
return selectParser.updateStmt;
}
return new SQLSelectStatement(select, DbType.mysql);
}
再来看下 select()方法
SQLSelect select = new SQLSelect();
//查询语句: 对应的实现类:SQLSelectQueryBlock
SQLSelectQuery query = query(select, true);
//selectSql语句中,设置了SQLSelectQuery 语句
select.setQuery(query);
//排序的语句
SQLOrderBy orderBy = this.parseOrderBy();
if (query instanceof SQLSelectQueryBlock) {
SQLSelectQueryBlock queryBlock = (SQLSelectQueryBlock) query;
if (queryBlock.getOrderBy() == null) {
queryBlock.setOrderBy(orderBy);
if (lexer.token == Token.LIMIT) {
SQLLimit limit = this.exprParser.parseLimit();
queryBlock.setLimit(limit);
}
} else {
select.setOrderBy(orderBy);
if (lexer.token == Token.LIMIT) {
SQLLimit limit = this.exprParser.parseLimit();
select.setLimit(limit);
}
}
if (orderBy != null) {
parseFetchClause(queryBlock);
}
} else {
select.setOrderBy(orderBy);
}
if (lexer.token == Token.LIMIT) {
SQLLimit limit = this.exprParser.parseLimit();
select.setLimit(limit);
}
while (lexer.token == Token.HINT) {
this.exprParser.parseHints(select.getHints());
}
return select;
再来看query()的 核心 方法
public SQLSelectQuery query(SQLObject parent, boolean acceptUnion) {
MySqlSelectQueryBlock queryBlock = new MySqlSelectQueryBlock();
queryBlock.setParent(parent);
this.lexer.setCommentHandler(new QueryHintHandler(queryBlock, this.lexer));
if (lexer.token() == Token.SELECT) {
if (selectListCache != null) {
selectListCache.match(lexer, queryBlock);
}
}
//关键核心代码
if (lexer.token() == Token.SELECT) {
//得到下一个词的枚举值: *
lexer.nextTokenValue();
for(;;) {
if (lexer.token() == Token.HINT) {
this.exprParser.parseHints(queryBlock.getHints());
} else {
break;
}
}
//上面这些对象的解析,需要在解析SqlItem之前
while (true) {
Token token = lexer.token();
if (token == (Token.DISTINCT)) {
queryBlock.setDistionOption(SQLSetQuantifier.DISTINCT);
lexer.nextToken();
} else if (lexer.identifierEquals(FnvHash.Constants.DISTINCTROW)) {
queryBlock.setDistionOption(SQLSetQuantifier.DISTINCTROW);
lexer.nextToken();
} else if (token == (Token.ALL)) {
queryBlock.setDistionOption(SQLSetQuantifier.ALL);
lexer.nextToken();
} else if (token == (Token.UNIQUE)) {
queryBlock.setDistionOption(SQLSetQuantifier.UNIQUE);
lexer.nextToken();
} else if (lexer.identifierEquals(FnvHash.Constants.HIGH_PRIORITY)) {
queryBlock.setHignPriority(true);
lexer.nextToken();
} else if (lexer.identifierEquals(FnvHash.Constants.STRAIGHT_JOIN)) {
queryBlock.setStraightJoin(true);
lexer.nextToken();
} else if (lexer.identifierEquals(FnvHash.Constants.SQL_SMALL_RESULT)) {
queryBlock.setSmallResult(true);
lexer.nextToken();
} else if (lexer.identifierEquals(FnvHash.Constants.SQL_BIG_RESULT)) {
queryBlock.setBigResult(true);
lexer.nextToken();
} else if (lexer.identifierEquals(FnvHash.Constants.SQL_BUFFER_RESULT)) {
queryBlock.setBufferResult(true);
lexer.nextToken();
} else if (lexer.identifierEquals(FnvHash.Constants.SQL_CACHE)) {
queryBlock.setCache(true);
lexer.nextToken();
} else if (lexer.identifierEquals(FnvHash.Constants.SQL_NO_CACHE)) {
queryBlock.setCache(false);
lexer.nextToken();
} else if (lexer.identifierEquals(FnvHash.Constants.SQL_CALC_FOUND_ROWS)) {
queryBlock.setCalcFoundRows(true);
lexer.nextToken();
} else if (lexer.identifierEquals(FnvHash.Constants.TOP)) {
Lexer.SavePoint mark = lexer.mark();
lexer.nextToken();
if (lexer.token() == Token.LITERAL_INT) {
SQLLimit limit = new SQLLimit(lexer.integerValue().intValue());
queryBlock.setLimit(limit);
lexer.nextToken();
} else if (lexer.token() == Token.DOT) {
lexer.reset(mark);
break;
}
} else {
break;
}
}
//上面没有符合条件的,代码走到此处
parseSelectList(queryBlock);
if (lexer.identifierEquals(FnvHash.Constants.FORCE)) {
lexer.nextToken();
accept(Token.PARTITION);
SQLName partition = this.exprParser.name();
queryBlock.setForcePartition(partition);
}
parseInto(queryBlock);
}
//解析from操作
parseFrom(queryBlock);
//解析where 操作
parseWhere(queryBlock);
parseHierachical(queryBlock);
if (lexer.token() == Token.GROUP || lexer.token() == Token.HAVING) {
parseGroupBy(queryBlock);
}
if (lexer.identifierEquals(FnvHash.Constants.WINDOW)) {
parseWindow(queryBlock);
}
if (lexer.token() == Token.ORDER) {
queryBlock.setOrderBy(this.exprParser.parseOrderBy());
}
if (lexer.token() == Token.LIMIT) {
queryBlock.setLimit(this.exprParser.parseLimit());
}
if (lexer.token() == Token.FETCH) {
final Lexer.SavePoint mark = lexer.mark();
lexer.nextToken();
if (lexer.identifierEquals(FnvHash.Constants.NEXT)) {
lexer.nextToken();
SQLExpr rows = this.exprParser.primary();
queryBlock.setLimit(
new SQLLimit(rows));
acceptIdentifier("ROWS");
acceptIdentifier("ONLY");
} else {
lexer.reset(mark);
}
}
if (lexer.token() == Token.PROCEDURE) {
lexer.nextToken();
throw new ParserException("TODO. " + lexer.info());
}
if (lexer.token() == Token.INTO) {
parseInto(queryBlock);
}
if (lexer.token() == Token.FOR) {
lexer.nextToken();
if (lexer.token() == Token.UPDATE) {
lexer.nextToken();
queryBlock.setForUpdate(true);
if (lexer.identifierEquals(FnvHash.Constants.NO_WAIT)
|| lexer.identifierEquals(FnvHash.Constants.NOWAIT)) {
lexer.nextToken();
queryBlock.setNoWait(true);
} else if (lexer.identifierEquals(FnvHash.Constants.WAIT)) {
lexer.nextToken();
SQLExpr waitTime = this.exprParser.primary();
queryBlock.setWaitTime(waitTime);
}
if (lexer.identifierEquals(FnvHash.Constants.SKIP)) {
lexer.nextToken();
acceptIdentifier("LOCKED");
queryBlock.setSkipLocked(true);
}
} else {
acceptIdentifier("SHARE");
queryBlock.setForShare(true);
}
}
if (lexer.token() == Token.LOCK) {
lexer.nextToken();
accept(Token.IN);
acceptIdentifier("SHARE");
acceptIdentifier("MODE");
queryBlock.setLockInShareMode(true);
}
if (hints != null) {
queryBlock.setHints(hints);
}
return queryRest(queryBlock, acceptUnion);
}
继续查询看核心 方法
//把select语句中的内容,解析成sqlItem对象
protected void parseSelectList(SQLSelectQueryBlock queryBlock) {
//
final List<SQLSelectItem> selectList = queryBlock.getSelectList();
for (;;) {
final SQLSelectItem selectItem = this.exprParser.parseSelectItem();
//
selectList.add(selectItem);
//表示selectItem属于这个select语句块的
selectItem.setParent(queryBlock);
if (lexer.token != Token.COMMA) {
break;
}
lexer.nextToken();
}
}
//
public SQLSelectItem parseSelectItem() {
//根据STAR,创建一个对象的Expr表达式
if (token == Token.STAR) {
expr = new SQLAllColumnExpr();
//把分词流转到下一个
lexer.nextToken();
//封装到item里面
return new SQLSelectItem(expr, (String) null, connectByRoot);
}
}
public void queryFrom(queryBlock){
//得到from的表对象
SQLTableSource from = parseTableSource(queryBlock);
queryBlock.setFrom(from);
}