以下是一个简单的 SQL SELECT 解析器的 Java 实现,支持单表查询和基本条件过滤。代码包含词法分析和语法分析模块,并支持以下语法:
SELECT column1, column2 FROM table WHERE column3 = 5
完整代码
1. Token 类型定义 (TokenType.java)
/**
 * Kinds of tokens produced by the lexer for the supported SELECT dialect.
 *
 * <p>The declaration order is kept stable: keywords first, then punctuation and
 * identifiers, then literals, with the end-of-input marker last.
 */
public enum TokenType {
    // Keywords
    SELECT,
    FROM,
    WHERE,

    // Names and punctuation
    IDENTIFIER,
    COMMA,
    STAR,
    EQUALS,

    // Literals
    STRING,
    NUMBER,

    // End-of-input marker
    EOF
}
2. Token 类 (Token.java)
/**
 * An immutable lexical token: a {@link TokenType} paired with the text it carries.
 */
public class Token {
    /** Category of this token. */
    public final TokenType type;

    /** Text associated with this token. */
    public final String value;

    /**
     * Creates a token.
     *
     * @param type  category of the token
     * @param value text associated with the token
     */
    public Token(TokenType type, String value) {
        this.value = value;
        this.type = type;
    }

    /**
     * Renders the token as {@code (TYPE, value)}.
     *
     * @return a human-readable representation of this token
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append('(').append(type).append(", ").append(value).append(')');
        return text.toString();
    }
}
3. 词法分析器 (Lexer.java)
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Lexer for a minimal SQL SELECT dialect.
 *
 * <p>Recognizes the keywords SELECT / FROM / WHERE (case-insensitive), identifiers,
 * {@code *}, {@code ,}, {@code =}, single-quoted string literals and unsigned integer
 * literals. Whitespace separates tokens and is discarded.
 */
public class Lexer {
    /**
     * One alternative per token kind. Named groups are used so that adding or
     * nesting a group cannot silently shift the indices the lexer dispatches on.
     * Keywords are not a separate alternative: a whole word is matched first and
     * classified afterwards, so identifiers that merely start with a keyword
     * (for example {@code fromage}) are not split.
     */
    private static final Pattern TOKEN_PATTERN = Pattern.compile(
        "(?<word>[a-zA-Z_][a-zA-Z0-9_]*)"   // keyword or identifier
        + "|(?<star>\\*)"                   // *
        + "|(?<comma>,)"                    // ,
        + "|(?<equals>=)"                   // =
        + "|'(?<string>[^']*)'"             // string literal; group holds the unquoted content
        + "|(?<number>\\d+)"                // unsigned integer
        + "|(?<space>\\s+)"                 // whitespace (skipped)
    );

    private final String input;

    /**
     * Creates a lexer over the given SQL text.
     *
     * @param input the SQL text to tokenize; must not be {@code null}
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public Lexer(String input) {
        this.input = java.util.Objects.requireNonNull(input, "input");
    }

    /**
     * Splits the input into tokens.
     *
     * <p>The scan position is local to each call, so this method may be invoked
     * repeatedly and always returns the full token list.
     *
     * @return the tokens in source order, always terminated by a single
     *         {@link TokenType#EOF} token with an empty value
     * @throws RuntimeException if the text at some position does not start a valid token
     */
    public ArrayList<Token> tokenize() {
        ArrayList<Token> tokens = new ArrayList<>();
        Matcher matcher = TOKEN_PATTERN.matcher(input);
        int pos = 0;
        while (pos < input.length()) {
            // Anchor the match at pos: lookingAt() must match exactly here, so
            // unrecognized characters are reported instead of being skipped.
            matcher.region(pos, input.length());
            if (!matcher.lookingAt()) {
                throw new RuntimeException("Invalid token at position: " + pos);
            }
            // Every alternative consumes at least one character, so pos always advances.
            pos = matcher.end();
            if (matcher.group("space") != null) {
                continue;
            }
            tokens.add(toToken(matcher));
        }
        tokens.add(new Token(TokenType.EOF, ""));
        return tokens;
    }

    /** Converts the current (non-whitespace) match into a token. */
    private static Token toToken(Matcher matcher) {
        String word = matcher.group("word");
        if (word != null) {
            // Keyword tokens keep the text exactly as written in the input.
            return new Token(classifyWord(word), word);
        }
        if (matcher.group("star") != null) {
            return new Token(TokenType.STAR, matcher.group("star"));
        }
        if (matcher.group("comma") != null) {
            return new Token(TokenType.COMMA, matcher.group("comma"));
        }
        if (matcher.group("equals") != null) {
            return new Token(TokenType.EQUALS, matcher.group("equals"));
        }
        String literal = matcher.group("string");
        if (literal != null) {
            // May be the empty string for '' — still a valid STRING token.
            return new Token(TokenType.STRING, literal);
        }
        String digits = matcher.group("number");
        if (digits != null) {
            return new Token(TokenType.NUMBER, digits);
        }
        throw new IllegalStateException("Unhandled token text: " + matcher.group());
    }

    /** Returns the keyword type for a reserved word, or IDENTIFIER otherwise. */
    private static TokenType classifyWord(String word) {
        if (word.equalsIgnoreCase("SELECT")) {
            return TokenType.SELECT;
        }
        if (word.equalsIgnoreCase("FROM")) {
            return TokenType.FROM;
        }
        if (word.equalsIgnoreCase("WHERE")) {
            return TokenType.WHERE;
        }
        return TokenType.IDENTIFIER;
    }
}
4. AST 结构类 (SelectQuery.java)
import java.util.List;
/**
 * Parsed representation of a single-table SELECT statement.
 *
 * <p>This is a plain mutable holder: the fields start out {@code null} and are
 * assigned by whoever builds the query.
 */
public class SelectQuery {
    /** Selected column names, in source order. */
    public List<String> columns;

    /** Name of the table in the FROM clause. */
    public String table;

    /** The WHERE condition, or {@code null} when none has been assigned. */
    public Condition whereCondition;

    /** A single {@code column operator value} comparison. */
    public static class Condition {
        /** Column name on the left-hand side. */
        public String column;

        /** Comparison operator text. */
        public String operator;

        /** Literal text on the right-hand side. */
        public String value;
    }
}
5. 语法解析器 (Parser.java)
import java.util.ArrayList;
import java.util.List;
/**
 * Recursive-descent parser for statements of the form
 * {@code SELECT (* | col {, col}) FROM table [WHERE col = (string | number)]}.
 *
 * <p>A parser instance consumes its token list once; create a new instance for
 * each statement.
 */
public class Parser {
    /** Returned by {@link #peek()} when the token list is exhausted. */
    private static final Token END_OF_INPUT = new Token(TokenType.EOF, "");

    private final List<Token> tokens;
    private int pos = 0;

    /**
     * Creates a parser over the given tokens.
     *
     * @param tokens the tokens to parse, in source order
     */
    public Parser(List<Token> tokens) {
        this.tokens = tokens;
    }

    /**
     * Parses the whole token list as one SELECT statement.
     *
     * @return the parsed query
     * @throws RuntimeException if the tokens do not form a valid statement,
     *         including when unparsed tokens remain after the statement
     */
    public SelectQuery parse() {
        SelectQuery query = new SelectQuery();
        parseSelect(query);
        parseFrom(query);
        parseWhere(query);
        // Reject trailing input instead of silently ignoring it.
        consume(TokenType.EOF);
        return query;
    }

    /** Parses {@code SELECT} followed by {@code *} or a comma-separated column list. */
    private void parseSelect(SelectQuery query) {
        consume(TokenType.SELECT);
        query.columns = new ArrayList<>();
        if (peek().type == TokenType.STAR) {
            consume(TokenType.STAR);
            query.columns.add("*");
        } else {
            do {
                query.columns.add(consume(TokenType.IDENTIFIER).value);
            } while (match(TokenType.COMMA));
        }
    }

    /** Parses {@code FROM table}. */
    private void parseFrom(SelectQuery query) {
        consume(TokenType.FROM);
        query.table = consume(TokenType.IDENTIFIER).value;
    }

    /** Parses the optional {@code WHERE column = literal} clause. */
    private void parseWhere(SelectQuery query) {
        if (!match(TokenType.WHERE)) {
            return;
        }
        SelectQuery.Condition condition = new SelectQuery.Condition();
        condition.column = consume(TokenType.IDENTIFIER).value;
        // Record the operator text so the condition is fully populated.
        condition.operator = consume(TokenType.EQUALS).value;
        Token valueToken = peek();
        if (valueToken.type != TokenType.STRING && valueToken.type != TokenType.NUMBER) {
            throw new RuntimeException("Expected string or number");
        }
        condition.value = valueToken.value;
        advance();
        query.whereCondition = condition;
    }

    /**
     * Consumes the current token if it has the expected type.
     *
     * @throws RuntimeException if the current token has a different type
     */
    private Token consume(TokenType expected) {
        Token token = peek();
        if (token.type != expected) {
            throw new RuntimeException("Expected " + expected + ", found " + token.type);
        }
        advance();
        return token;
    }

    /** Consumes the current token and returns true only if it has the given type. */
    private boolean match(TokenType type) {
        if (peek().type == type) {
            advance();
            return true;
        }
        return false;
    }

    /**
     * Returns the current token without consuming it. Past the end of the list a
     * synthetic EOF token is returned, so a list lacking an explicit EOF produces
     * a parse error rather than an IndexOutOfBoundsException.
     */
    private Token peek() {
        return pos < tokens.size() ? tokens.get(pos) : END_OF_INPUT;
    }

    /** Moves to the next token. */
    private void advance() {
        pos++;
    }
}
6. 测试主类 (Main.java)
import java.util.List;
public class Main {
public static void main(String[] args) {
String sql = “SELECT id, name FROM users WHERE age = 25”;
Lexer lexer = new Lexer(sql);
List tokens = lexer.tokenize();
Parser parser = new Parser(tokens);
SelectQuery query = parser.parse();
System.out.println("Columns: " + query.columns);
System.out.println("Table: " + query.table);
if (query.whereCondition != null) {
System.out.println("WHERE " + query.whereCondition.column +
" = " + query.whereCondition.value);
}
}
}
代码说明
- 词法分析器 (`Lexer`)
  - 使用正则表达式匹配 SQL 关键字、标识符、数字、字符串等 Token。
  - 跳过空白字符,返回 Token 列表。
- 语法解析器 (`Parser`)
  - 递归下降解析器,依次解析 `SELECT`、`FROM`、`WHERE` 子句。
  - 构建 `SelectQuery` 对象存储解析结果。
- AST 结构 (`SelectQuery`)
  - 保存查询的列、表名和过滤条件。
- 测试示例 (`Main`)
  - 输入 SQL 语句,输出解析后的结构。
运行结果
Columns: [id, name]
Table: users
WHERE age = 25
支持特性
- 单表 `SELECT` 查询
- 列名列表或 `*`
- 简单的 `WHERE` 条件(仅支持 `=` 和字符串/数字值)
可根据需要扩展 WHERE 条件(如 `>`、`<`、`AND`/`OR`)和更复杂的数据类型。