Java的Calcite优化简单Sql尝试

21 篇文章 1 订阅

一、引入pom

引入calcite-core的1.26.0版本

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <calcite-core.version>1.26.0</calcite-core.version>
        <slf4j.version>1.7.25</slf4j.version>
        <junit.version>4.13.1</junit.version>
        <hydromatic.version>6.0</hydromatic.version>
        <guava.version>19.0</guava.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.calcite</groupId>
            <artifactId>calcite-core</artifactId>
            <version>${calcite-core.version}</version>
            <!--<version>1.11.0</version>-->
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>${guava.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.projectlombok/lombok -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.18</version>
            <scope>provided</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.12.0</version>
        </dependency>
    </dependencies>

二、代码结构

在这里插入图片描述

三、RBO优化尝试

1、Table类,保存字段名称,类型信息

import lombok.Data;

import java.io.Serializable;
import java.util.List;

/**
 * <p>标题: 表信息</p>
 * <p>功能描述: </p>
 *
 *
 * <p>创建时间: 2021/3/1 18:48</p>
 * <p>作者:test</p>
 * <p>修改历史记录:</p>
 * ====================================================================<br>
 **/
@Data
public class Table implements Serializable {
    private static final long serialVersionUID = -2971832417447325046L;

    /**
     * 表前缀
     **/
    private String schema;

    /**
     * 表名称
     **/
    private String table;

    /**
     * 字段信息
     **/
    private List<Field> fields;

    public Table() {

    }

    public Table(String schema, String table, List<Field> fields) {
        this.schema = schema;
        this.table = table;
        this.fields = fields;
    }

    @Data
    public static class Field implements Serializable {
        private static final long serialVersionUID = 5728717173174364365L;

        /**
         * 字段名
         **/
        private String name;

        /**
         * 字段类型
         **/
        private String type;

        public Field() {
        }

        public Field(String name, String type) {
            this.name = name;
            this.type = type;
        }
    }
}

2、数据类型枚举


/**
 * 数据类型 枚举类
 */
public enum DataTypeEnum {

    STRING("string","字符串"),
    TEXT("longtext","长字符串"),
    CLOB("clob","clob"),
    BLOB("blob","blob"),
    CHAR("char","字符"),
    NUMBER("number","数值型"),
    DECIMAL("decimal","浮点型"),
    DATE("date","日期类型"),
    TIMESTAMP("timestamp","时间戳类型"),
    INTEGER("integer","常用的整数")
    ;

    private String type;

    private String name;

    DataTypeEnum(String type, String name) {
        this.name =name;
        this.type = type ;
    }

    public String getName() {
        return name;
    }

    public String getType() {
        return type;
    }

    public static DataTypeEnum getByCode(String code) {
        for (DataTypeEnum obj : DataTypeEnum.values()) {
            if (obj.getType().equals(code)) {
                return obj;
            }
        }
        return null;
    }
}

3、自定义to_date函数

import org.apache.calcite.sql.*;
import org.apache.calcite.sql.type.*;
import org.apache.calcite.sql.validate.SqlMonotonicity;
import org.apache.calcite.sql.validate.SqlValidator;

/**
 * <p>标题: </p>
 * <p>功能描述: </p>
 *
 *
 * <p>创建时间: 2021/3/6 13:42</p>
 * <p>作者:test</p>
 * <p>修改历史记录:</p>
 * ====================================================================<br>
 **/
public class SqlToDateFunction extends SqlFunction {
    //~ Constructors -----------------------------------------------------------

    public SqlToDateFunction() {
        super(
                "TO_DATE",
                SqlKind.OTHER_FUNCTION,
                ReturnTypes.DATE,
                null,
                OperandTypes.ANY,
                SqlFunctionCategory.TIMEDATE);
    }

    //~ Methods ----------------------------------------------------------------

    public SqlSyntax getSyntax() {
        return SqlSyntax.FUNCTION_ID;
    }

    @Override public SqlMonotonicity getMonotonicity(SqlOperatorBinding call) {
        return SqlMonotonicity.INCREASING;
    }

    // Plans referencing context variables should never be cached
    public boolean isDynamicFunction() {
        return true;
    }

    @Override
    public SqlOperandCountRange getOperandCountRange() {
        /**
         * 返回参数个数信息
         **/
        return SqlOperandCountRanges.of(2);
    }

    @Override
    protected void checkOperandCount(SqlValidator validator, SqlOperandTypeChecker argType, SqlCall call) {
        /**
         * 参数个数校验
         **/
        assert call.operandCount() == 2;
    }
}

4、重写SqlStdOperatorTable添加to_date函数


import com.cys.test.calcite.sql.function.SqlToDateFunction;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;

/**
 * <p>标题: </p>
 * <p>功能描述: </p>
 *
 *
 * <p>创建时间: 2021/3/6 14:05</p>
 * <p>作者:test</p>
 * <p>修改历史记录:</p>
 * ====================================================================<br>
 **/
public class SqlMyOperatorTable extends SqlStdOperatorTable {

    private static SqlMyOperatorTable instance;

    public static final SqlFunction TO_DATE = new SqlToDateFunction();

    public static synchronized SqlMyOperatorTable instance() {
        if (instance == null) {
            // Creates and initializes the standard operator table.
            // Uses two-phase construction, because we can't initialize the
            // table until the constructor of the sub-class has completed.
            instance = new SqlMyOperatorTable();
            instance.init();
        }

        return instance;
    }
}

5、工具类优化


import com.cys.test.calcite.sql.model.DataTypeEnum;
import com.cys.test.calcite.sql.model.Table;
import com.cys.test.calcite.sql.table.SqlMyOperatorTable;
import lombok.extern.slf4j.Slf4j;
import org.apache.calcite.config.CalciteConnectionConfig;
import org.apache.calcite.config.CalciteConnectionConfigImpl;
import org.apache.calcite.jdbc.CalciteSchema;
import org.apache.calcite.plan.Context;
import org.apache.calcite.plan.ConventionTraitDef;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptTable;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.prepare.CalciteCatalogReader;
import org.apache.calcite.rel.RelDistributionTraitDef;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelRoot;
import org.apache.calcite.rel.rel2sql.RelToSqlConverter;
import org.apache.calcite.rel.rel2sql.SqlImplementor;
import org.apache.calcite.rel.rules.PruneEmptyRules;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.rel.type.RelDataTypeSystemImpl;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.schema.impl.AbstractTable;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.dialect.OracleSqlDialect;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.type.BasicSqlType;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.validate.SqlConformance;
import org.apache.calcite.sql.validate.SqlConformanceEnum;
import org.apache.calcite.sql.validate.SqlValidator;
import org.apache.calcite.sql.validate.SqlValidatorUtil;
import org.apache.calcite.sql2rel.RelDecorrelator;
import org.apache.calcite.sql2rel.SqlToRelConverter;
import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;
import org.apache.commons.lang3.StringUtils;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.stream.Collectors;

/***
 * calcite的sql优化
 * @author test
 * @date 2021/3/1 18:46
 **/
@Slf4j
public class CalciteSqlUtil {

    /***
     * 根据Calcite的RBO,对sql进行简单优化
     * @author test
     * @date 2021/3/1 19:15
     * @param sql sql语句
     * @param type 数据库类型
     * @param tables 表信息
     * @return java.lang.String
     **/
    public static String optimizationSql(String sql, String type, List<Table> tables) throws Exception {
        SchemaPlus rootSchema = registerRootSchema(tables);

        final FrameworkConfig fromworkConfig = Frameworks.newConfigBuilder()
                .parserConfig(SqlParser.Config.DEFAULT)
                .defaultSchema(rootSchema)
                .traitDefs(ConventionTraitDef.INSTANCE, RelDistributionTraitDef.INSTANCE)
                .build();

        // HepPlanner RBO优化
        HepProgramBuilder builder = new HepProgramBuilder();
        builder.addRuleInstance(PruneEmptyRules.PROJECT_INSTANCE);
        HepPlanner planner = new HepPlanner(builder.build());

        try {
            SqlTypeFactoryImpl factory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);

            // sql解析器
            SqlParser parser = SqlParser.create(sql, SqlParser.Config.DEFAULT);
            SqlNode parsed = parser.parseStmt();

            CalciteCatalogReader calciteCatalogReader = new CalciteCatalogReader(
                    CalciteSchema.from(rootSchema),
                    CalciteSchema.from(rootSchema).path(null),
                    factory,
                    new CalciteConnectionConfigImpl(new Properties())
            );

            // sql validate
            SqlValidator validator = SqlValidatorUtil.newValidator(
                    SqlMyOperatorTable.instance(),
                    calciteCatalogReader,
                    factory,
                    SqlValidator.Config.DEFAULT
            );
            SqlNode validated = validator.validate(parsed);

            final RexBuilder rexBuilder = new RexBuilder(factory);
            final RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder);

            // init SqlToRelConverter config
            final SqlToRelConverter.Config config = SqlToRelConverter.config();

            // SqlNode转换为RelNode
            final SqlToRelConverter sqlToRelConverter = new SqlToRelConverter(
                    new ViewExpanderImpl(),
                    validator,
                    calciteCatalogReader,
                    cluster,
                    fromworkConfig.getConvertletTable(),
                    config
            );

            RelRoot root = sqlToRelConverter.convertQuery(validated, false, true);

            root = root.withRel(sqlToRelConverter.flattenTypes(root.rel, true));
            final RelBuilder relBuilder = config.getRelBuilderFactory().create(cluster, null);
            root = root.withRel(RelDecorrelator.decorrelateQuery(root.rel, relBuilder));
            RelNode relNode = root.rel;

            // 寻找优化路径
            planner.setRoot(relNode);
            relNode = planner.findBestExp();

            // 转换为需要的数据库类型的sql
            RelToSqlConverter relToSqlConverter = new RelToSqlConverter(getSqlDialect(type));
            SqlImplementor.Result visit = relToSqlConverter.visitRoot(relNode);
            SqlNode sqlNode = visit.asStatement();

            return sqlNode.toSqlString(getSqlDialect(type)).getSql();
        } catch (Exception e) {
            log.error("SQL优化失败!", e);
            throw new Exception("SQL优化失败");
        }
    }

    /***
     * 注册表的字段信息
     * @author test
     * @date 2021/3/1 19:17
     * @param tables 表名
     * @return org.apache.calcite.schema.SchemaPlus
     **/
    private static SchemaPlus registerRootSchema(List<Table> tables) {
        SchemaPlus rootSchema = Frameworks.createRootSchema(true);

        List<Table> noSchemaTables = tables.stream().filter(t -> StringUtils.isEmpty(t.getSchema())).collect(Collectors.toList());
        for (Table table : noSchemaTables) {
            rootSchema.add(table.getTable().toUpperCase(), new AbstractTable() {
                @Override
                public RelDataType getRowType(RelDataTypeFactory relDataTypeFactory) {
                    RelDataTypeFactory.Builder builder = relDataTypeFactory.builder();

                    table.getFields().forEach(field -> {
                        builder.add(
                                field.getName().toUpperCase(),
                                new BasicSqlType(new RelDataTypeSystemImpl() {}, SqlTypeName.VARCHAR)
                        );
                    });

                    return builder.build();
                }
            });
        }

        Map<String, List<Table>> map = tables.stream().filter(t -> StringUtils.isNotEmpty(t.getSchema())).collect(Collectors.groupingBy(Table::getSchema));
        for (String key : map.keySet()) {
            List<Table> tableList = map.get(key);
            SchemaPlus schema = Frameworks.createRootSchema(true);
            for (Table table : tableList) {
                schema.add(table.getTable().toUpperCase(), new AbstractTable() {
                    @Override
                    public RelDataType getRowType(RelDataTypeFactory relDataTypeFactory) {
                        RelDataTypeFactory.Builder builder = relDataTypeFactory.builder();

                        table.getFields().forEach(field -> {
                            builder.add(
                                    field.getName().toUpperCase(),
                                    new BasicSqlType(new RelDataTypeSystemImpl() {}, getSqlTypeName(field.getType()))
                            );
                        });

                        return builder.build();
                    }
                });
            }

            rootSchema.add(key.toUpperCase(), schema);
        }

        return rootSchema;
    }

    private static SqlConformance conformance(FrameworkConfig config) {
        final Context context = config.getContext();
        if (null == context) {
            return SqlConformanceEnum.DEFAULT;
        }

        final CalciteConnectionConfig connectionConfig = context.unwrap(CalciteConnectionConfig.class);
        if (null == connectionConfig) {
            return SqlConformanceEnum.DEFAULT;
        }

        return connectionConfig.conformance();
    }

    private static class ViewExpanderImpl implements RelOptTable.ViewExpander {
        public ViewExpanderImpl() {
        }

        @Override
        public RelRoot expandView(RelDataType rowType, String queryString, List<String> schemaPath, List<String> viewPath) {
            return null;
        }
    }

    /***
     * 获取sql类型
     * @author test
     * @date 2021/3/3 10:27
     * @param type 类型
     * @return org.apache.calcite.sql.SqlDialect
     **/
    private static SqlDialect getSqlDialect(String type) {
        return OracleSqlDialect.DEFAULT;
    }

    /***
     * 根据DataTypeEnum获取对应的SqlTypeName
     * @author test
     * @date 2021/3/6 10:55
     * @param type DataTypeEnum的类型
     * @return org.apache.calcite.sql.type.SqlTypeName
     **/
    private static SqlTypeName getSqlTypeName(String type) {
        DataTypeEnum dataType = DataTypeEnum.getByCode(type);
        if (null == dataType) {
            return SqlTypeName.VARCHAR;
        }

        switch (dataType) {
            case CHAR:
            case CLOB:
            case TEXT:
            case BLOB:
            case STRING: return SqlTypeName.VARCHAR;
            case DECIMAL:
            case NUMBER: return SqlTypeName.DECIMAL;
            case INTEGER: return SqlTypeName.INTEGER;
            case DATE: return SqlTypeName.DATE;
            case TIMESTAMP: return SqlTypeName.DATE;
            default: return SqlTypeName.VARCHAR;
        }
    }

    public static void main(String[] args) throws Exception {
        String sql = "select t1.id, t1.name " +
                "from (" +
                "  select student.id,student.name from school.student " +
                "  where student.create_time > to_date('2021-03-04 12:00:00', 'yyyy-mm-dd hh24:mi:ss')" +
                ") t1";
        List<Table> tables = new ArrayList<>();
        List<Table.Field> fields = new ArrayList<>();
        fields.add(new Table.Field("id", DataTypeEnum.NUMBER.getType()));
        fields.add(new Table.Field("name", DataTypeEnum.STRING.getType()));
        fields.add(new Table.Field("create_time", DataTypeEnum.DATE.getType()));
        tables.add(new Table("school", "student", fields));

        sql = CalciteSqlUtil.optimizationSql(sql, "ORACLE", tables);
        System.out.println(sql);
    }
}

四、优化结果

1、优化前

select t1.id, t1.name from ( 
    select student.id,student.name 
    from school.student 
    where student.create_time > to_date('2021-03-04 12:00:00', 'yyyy-mm-dd hh24:mi:ss') 
) t1

2、优化后
在这里插入图片描述

  • 1
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
Calcite是一个开源的SQL解析和优化引擎,它提供了一个SQL测试框架,可以用来测试各种SQL语句的正确性和性能。 要使用CalciteSQL测试库,需要按照以下步骤进行操作: 1. 克隆Calcite的代码库到本地,并进入到`calcite/core`目录下; 2. 使用Maven构建Calcite的测试库:`mvn clean install -DskipTests -Pskip-spark,skip-flink`; 3. 进入到`calcite-core/target`目录下,可以看到生成了一个`calcite-core-XXX-tests.jar`文件; 4. 将这个jar文件添加到你的项目依赖中; 5. 在你的测试代码中,使用`org.apache.calcite.test.CalciteAssert`类提供的API来执行SQL语句的测试。 以下是一个简单的示例: ```java @Test public void testSql() { final String sql = "SELECT empno, ename FROM emp WHERE deptno = 10"; final String expected = "EMPNO=7369; ENAME=SMITH\n" + "EMPNO=7499; ENAME=ALLEN\n" + "EMPNO=7521; ENAME=WARD\n" + "EMPNO=7566; ENAME=JONES\n" + "EMPNO=7654; ENAME=MARTIN\n" + "EMPNO=7698; ENAME=BLAKE\n" + "EMPNO=7782; ENAME=CLARK\n" + "EMPNO=7839; ENAME=KING\n" + "EMPNO=7844; ENAME=TURNER\n" + "EMPNO=7876; ENAME=ADAMS\n" + "EMPNO=7900; ENAME=JAMES\n" + "EMPNO=7934; ENAME=MILLER\n"; CalciteAssert.that() .with("model", "inline:" + "{\n" + " version: '1.0',\n" + " defaultSchema: 'SCOTT',\n" + " schemas: [\n" + " {\n" + " type: 'custom',\n" + " name: 'SCOTT',\n" + " factory: 'org.apache.calcite.adapter.jdbc.JdbcSchema$Factory',\n" + " operand: {\n" + " jdbcDriver: 'org.hsqldb.jdbcDriver',\n" + " jdbcUrl: 'jdbc:hsqldb:res:/org/apache/calcite/test/Scott.sql',\n" + " jdbcUser: '',\n" + " jdbcPassword: ''\n" + " }\n" + " }\n" + " ]\n" + "}") .query(sql) .returns(expected); } ``` 这个示例中,我们使用了`CalciteAssert`类提供的`that()`方法来创建一个测试实例,然后使用`with()`方法来设置数据源的模型,使用`query()`方法来执行SQL语句,最后使用`returns()`方法来验证查询结果是否符合预期。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值