Calcite 使用 Spark 原生的 RDD 进行数据处理


1   通过配置 :  

properties.setProperty("spark", "true");  // 设置为 true 后即可调用 Spark 内部的函数

2  需要修改 Calcite-spark  中相关的文件

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.calcite.adapter.spark;

import org.apache.calcite.adapter.enumerable.EnumerableRules;
import org.apache.calcite.jdbc.CalcitePrepare;
import org.apache.calcite.linq4j.tree.ClassDeclaration;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.runtime.ArrayBindable;
import org.apache.calcite.util.Util;
import org.apache.calcite.util.javac.JaninoCompiler;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Calendar;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Implementation of
 * {@link org.apache.calcite.jdbc.CalcitePrepare.SparkHandler}. Gives the core
 * Calcite engine access to rules that only exist in the Spark module.
 */
public class SparkHandlerImpl implements CalcitePrepare.SparkHandler {
  private final HttpServer classServer;
  private final AtomicInteger classId;

  private final SparkSession spark ;
  private

3  可以直接使用,示例如下:

package org.apache.calcite.test;

import java.io.Serializable;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.Properties;

import org.apache.calcite.jdbc.CalciteConnection;
import org.apache.calcite.jdbc.CalcitePrepare;
import org.apache.calcite.jdbc.CalcitePrepare.SparkHandler;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;


/**
 * @Auther: caozq
 * @Date: 2018/5/14 10:59
 * @Description:
 */
/**
 * Demonstrates driving Spark RDD / DataFrame operations through a Calcite
 * JDBC connection opened with the {@code spark=true} property, which gives
 * access to the Spark-module {@code SparkHandler} and its
 * {@code JavaSparkContext}.
 *
 * @author caozq
 */
public class SparkTest {

	/** Simple serializable bean used as the row schema for the test DataFrame. */
	public static class Person implements Serializable {
		private static final long serialVersionUID = -6259413972682177507L;
		private String name;
		private int age;

		public Person(String name, int age) {
			this.name = name;
			this.age = age;
		}
		@Override
		public String toString() {
			return name + ": " + age;
		}
		public String getName() {
			return name;
		}
		public void setName(String name) {
			this.name = name;
		}
		public int getAge() {
			return age;
		}
		public void setAge(int age) {
			this.age = age;
		}
	}

	/**
	 * Opens a Calcite JDBC connection with Spark enabled, obtains the Spark
	 * context from the prepare context, builds a small RDD, converts it to a
	 * DataFrame and runs a Spark SQL query over it.
	 *
	 * <p>Fixes over the original version: the JDBC connection is managed with
	 * try-with-resources (the old {@code finally} block could NPE by calling
	 * {@code close()} on a still-null reference, and the raw
	 * {@code Connection} itself was never closed), and the Spark context is
	 * closed in a {@code finally} so it is released even if a Spark step
	 * throws.
	 *
	 * @param sql SQL text; currently unused by this demo (kept for
	 *            interface compatibility — TODO wire it into the query)
	 */
	public static void getDate(String sql) {
		Properties properties = new Properties();
		// "spark" = true enables the Spark handler inside Calcite.
		properties.setProperty("spark", "true");

		try {
			// Optional since JDBC 4, kept for compatibility with older setups.
			Class.forName("org.apache.calcite.jdbc.Driver");
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
			return;
		}

		// try-with-resources guarantees the JDBC connection is closed on
		// every path, including when getConnection itself fails.
		try (Connection aConnection =
				DriverManager.getConnection("jdbc:calcite:", properties)) {

			DatabaseMetaData metaData = aConnection.getMetaData();
			System.out.println("productName=" + metaData.getDatabaseProductName());

			CalciteConnection calciteConnection =
					aConnection.unwrap(CalciteConnection.class);
			CalcitePrepare.Context context =
					calciteConnection.createPrepareContext();

			SparkHandler sparkHandler = context.spark();
			JavaSparkContext sparkContext =
					(JavaSparkContext) sparkHandler.sparkContext();
			try {
				JavaRDD<String> input =
						sparkContext.parallelize(Arrays.asList("abc,1", "test,2"));
				// Each line is "name,age"; split and map into Person beans.
				JavaRDD<Person> persons = input
						.map(s -> s.split(","))
						.map(s -> new Person(s[0], Integer.parseInt(s[1])));

				System.out.println(persons.collect());

				SparkSession spark =
						SparkSession.builder().appName("Test").getOrCreate();
				Dataset<Row> df = spark.createDataFrame(persons, Person.class);
				df.show();
				df.printSchema();

				SQLContext sqls = new SQLContext(spark);
				sqls.registerDataFrameAsTable(df, "person");
				sqls.sql("SELECT * FROM person WHERE age>1").show();
			} finally {
				// Release the Spark context even if a Spark step above threw.
				sparkContext.close();
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	public static void main(String[] args) {
		String sql = "select * from test";
		getDate(sql);
	}

}


Apache Calcite是一个灵活的SQL解析器框架,可以用于解析和优化SQL查询语句,支持多种数据库,包括MySQL、Oracle、SQL Server、PostgreSQL等。下面是Apache Calcite使用方法: 1. 引入依赖 在项目的pom.xml文件中添加Apache Calcite的依赖: ```xml <dependency> <groupId>org.apache.calcite</groupId> <artifactId>calcite-core</artifactId> <version>1.26.0</version> </dependency> ``` 2. 创建SQL解析器 使用Apache Calcite的SQL解析器,可以将SQL语句解析成AST(抽象语法树)。AST是一种用于表示SQL语句结构的数据结构,可以用于进一步分析和优化SQL查询语句。 ```java import org.apache.calcite.sql.*; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.sql.parser.SqlParserConfig; import org.apache.calcite.sql.parser.SqlParserImplFactory; public class SqlParserDemo { public static void main(String[] args) throws SqlParseException { String sql = "SELECT * FROM my_table WHERE id = 1"; SqlParserConfig config = SqlParser.configBuilder() .setParserFactory(new SqlParserImplFactory()) .build(); SqlParser parser = SqlParser.create(sql, config); SqlNode node = parser.parseQuery(); System.out.println(node.getClass().getSimpleName()); } } ``` 以上代码演示了如何创建一个SQL解析器,解析一个SELECT语句,并输出AST的类型。 3. 
访问AST AST是一个树形结构,可以使用Visitor模式来访问AST的节点。Apache Calcite提供了许多访问AST节点的Visitor类,可以方便地遍历AST的节点。 ```java import org.apache.calcite.sql.*; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.sql.parser.SqlParserConfig; import org.apache.calcite.sql.parser.SqlParserImplFactory; public class SqlParserDemo { public static void main(String[] args) throws SqlParseException { String sql = "SELECT * FROM my_table WHERE id = 1"; SqlParserConfig config = SqlParser.configBuilder() .setParserFactory(new SqlParserImplFactory()) .build(); SqlParser parser = SqlParser.create(sql, config); SqlNode node = parser.parseQuery(); node.accept(new SqlBasicVisitor<Void>() { @Override public Void visit(SqlIdentifier id) { System.out.println(id.getName()); return null; } }); } } ``` 以上代码演示了如何访问AST的节点,使用SqlBasicVisitor类来访问SqlIdentifier节点,并输出节点的名称。 4. 优化查询 AST可以用于进一步优化SQL查询语句。Apache Calcite提供了许多优化器,可以根据AST的结构进行优化,例如选择最优的执行计划、推导查询条件、消除冗余计算等。 ```java import org.apache.calcite.adapter.java.JavaTypeFactory; import org.apache.calcite.plan.*; import org.apache.calcite.prepare.CalcitePrepareImpl; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.sql.*; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.sql.parser.SqlParserConfig; import org.apache.calcite.sql.parser.SqlParserImplFactory; import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.calcite.tools.*; import org.apache.calcite.util.Util; public class SqlParserDemo { public static void main(String[] args) throws SqlParseException { String sql = "SELECT * FROM my_table WHERE id = 1"; SchemaPlus schema = Frameworks.createRootSchema(true); FrameworkConfig config = Frameworks.newConfigBuilder() .defaultSchema(schema) .parserConfig(SqlParser.configBuilder() 
.setParserFactory(new SqlParserImplFactory()) .build()) .build(); Planner planner = Frameworks.getPlanner(config); SqlNode node = planner.parse(sql); SqlValidator validator = planner.getValidator(); SqlNode validatedNode = validator.validate(node); RelDataTypeFactory typeFactory = planner.getTypeFactory(); JavaTypeFactory javaTypeFactory = new JavaTypeFactoryImpl(typeFactory); SqlToRelConverter.Config converterConfig = SqlToRelConverter.configBuilder() .withTrimUnusedFields(false) .build(); SqlToRelConverter converter = new SqlToRelConverter( new CalcitePrepareImpl.PlannerImpl(planner, converterConfig), validator, schema, javaTypeFactory, converterConfig); RelNode relNode = converter.convertQuery(validatedNode, false, true); RelOptPlanner optPlanner = relNode.getCluster().getPlanner(); optPlanner.setRoot(relNode); RelTraitSet traitSet = optPlanner.emptyTraitSet().plus(RelCollations.EMPTY); RelNode optimizedNode = optPlanner.findBestExp(traitSet, relNode); System.out.println(Util.toLinux(optimizedNode.toString())); } } ``` 以上代码演示了如何使用Apache Calcite进行SQL查询语句的优化。首先创建一个Planner对象,然后使用Planner解析和验证SQL语句。接着使用SqlToRelConverter将SQL语句转换为RelNode对象,最后使用RelOptPlanner进行优化,得到最优的执行计划。 以上就是Apache Calcite使用方法,你可以根据需要使用它来解析和优化SQL查询语句。
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值