一、Flink Table API和SQL_API
### --- Flink Table API 与 Flink SQL API
~~~ Apache Flink提供了两种顶层的关系型API,分别为Table API和SQL,
~~~ Flink通过Table API&SQL实现了批流统一。
~~~ 其中Table API是用于Scala和Java的语言集成查询API,
~~~ 它允许以非常直观的方式组合关系运算符(例如select,where和join)来构建查询。
~~~ Flink SQL基于Apache Calcite 实现了标准的SQL,用户可以使用标准的SQL处理数据集。
~~~ Table API和SQL与Flink的DataStream和DataSet API紧密集成在一起,用户可以在它们之间相互转化,
~~~ 比如可以将DataStream或者DataSet注册为Table,再对其中的数据进行操作。
~~~ 值得注意的是,Table API和SQL目前尚未完善,仍在积极地开发中,
~~~ 所以并不是所有的算子操作都可以通过它们实现。
二、导入相关依赖
### --- 在pom.xml中导入相关依赖
<!-- Flink Table API / SQL dependencies. Every entry below is pinned to version 1.11.1
     and declared with scope "provided". -->
<!-- NOTE(review): "provided" presumably assumes the Flink runtime supplies these jars;
     confirm this also holds when the example is launched locally from an IDE. -->
<!-- The flink-table artifact, referenced as a POM-type dependency. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table</artifactId>
<version>1.11.1</version>
<type>pom</type>
<scope>provided</scope>
</dependency>
<!-- Either the Java bridge artifact. The Java example in this article imports
     org.apache.flink.table.api.bridge.java.StreamTableEnvironment, so it presumably
     relies on this one. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_2.12</artifactId>
<version>1.11.1</version>
<scope>provided</scope>
</dependency>
<!-- or the Scala bridge artifact, the counterpart for Scala programs. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-scala-bridge_2.12</artifactId>
<version>1.11.1</version>
<scope>provided</scope>
</dependency>
<!-- The Blink planner artifact. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_2.12</artifactId>
<version>1.11.1</version>
<scope>provided</scope>
</dependency>
三、编程代码实现
### --- 编程代码实现:基于Table API的案例
package com.yanqi.table;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.execution.PipelineExecutor;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.table.api.*;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.*;
import org.apache.flink.table.types.DataType;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;
import static org.apache.flink.table.api.Expressions.$;
public class TableApiDemo {
public static void main(String[] args) throws Exception {
//Flink执行环境env
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//用env,做出Table环境tEnv
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
//table环境的重载方法:
/*EnvironmentSettings settings = EnvironmentSettings.newInstance()
.useBlinkPlanner()
// .inBatchMode()
.inStreamingMode()
.build();
StreamTableEnvironment.create(env,settings);*/
//获取流式数据源
/*DataStreamSource<Tuple2<String, Integer>> data = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {
@Override
public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
int num = 0;
while (true) {
num++;
ctx.collect(new Tuple2<>("name"+num, num));
Thread.sleep(1000);
}
}
@Override
public void cancel() {
}
});*/
//从kafka上获取数据
/*ConnectTableDescriptor descriptor = tEnv.connect(
// declare the external system to connect to
new Kafka()
.version("universal")
.topic("animal")
.startFromEarliest()
.property("bootstrap.servers", "hdp-2:9092")
)
// declare a format for this system
.withFormat(
// new Json()
new Csv()
)
// declare the schema of the table
.withSchema(
new Schema()
// .field("rowtime", DataTypes.TIMESTAMP(3))
// .rowtime(new Rowtime()
// .timestampsFromField("timestamp")
// .watermarksPeriodicBounded(60000)
// )
// .field("user", DataTypes.BIGINT())
.field("message", DataTypes.STRING())
);
// create a table with given name
descriptor.createTemporaryTable("MyUserTable");
Table table1 = tEnv.sqlQuery("select * from MyUserTable");
DataStream<Tuple2<Boolean, Row>> tuple2DataStream = tEnv.toRetractStream(table1, Row.class);
tuple2DataStream.print();*/
tEnv.connect(new FileSystem().path("d:\\data\\input"))// 定义表数据来源,外部连接
.withFormat(new Csv()) // 定义从外部系统读取数据之后的格式化方法
.withSchema(new Schema()
.field("id", DataTypes.STRING())
.field("name"