flink1.11的blink planner
数据流向:csv->mysql->flink流加工->mysql
工程目录
StreamSql类。单纯用flinkSql做数据处理。
package com.test.demo02_table;
import com.test.demo02_table.Sensor;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
//flink1.10时候是import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import static org.apache.flink.table.api.Expressions.*;
import java.sql.Timestamp;
/**
* 目前flink1.11,回不去flink1.10
*/
public class StreamSql {

    /**
     * Pure Flink SQL pipeline (Flink 1.11, Blink planner):
     * reads sensor readings from a CSV-style text file, writes them into the
     * MySQL table "sensor" via the JDBC connector, then aggregates that table
     * into the MySQL table "report".
     */
    public static void main(String[] args) throws Exception {
        // 1. Environment: Blink planner in streaming mode.
        EnvironmentSettings mySetting = EnvironmentSettings
                .newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, mySetting);

        // 2. Source: one CSV line per record -> Sensor POJO.
        DataStream<String> inputStream = env.readTextFile(
                "D:\\Workplace\\IdeaProjects\\other_project\\flink-learning\\TableApiAndSql\\src\\main\\resources\\datafile\\temperature.txt");
        DataStream<Sensor> dataStream = inputStream.map(new MapFunction<String, Sensor>() {
            @Override
            public Sensor map(String s) throws Exception {
                String[] split = s.split(",");
                // Extra trailing columns (e.g. the epoch timestamp) are ignored.
                return new Sensor(
                        split[0],
                        Timestamp.valueOf(split[1]),
                        Double.valueOf(split[2]),
                        Double.valueOf(split[3])
                );
            }
        });
        // registerTable(...) is deprecated; since 1.10 a temporary view is used instead.
        tableEnv.createTemporaryView("TheDataTable", dataStream);

        // 3. JDBC sink table, Flink 1.11 DDL. Notes from earlier experiments:
        //    - lengths such as VARCHAR(10), DOUBLE(), DECIMAL(5,2) are rejected here;
        //      keep DOUBLE in Flink even when the DB column is DECIMAL.
        //    - declared types must be consistent within this job, not with the DB:
        //      the DB column may be TIMESTAMP(6) while the job uses TIMESTAMP(3).
        //    - Flink 1.10 used the old option keys ('connector.type', 'connector.url',
        //      'connector.table', 'connector.write.flush.max-rows', ...); in 1.11 the
        //      keys are 'connector', 'url' and 'table-name', and 'driver' is optional
        //      (derived from the URL when omitted).
        String sensorSinkDDL =
                "CREATE TABLE jdbcOutputTable ( \n" +
                "  tab STRING, \n" +
                "  record_time TIMESTAMP(3), \n" +
                "  temperature DOUBLE, \n" +
                "  humidity DOUBLE, \n" +
                "  PRIMARY KEY (tab) NOT ENFORCED \n" +
                ") WITH ( \n" +
                "  'connector' = 'jdbc', \n" +
                "  'url' = 'jdbc:mysql://192.168.109.139:3306/kgxdb',\n" +
                "  'table-name' = 'sensor', \n" +
                // Connector/J 8.x class name; 'com.mysql.jdbc.Driver' is deprecated.
                "  'driver' = 'com.mysql.cj.jdbc.Driver', \n" +
                "  'username' = 'root', \n" +
                "  'password' = 'qwertyuiop1234567890' \n" +
                ")";
        tableEnv.executeSql(sensorSinkDDL);

        // 3.1 INSERT INTO via executeSql (1.11); in 1.10 this was sqlUpdate()/insertInto().
        TableResult insertResult = tableEnv.executeSql(
                "INSERT INTO jdbcOutputTable " +
                "SELECT tab,record_time,temperature,humidity FROM TheDataTable WHERE tab IN ('sensor_1','sensor_2','sensor_3')");
        // getJobClient() may be empty; getJobStatus() returns a CompletableFuture,
        // so join() it instead of printing the future object itself.
        insertResult.getJobClient()
                .ifPresent(jobClient -> System.out.println(jobClient.getJobStatus().join()));

        // 4. Second JDBC table: jdbcOutputTable is now read back as a source and the
        //    aggregation is written to jdbcOutputTable1 (MySQL table "report").
        //    Flink 1.10 DDL did not support PRIMARY KEY; 1.11 needs NOT ENFORCED.
        String reportSinkDDL =
                "CREATE TABLE jdbcOutputTable1 ( \n" +
                "  id DOUBLE, \n" +
                "  name STRING, \n" +
                "  total BIGINT, \n" +
                "  PRIMARY KEY (id) NOT ENFORCED \n" +
                ") WITH ( \n" +
                "  'connector' = 'jdbc', \n" +
                "  'url' = 'jdbc:mysql://192.168.109.139:3306/kgxdb',\n" +
                "  'table-name' = 'report', \n" +
                "  'driver' = 'com.mysql.cj.jdbc.Driver', \n" +
                "  'username' = 'root', \n" +
                "  'password' = 'qwertyuiop1234567890' \n" +
                ")";
        tableEnv.executeSql(reportSinkDDL);

        // The extra outer SELECT works around the 1.10-era error
        // "UpsertStreamTableSink requires that Table has a full primary keys if it is updated."
        Table resultTable = tableEnv.sqlQuery(
                "SELECT id,name,total FROM (" +
                "SELECT floor( rand()*100 ) AS id,tab AS name,COUNT(1) AS total " +
                "FROM jdbcOutputTable where tab in ('sensor_1','sensor_2') group by tab )");
        resultTable.printSchema();
        String explain1 = tableEnv.explain(resultTable);
        System.out.println("执行计划为" + explain1);
        resultTable.executeInsert("jdbcOutputTable1");

        // Since 1.11, executeSql()/executeInsert() submit their own jobs immediately
        // and no DataStream sink remains on `env`, so a trailing env.execute() would
        // fail with "No operators defined in streaming topology" — it is omitted.
    }
}
tableApi 1.11版本处理CSV
$("temperature").sum(), //需要改为 $("temperature"),
//否则会报org.apache.flink.table.api.ValidationException: Cannot resolve field [tab], input field list:[tab, EXPR$0].这是因为sum的操作只对其中一个字段,别的字段没有sum操作。
//建议能用SQL千万别用tableApi,学不来,学不来,还是SQL问题直观。
package com.test.demo02_table;
import com.test.demo02_table.Sensor;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
//flink1.10时候是import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.api.*;
import static org.apache.flink.table.api.Expressions.*;
import org.apache.flink.table.descriptors.Csv;
import org.apache.flink.table.descriptors.FileSystem;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.util.Collector;
import java.sql.Timestamp;
import java.sql.Types;
/*
* (1)当 Table 被转换成 DataStream 时(参阅与 DataStream 和 DataSet API 结合)。转换完成后,它就成为一个普通的 DataStream 程序,
* 并会在调用 StreamExecutionEnvironment.execute() 时被执行。注意 从 1.11 版本开始,sqlUpdate 方法 和 insertInto 方法被废弃,
* (2)从这两个方法构建的 Table 程序必须通过 StreamTableEnvironment.execute() 方法执行,
* 而不能通过 StreamExecutionEnvironment.execute() 方法来执行。
* */
public class StreamTableApi {

    /**
     * Table API counterpart of the SQL job (Flink 1.11, Blink planner):
     * parses the sensor text file, registers a CSV file-system table, and
     * projects from it with the Table API.
     */
    public static void main(String[] args) throws Exception {
        // 1. Environment: Blink planner in streaming mode.
        EnvironmentSettings mySetting = EnvironmentSettings
                .newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Keep parallelism at 1: with more parallelism the file-system sink
        // produces a directory of part files instead of a single file.
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, mySetting);

        // 2. Source: one CSV line per record -> Sensor POJO.
        DataStream<String> inputStream = env.readTextFile(
                "D:\\Workplace\\IdeaProjects\\other_project\\flink-learning\\TableApiAndSql\\src\\main\\resources\\datafile\\temperature.txt");
        DataStream<Sensor> dataStream = inputStream.map(new MapFunction<String, Sensor>() {
            @Override
            public Sensor map(String s) throws Exception {
                String[] split = s.split(",");
                return new Sensor(
                        split[0],
                        Timestamp.valueOf(split[1]),
                        Double.valueOf(split[2]),
                        Double.valueOf(split[3])
                );
            }
        });

        // 2.2 Same parsing as a Tuple4 stream, kept for comparison only —
        //     a POJO such as Sensor is more readable than an anonymous tuple.
        DataStream<Tuple4<String, Timestamp, Double, Double>> dataStream1 = env
                .readTextFile("D:\\Workplace\\IdeaProjects\\other_project\\flink-learning\\TableApiAndSql\\src\\main\\resources\\datafile\\temperature.txt")
                .flatMap(new mySplitter());
        dataStream1.print();

        // 3. Table API view over the POJO stream.
        Table dataTable = tableEnv.fromDataStream(dataStream);

        // 4.1 File-system sink/source table: CSV format, schema must match the query.
        //     Delete the generated file between runs, otherwise the job fails.
        tableEnv.connect(new FileSystem().path("D:\\Workplace\\IdeaProjects\\other_project\\flink-learning\\TableApiAndSql\\src\\main\\resources\\datafile\\theFilteTemperature.txt"))
                .withFormat(new Csv())
                .withSchema(new Schema()
                        .field("tab", DataTypes.STRING())
                        .field("record_time", DataTypes.TIMESTAMP())
                        .field("temperature", DataTypes.DOUBLE())
                        .field("humidity", DataTypes.DECIMAL(5, 2))
                )
                .inAppendMode() // required since Flink 1.11
                .createTemporaryTable("outputTable");

        // Plain projection. Do NOT call .sum() on one field here without a groupBy:
        // the planner then only sees [tab, EXPR$0] and fails with
        // "ValidationException: Cannot resolve field [tab]".
        Table resultTable = tableEnv.from("outputTable")
                .select($("tab"),
                        $("record_time"),
                        $("temperature"),
                        $("humidity"));

        // printSchema() prints the table structure, e.g.
        // root
        //  |-- tab: STRING
        //  |-- record_time: TIMESTAMP(3)
        //  |-- temperature: DOUBLE
        //  |-- humidity: DECIMAL(5, 2)
        resultTable.printSchema();
        // Flink 1.10: resultTable.insertInto("outputTable");
        // Flink 1.11: resultTable.executeInsert("outputTable");

        // 5. dataStream1.print() is a DataStream sink, so env.execute() is required here.
        env.execute("Start Table Api for Stream");
    }

    /**
     * Splits one CSV line into a (tab, record_time, temperature, humidity) tuple.
     *
     * @author keguoxin
     * @date 2020-05-10
     */
    public static class mySplitter implements FlatMapFunction<String, Tuple4<String, Timestamp, Double, Double>> {
        @Override
        public void flatMap(String sentence, Collector<Tuple4<String, Timestamp, Double, Double>> out) throws Exception {
            String[] fields = sentence.split(",");
            out.collect(new Tuple4<String, Timestamp, Double, Double>(
                    fields[0],
                    Timestamp.valueOf(fields[1]),
                    Double.valueOf(fields[2]),
                    Double.valueOf(fields[3])
            ));
        }
    }

    /**
     * Word splitter from the official DataStream docs
     * (https://ci.apache.org/projects/flink/flink-docs-release-1.10/dev/datastream_api.html):
     * one line "a b c d" becomes [(a,1),(b,1),(c,1),(d,1)].
     */
    public static class Splitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String sentence, Collector<Tuple2<String, Integer>> out) throws Exception {
            for (String word : sentence.split(" ")) {
                out.collect(new Tuple2<String, Integer>(word, 1));
            }
        }
    }
}
依赖
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>flink-learning</artifactId>
        <groupId>com.test</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>TableApiAndSql</artifactId>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <encoding>UTF-8</encoding>
        <compiler.version>1.8</compiler.version>
        <flink.version>1.11.1</flink.version>
        <java.version>1.8</java.version>
        <hive.version>1.2.1</hive.version>
        <scala.binary.version>2.12</scala.binary.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- flink-table-planner already bundles flink-table-common and flink-table-api-java-bridge -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Blink planner, selected at runtime via EnvironmentSettings.useBlinkPlanner() -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Needed since the 1.10 -> 1.11 upgrade; without it local execution fails with
             "No ExecutorFactory found to execute the application". -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Flink 1.10.1 used flink-jdbc instead: -->
        <!-- <dependency>-->
        <!--     <groupId>org.apache.flink</groupId>-->
        <!--     <artifactId>flink-jdbc_2.12</artifactId>-->
        <!--     <version>1.10.1</version>-->
        <!-- </dependency>-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- <dependency>-->
        <!--     <groupId>org.apache.flink</groupId>-->
        <!--     <artifactId>flink-connector-kafka-0.11_${scala.binary.version}</artifactId>-->
        <!--     <version>${flink.version}</version>-->
        <!-- </dependency>-->
        <!-- flink-table-common is only needed for UDFs / Kafka connectors and is already
             provided transitively by flink-table-planner: -->
        <!-- <dependency>-->
        <!--     <groupId>org.apache.flink</groupId>-->
        <!--     <artifactId>flink-table-common</artifactId>-->
        <!--     <version>${flink.version}</version>-->
        <!-- </dependency>-->
        <!-- The two SLF4J artifacts below are required for Flink's own logging to appear -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.26</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>1.7.26</version>
        </dependency>
        <dependency>
            <groupId>com.github.noraui</groupId>
            <artifactId>ojdbc8</artifactId>
            <version>12.2.0.1</version>
        </dependency>
        <dependency>
            <groupId>com.ibm.db2</groupId>
            <artifactId>db2jcc</artifactId>
            <version>3.72.44</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.19</version>
        </dependency>
    </dependencies>
    <!-- This profile helps to make things run out of the box in IntelliJ -->
    <!-- It adds Flink's core classes to the runtime class path. -->
    <!-- Otherwise they are missing in IntelliJ, because the dependency is 'provided' -->
    <profiles>
        <profile>
            <id>add-dependencies-for-IDEA</id>
            <activation>
                <property>
                    <name>idea.version</name>
                </property>
            </activation>
            <dependencies>
                <dependency>
                    <groupId>org.apache.flink</groupId>
                    <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
                    <version>${flink.version}</version>
                    <scope>compile</scope>
                </dependency>
            </dependencies>
        </profile>
    </profiles>
</project>
测试数据。
文件路径: D:\Workplace\IdeaProjects\other_project\flink-learning\TableApiAndSql\src\main\resources\datafile\temperature.txt
sensor_1,2020-05-11 02:14:41,23.1,79.00,1589134481
sensor_1,2020-05-11 02:15:41,25.6,78.90,1589134541
sensor_1,2020-05-11 02:16:41,24.6,79.00,1589134601
sensor_1,2020-05-11 02:17:41,25.1,79.10,1589134661
sensor_1,2020-05-11 02:18:41,25.1,79.10,1589134721
sensor_1,2020-05-11 02:19:41,25.1,79.10,1589134781
sensor_1,2020-05-11 02:20:41,25.1,79.10,1589134841
sensor_2,2020-05-11 02:14:41,24.6,79.00,1589134481
sensor_2,2020-05-11 02:15:41,25.1,79.10,1589134541
sensor_2,2020-05-11 02:16:41,25.6,79.00,1589134601