读取本地 CSV 文件做流处理,按表(Table API / SQL)方式处理。
所需 pom 依赖如下:
<dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-java</artifactId> <version>1.10.1</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-streaming-java_2.12</artifactId> <version>1.10.1</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-table-planner_2.12</artifactId> <version>1.10.1</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-table-planner-blink_2.12</artifactId> <version>1.10.1</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-csv</artifactId> <version>1.10.1</version> </dependency>
package com.yrl.table;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.Csv;
import org.apache.flink.table.descriptors.FileSystem;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
public class Test03 {
    // Reads a local CSV file as a bounded stream, registers it as a table,
    // and demonstrates Flink 1.10 Table API and SQL queries plus a CSV file sink.
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism 1 so the printed results arrive in a single, ordered stream.
        env.setParallelism(1);
        // 1.1 Stream table environment backed by the legacy (pre-Blink) planner.
        EnvironmentSettings oldEnvSettings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env,oldEnvSettings);
        // 2.1 Register the source table from a local file.
        String path="D:\\大数据组件API\\Flink\\Flink01\\src\\main\\resources\\test.txt";
        tableEnv.connect(new FileSystem().path(path)) // connector: local filesystem
                .withFormat(new Csv())                // format: CSV (needs flink-csv on the classpath)
                .withSchema(new Schema()              // schema: field order must match the file's column order
                        .field("id", DataTypes.STRING())
                        .field("timestamp", DataTypes.BIGINT())
                        .field("temperature", DataTypes.DOUBLE())
                ).createTemporaryTable("inputTable");
        Table inputTable = tableEnv.from("inputTable");
        inputTable.printSchema();
        // 3.1 Table API: a simple filter, and a per-id aggregation.
        Table res1 = inputTable.select("id,temperature").where("id='sensor_6'");
        Table res2 = inputTable.groupBy("id").select("id,id.count as count,temperature.avg as tempAvg");
        // 3.2 SQL: equivalent grouped count; `count` is back-quoted because it is a keyword.
        String sql ="select id ,count(*) as `count` from inputTable group by id";
        Table res3 = tableEnv.sqlQuery(sql);
        // res1 is a plain filter, so an append stream suffices; the grouped
        // aggregations (res2, res3) emit updates and need retract streams.
        tableEnv.toAppendStream(res1, Row.class).print("res1");
        tableEnv.toRetractStream(res2, Row.class).print("res2");
        tableEnv.toRetractStream(res3, Row.class).print("res3");
        // 4.1 Register the sink table and write res1 out as CSV.
        String path2="D:\\大数据组件API\\Flink\\Flink01\\src\\main\\resources\\test2.csv";
        tableEnv.connect(new FileSystem().path(path2)) // connector: local filesystem
                .withFormat(new Csv())                 // format: CSV
                .withSchema(new Schema()               // sink schema, same field order as res1
                        .field("id", DataTypes.STRING())
                        // NOTE(review): named "temp" while res1 emits "temperature" —
                        // presumably matched positionally by the old planner; confirm.
                        .field("temp", DataTypes.DOUBLE())
                ).createTemporaryTable("outputTable");
        // res2/res3 produce retractions (updates), which a plain file sink cannot
        // accept, so only the append-only res1 is written to the file.
        res1.insertInto("outputTable");
        env.execute();
    }
}