Flink table API
需要用到的依赖:
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-csv</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
</dependency>
代码实现:
流:
DataStreamSource<WaterSensor> waterSensorStream =
env.fromElements(new WaterSensor("sensor_1", 1000L, 10),
new WaterSensor("sensor_1", 2000L, 20),
new WaterSensor("sensor_2", 3000L, 30),
new WaterSensor("sensor_1", 4000L, 40),
new WaterSensor("sensor_1", 5000L, 50),
new WaterSensor("sensor_2", 6000L, 60));
1.创建执行环境:
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
2.把流转换为动态表
Table table = tableEnv.fromDataStream(waterSensorStream);
//打印查看一下
table.printScheam();
3.在表上执行连续查询
Table resultTable = table
.where($("id").isEqual("sensor_1"))
.select($("id"), $("ts"), $("vc"));
4.把动态表转换为流进行输出
DataStream<Row> resultStream = tableEnv.toAppendStream(resultTable, Row.class);
resultStream.print();
env.execute();
完整案例:
import com.atguigu.flink.java.chapter_5.WaterSensor;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import static org.apache.flink.table.api.Expressions.$;
public class Flink01_TableApi_BasicUse {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
DataStreamSource<WaterSensor> waterSensorStream =
env.fromElements(new WaterSensor("sensor_1", 1000L, 10),
new WaterSensor("sensor_1", 2000L, 20),
new WaterSensor("sensor_2", 3000L, 30),
new WaterSensor("sensor_1", 4000L, 40),
new WaterSensor("sensor_1", 5000L, 50),
new WaterSensor("sensor_2", 6000L, 60));
// 1. 创建表的执行环境
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
// 2. 创建表: 将流转换成动态表. 表的字段名从pojo的属性名自动抽取
Table table = tableEnv.fromDataStream(waterSensorStream);
// 3. 对动态表进行查询
Table resultTable = table
.where($("id").isEqual("sensor_1"))
.select($("id"), $("ts"), $("vc"));
// 4. 把动态表转换成流
DataStream<Row> resultStream = tableEnv.toAppendStream(resultTable, Row.class);
resultStream.print();
try {
env.execute();
} catch (Exception e) {
e.printStackTrace();
}
}
}
补充:
import static 静态导入
/* 意思是导入这个类里的静态成员(静态方法、静态变量)在这个类中,就可以直接用方法名调用静态方法,而不必用“类名.方法名()” 的方式来调用。
缺点:
1.静态导入可能会让代码更加难以阅读
2.如果同时导入的两个类中又有重命名的静态成员,会出现编译器错误。
*/
import static org.apache.flink.table.api.Expressions.$;
聚合操作
// 1. 对动态表进行查询
Table resultTable = table
.where($("vc").isGreaterOrEqual(20))
.groupBy($("id"))
.aggregate($("vc").sum().as("vc_sum"))
.select($("id"), $("vc_sum"));
// 2. 把动态表转换成流 如果涉及到数据的更新, 要用到撤回流. 多个了一个boolean标记
DataStream<Tuple2<Boolean, Row>> resultStream = tableEnv.toRetractStream(resultTable, Row.class);