分享
说明
- 本博客每周五更新一次。
- 本文属于实战,讲解如何在 Flink 1.12 中用 Java 代码通过时间窗口加水印实现统计,具体需求为:每 5 秒统计一次每个用户的订单总数、订单最大金额和最小金额。
实现
讲解
- 代码结构分为 5 部分:
- 准备环境 env
- 数据输入 source
- 数据处理 transformation
  - 创建水印、窗口执行任务
  - 基于 SQL 和 Table 风格实现对应功能
- 数据输出 sink
- 启动任务 execute
代码
- 所有代码基于 Java 1.8 + Flink 1.12,环境搭建过程请参照前几篇博客。
import static org.apache.flink.table.api.Expressions.$;
import static org.apache.flink.table.api.Expressions.lit;
import java.time.Duration;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Flink demo job that aggregates randomly generated orders per user in 5-second
 * tumbling event-time windows.
 *
 * <p>For every user and window it computes the number of orders, the maximum order
 * amount and the minimum order amount. The same aggregation is expressed twice, once
 * as a SQL query and once with the Table API, and both results are printed.
 */
public class DataStreamToWindowsCount {

    /**
     * Builds and runs the job: env, source, transformation, sink, execute.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if building or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        // 1. env: streaming environment plus a table environment on top of it.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings settings =
                EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);

        // 2. source: one random order per second.
        DataStreamSource<Order> orderDs = env.addSource(new RandomOrderSource());

        // 3. transformation: use createTime as the event timestamp and tolerate
        //    records that arrive up to 5 seconds out of order.
        SingleOutputStreamOperator<Order> orderDSWithWatermark =
                orderDs.assignTimestampsAndWatermarks(
                        WatermarkStrategy.<Order>forBoundedOutOfOrderness(Duration.ofSeconds(5))
                                .withTimestampAssigner(
                                        (order, recordTimestamp) -> order.getCreateTime()));

        // Register the stream as a view; createTime becomes the rowtime attribute
        // so that it can drive event-time windows.
        tableEnv.createTemporaryView(
                "t_order",
                orderDSWithWatermark,
                $("orderId"),
                $("userId"),
                $("money"),
                $("createTime").rowtime());

        // SQL flavour of the aggregation.
        String sql = "select userId,count(*) as orderCount,max(money) as maxMoney,min(money) as minMoney from t_order group by userId,tumble(createTime,Interval '5' SECOND)";
        Table resultTable = tableEnv.sqlQuery(sql);

        // Table API flavour of the same aggregation.
        Table apiTable = tableEnv.from("t_order")
                .window(Tumble.over(lit(5).second()).on($("createTime")).as("tumbleWindow"))
                .groupBy($("tumbleWindow"), $("userId"))
                .select(
                        $("userId"),
                        $("userId").count().as("totalCount"),
                        $("money").max().as("maxMoney"),
                        // Alias was misspelled "mixMoney"; it now matches the SQL query.
                        $("money").min().as("minMoney"));

        // 4. sink: convert both tables back to streams and print them.
        DataStream<Tuple2<Boolean, Row>> resultDS = tableEnv.toRetractStream(resultTable, Row.class);
        DataStream<Tuple2<Boolean, Row>> resultAPI = tableEnv.toRetractStream(apiTable, Row.class);
        resultDS.print("sql数据");
        resultAPI.print("API");

        // 5. execute
        env.execute("");
    }

    /**
     * Source that emits one random {@link Order} per second until it is cancelled.
     *
     * <p>Each order gets a random UUID as id, a user id in {@code [0, 3)}, an amount in
     * {@code [0, 101)} and the current wall-clock time as creation time.
     */
    private static class RandomOrderSource extends RichSourceFunction<Order> {

        /**
         * Loop flag. Declared {@code volatile} because {@link #cancel()} may be invoked
         * from a different thread than the one executing {@link #run}; without it the
         * emitting loop is not guaranteed to observe the update.
         */
        private volatile boolean isRunning = true;

        @Override
        public void run(SourceContext<Order> ctx) throws Exception {
            Random random = new Random();
            while (isRunning) {
                Order order = new Order(
                        UUID.randomUUID().toString(),
                        random.nextInt(3),
                        random.nextInt(101),
                        System.currentTimeMillis());
                // Emit right after stamping the record, then pause, so the event
                // timestamp is not already one second old when the record is collected.
                ctx.collect(order);
                TimeUnit.SECONDS.sleep(1);
            }
        }

        @Override
        public void cancel() {
            isRunning = false;
        }
    }

    /**
     * Order record used as the stream element type.
     *
     * <p>Lombok generates the accessors (including {@code getCreateTime()} used by the
     * timestamp assigner), the no-argument constructor and the all-arguments constructor.
     */
    @Data
    @NoArgsConstructor
    @AllArgsConstructor
    public static class Order {
        /** Unique order identifier. */
        public String orderId;
        /** Identifier of the user who placed the order. */
        public Integer userId;
        /** Order amount. */
        public Integer money;
        /** Creation time in epoch milliseconds; used as the event-time timestamp. */
        public Long createTime;
    }
}
总结
- 在 Flink Table API 和 SQL 中使用时间窗口加水印时,SQL 和 API 的语法有点奇怪,不过功能真心强大;熟悉这些接口并加以封装后,可以在 Web 端开发拖拽组件,实现在线编辑。
- 学习任重道远,继续加油。