flink中关于时间的三个概念:
event time:
事件时间: 数据产生的时间
processing time:
处理时间:当前机器处理该条事件的时间
ingestion time:
摄入时间:事件进入flink的时间
watermark是用于处理flink乱序事件的,通常用watermark机制结合window来实现。
案例:
窗口 [19:34:30, 19:34:33] ==> window_start_time:19:34:30, window_end_time: 19:34:33, 时间为三秒的窗口
发送的数据时间戳:
第一条 19:34:29
第二条 19:34:39
第三条 19:34:43
window的触发要符合以下几个条件:
1、watermark时间 >= window_end_time
2、在[window_start_time, window_end_time)这个左闭右开的区间中有数据存在(flink的时间窗口不包含end_time本身)
同时满足了以上2个条件,window才会触发。
当数据发送到第二条的时候:
watermark时间(19:34:39 - 10秒延迟 = 19:34:29) < window_end_time(19:34:33),因此不能触发window。
当数据发送到第三条的时候:
再次输入一条19:34:43的数据,此时watermark时间会升高到19:34:33(即19:34:43 - 10秒延迟),满足 watermark >= window_end_time,这时的window就会触发
注释:
watermark时间:
watermark = 目前为止所见到的最大 Event Time - maxOutOfOrderness(所以watermark只会随数据单调升高,不会回退)
Event Time:用户数据源自带的时间戳
private final long maxOutOfOrderness = 10000; // 10 seconds watermark设置的延迟时间
package com.flink;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.flink.config.flinkConstants;
import com.flink.utils.emqtt.EmqttSource;
import com.flink.utils.mysql.JdbcReader;
import com.flink.utils.mysql.JdbcWriter;
import com.flink.utils.mysql.JdbcWriterAsyncFunction;
import com.google.gson.Gson;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.Partitioner;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.async.AsyncFunction;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.TimeUnit;
public class emqttFlinkMain {
private static Map<String, String> DeviceMap = new Hashtable<String, String>();
public static void main(String[] args) throws Exception {
flinkConstants fc = flinkConstants.getInstance();
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
env.enableCheckpointing(2000);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); // 步骤1
//emqtt
DataStream<Tuple2<String, String>> inputStream = env.addSource(new EmqttSource());
/**
* 数据类型
*/
DataStream<deviceData> dataStream = inputStream
.rebalance()
.flatMap(new FlatMapFunction<Tuple2<String, String>, deviceData>() {
@Override
public void flatMap(Tuple2<String, String> value, Collector<deviceData> out) {
String message = value.f0;
String topic = value.f1;
List<deviceData> d = DataHandle(message, topic);
for (deviceData line : d) {
out.collect(line);
}
}
});
//写入redis
SingleOutputStreamOperator<Tuple2<String, String>> keyedStream = dataStream
.assignTimestampsAndWatermarks(new TimeLagWatermarkGenerator()) // 步骤二
.map(new MapFunction<deviceData, Tuple2<String, String>>() {
@Override
public Tuple2<String, String> map(deviceData value) {
String key = value.compID + "/" + value.machID + "/" + value.operationValue;
return Tuple2.of(key, value.operationData);
}
})
keyedStream.print();
env.execute("EmqttFlinkMain");
}
private static List<deviceData> DataHandle(String message, String topic) {
List<deviceData> d = new ArrayList<>();
topic = "3333/D4:36:39:1A:0D:D3/Send/Data/FOCAS";
try {
JSONObject DataObject = JSON.parseObject(message);
String dataType = (String) DataObject.get("type");
if (dataType.equals("Data") || dataType.equals("data")) {
String[] array = topic.split("/");
JSONArray dataList = JSON.parseArray(DataObject.get("values").toString());
String machID = DeviceMap.get(array[1]);
if (machID != null) {
for (int i = 0; i < dataList.size(); i++) {
deviceData d1 = new deviceData();
JSONObject dataDict = dataList.getJSONObject(i);
d1.machID = machID;
d1.compID = array[0];
d1.gateMac = array[1];
d1.operationValue = dataDict.get("name").toString();
d1.operationData = dataDict.get("data").toString();
d1.gatherTime = dataDict.get("time").toString();
long unixTimestamp;
if(d1.gatherTime.length()>20){
long ms =Long.parseLong(d1.gatherTime.substring(20, 23));
Date date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(d1.gatherTime);
unixTimestamp = date.getTime() + ms;
}else {
Date date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(d1.gatherTime);
unixTimestamp = date.getTime();
}
d1.timestamp = unixTimestamp;
d.add(d1);
}
} else {
System.out.println("无法解析数据");
}
}
} catch (Throwable t) {
t.printStackTrace();
}
return d;
}
}
// 步骤三
// 步骤三: periodic watermark assigner — the watermark trails the highest
// event timestamp seen so far by a fixed out-of-orderness bound.
class TimeLagWatermarkGenerator implements AssignerWithPeriodicWatermarks<deviceData> {

    /** Maximum expected out-of-orderness of events: 3.5 seconds. */
    private final long maxOutOfOrderness = 3500; // 3.5 seconds

    /**
     * Highest event timestamp seen so far.
     * FIX: the original default of 0 made the first periodic watermark
     * (0 - maxOutOfOrderness), a bogus near-epoch value emitted before any
     * element arrived. Initializing to Long.MIN_VALUE + maxOutOfOrderness
     * yields an initial watermark of Long.MIN_VALUE, matching Flink's own
     * BoundedOutOfOrdernessTimestampExtractor.
     */
    private long currentMaxTimestamp = Long.MIN_VALUE + maxOutOfOrderness;

    /**
     * Extracts the event-time timestamp carried by the record and tracks the
     * running maximum (watermarks must never regress).
     */
    @Override
    public long extractTimestamp(deviceData element, long previousElementTimestamp) {
        long timestamp = element.timestamp;
        currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
        return timestamp;
    }

    /** Watermark = highest timestamp seen minus the out-of-orderness bound. */
    @Override
    public Watermark getCurrentWatermark() {
        return new Watermark(currentMaxTimestamp - maxOutOfOrderness);
    }
}