flink watermark

Three notions of time in Flink:
event time:

     the time at which the data was produced
processing time:

     the time at which the machine currently handling the event processes it
ingestion time:

     the time at which the event enters Flink
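In the Flink 1.x API used in the code below, which of these clocks drives windowing is chosen on the execution environment. A minimal sketch (EventTime requires assigning timestamps and watermarks, as shown later):

import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// event time: taken from the data itself
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// alternatives: TimeCharacteristic.ProcessingTime (machine clock, the default)
//               TimeCharacteristic.IngestionTime  (time the record enters Flink at the source)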

 

Watermarks are Flink's mechanism for handling out-of-order events; in practice the watermark mechanism is used together with windows.


Example: 

    Window [19:34:30, 19:34:33]   ==> window_start_time: 19:34:30,  window_end_time: 19:34:33, a 3-second window

Timestamps of the records sent:

 Record 1    19:34:29

 Record 2    19:34:39

 Record 3    19:34:43

 

A window fires only when the following conditions are met:

1. watermark >= window_end_time

2. at least one record exists in [window_start_time, window_end_time]

Only when both conditions hold does the window fire.

When the second record is sent:

the watermark (19:34:29) < window_end_time (19:34:33), so the window cannot fire yet.

When the third record is sent:

the record with timestamp 19:34:43 pushes the watermark up to 19:34:33, and at that point the window fires.
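Putting the numbers together (using the 10-second maxOutOfOrderness from the note below):

Record 1: event time 19:34:29  ->  watermark = 19:34:29 - 10s = 19:34:19  <  19:34:33, window does not fire
Record 2: event time 19:34:39  ->  watermark = 19:34:39 - 10s = 19:34:29  <  19:34:33, window does not fire
Record 3: event time 19:34:43  ->  watermark = 19:34:43 - 10s = 19:34:33  >= 19:34:33, condition 1 is met and the window [19:34:30, 19:34:33] can fire (condition 2 still requires at least one record whose timestamp falls inside that window)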

Note:

Watermark:

watermark = (maximum Event Time seen so far) - maxOutOfOrderness

Event Time: the timestamp carried in the user's source data

private final long maxOutOfOrderness = 10000; // 10 seconds -- the delay (allowed out-of-orderness) configured for the watermark
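Flink also ships a built-in periodic assigner, BoundedOutOfOrdernessTimestampExtractor, which follows the same formula (largest timestamp seen so far minus the configured delay). A minimal sketch of attaching it to the dataStream from the code below; the deviceData type and its timestamp field come from that code:

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;

DataStream<deviceData> withTimestamps = dataStream.assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor<deviceData>(Time.seconds(10)) {
            @Override
            public long extractTimestamp(deviceData element) {
                // event-time timestamp carried by the record (milliseconds since epoch);
                // the emitted watermark trails the largest timestamp seen so far by 10 seconds
                return element.timestamp;
            }
        });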

 

package com.flink;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.flink.config.flinkConstants;
import com.flink.utils.emqtt.EmqttSource;
import com.flink.utils.mysql.JdbcReader;
import com.flink.utils.mysql.JdbcWriter;
import com.flink.utils.mysql.JdbcWriterAsyncFunction;
import com.google.gson.Gson;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.Partitioner;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.async.AsyncFunction;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.TimeUnit;


public class emqttFlinkMain {
    private static Map<String, String> DeviceMap = new Hashtable<String, String>();

    public static void main(String[] args) throws Exception {
        flinkConstants fc = flinkConstants.getInstance();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

        env.enableCheckpointing(2000);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);  // step 1: drive windows by event time


        //emqtt
        DataStream<Tuple2<String, String>> inputStream = env.addSource(new EmqttSource());

        /**
         *  Flat-map each raw MQTT message into deviceData records
         */
        DataStream<deviceData> dataStream = inputStream
                .rebalance()
                .flatMap(new FlatMapFunction<Tuple2<String, String>, deviceData>() {
                    @Override
                    public void flatMap(Tuple2<String, String> value, Collector<deviceData> out) {
                        String message = value.f0;
                        String topic = value.f1;
                        List<deviceData> d = DataHandle(message, topic);
                        for (deviceData line : d) {
                            out.collect(line);
                        }
                    }
                });


        // write to Redis (records are mapped to key/value pairs)
        SingleOutputStreamOperator<Tuple2<String, String>> keyedStream = dataStream
                .assignTimestampsAndWatermarks(new TimeLagWatermarkGenerator())  // step 2: assign event timestamps and watermarks
                .map(new MapFunction<deviceData, Tuple2<String, String>>() {
                    @Override
                    public Tuple2<String, String> map(deviceData value) {
                        String key = value.compID + "/" + value.machID + "/" + value.operationValue;
                        return Tuple2.of(key, value.operationData);
                    }
                });
        keyedStream.print();

        env.execute("EmqttFlinkMain");
    }
    private static List<deviceData> DataHandle(String message, String topic) {
        List<deviceData> d = new ArrayList<>();
        topic = "3333/D4:36:39:1A:0D:D3/Send/Data/FOCAS";
        try {
            JSONObject DataObject = JSON.parseObject(message);
            String dataType = (String) DataObject.get("type");
            if (dataType.equals("Data") || dataType.equals("data")) {
                String[] array = topic.split("/");

                JSONArray dataList = JSON.parseArray(DataObject.get("values").toString());

                String machID = DeviceMap.get(array[1]);
                if (machID != null) {
                    for (int i = 0; i < dataList.size(); i++) {
                        deviceData d1 = new deviceData();
                        JSONObject dataDict = dataList.getJSONObject(i);
                        d1.machID = machID;
                        d1.compID = array[0];
                        d1.gateMac = array[1];
                        d1.operationValue = dataDict.get("name").toString();
                        d1.operationData = dataDict.get("data").toString();
                        d1.gatherTime = dataDict.get("time").toString();
                        // gatherTime longer than "yyyy-MM-dd HH:mm:ss" carries a ".SSS" millisecond suffix
                        long unixTimestamp;
                        if (d1.gatherTime.length() > 20) {
                            // parse the millisecond part (characters 20-22) separately;
                            // SimpleDateFormat ignores the trailing ".SSS" when parsing the rest
                            long ms = Long.parseLong(d1.gatherTime.substring(20, 23));
                            Date date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(d1.gatherTime);
                            unixTimestamp = date.getTime() + ms;
                        } else {
                            Date date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(d1.gatherTime);
                            unixTimestamp = date.getTime();
                        }
                        d1.timestamp = unixTimestamp;
                        d.add(d1);
                    }
                } else {
                    System.out.println("无法解析数据");
                }
            }
        } catch (Throwable t) {
            t.printStackTrace();
        }
        return d;
    }
}
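The pipeline above only assigns watermarks and prints the mapped key/value pairs; it never actually opens a window. A minimal sketch of how the 3-second event-time window discussed earlier could be applied to keyedStream (the counting logic and output format are illustrative; TumblingEventTimeWindows and KeySelector need additional imports):

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;

keyedStream
        .keyBy(new KeySelector<Tuple2<String, String>, String>() {
            @Override
            public String getKey(Tuple2<String, String> value) {
                return value.f0;  // compID/machID/operationValue
            }
        })
        .window(TumblingEventTimeWindows.of(Time.seconds(3)))
        .process(new ProcessWindowFunction<Tuple2<String, String>, String, String, TimeWindow>() {
            @Override
            public void process(String key, Context context,
                                Iterable<Tuple2<String, String>> elements, Collector<String> out) {
                long count = 0;
                for (Tuple2<String, String> ignored : elements) {
                    count++;
                }
                // fires once the watermark reaches context.window().getEnd()
                out.collect(key + ": " + count + " records in window ending " + context.window().getEnd());
            }
        })
        .print();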



// step 3: custom periodic watermark assigner

class TimeLagWatermarkGenerator implements AssignerWithPeriodicWatermarks<deviceData> {
    private final long maxOutOfOrderness = 3500; // 3.5 seconds

    private long currentMaxTimestamp;

    @Override
    public long extractTimestamp(deviceData element, long previousElementTimestamp) {
        long timestamp = element.timestamp;
        currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
        return timestamp;
    }

    @Override
    public Watermark getCurrentWatermark() {
        // return the watermark as current highest timestamp minus the out-of-orderness bound
        return new Watermark(currentMaxTimestamp - maxOutOfOrderness);
    }
}
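For reference, AssignerWithPeriodicWatermarks is deprecated in Flink 1.11 and later; the equivalent bounded-out-of-orderness behavior is expressed with the WatermarkStrategy API. A minimal sketch, again using the deviceData.timestamp field and the 3.5-second delay from above:

import java.time.Duration;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;

dataStream.assignTimestampsAndWatermarks(
        WatermarkStrategy
                .<deviceData>forBoundedOutOfOrderness(Duration.ofMillis(3500))
                .withTimestampAssigner((element, recordTimestamp) -> element.timestamp));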

 
