flink 不设置水印,如何对不带水印的flink联合数据流进行排序

My Flink job has multiple data streams, which I merge with the org.apache.flink.streaming.api.datastream.DataStream#union method.

The problem is that the merged data stream is out of order, and I cannot use a window to sort the data in the stream.

I found an answer, but with it the com.liam.learn.flink.example.union.UnionStreamDemo.SortFunction#onTimer method is never invoked.

Environment Info: flink version 1.7.0

In short, I would like to sort the unioned data stream without watermarks.

解决方案

You need watermarks so that the sorting function knows when it can safely emit sorted elements. Without watermarks, you could get a record from stream B that has an earlier timestamp than any of the first N records of stream A, right?

But adding watermarks is easy, especially if you know that "event time" is strictly increasing for any one stream. Below is some code I wrote that extends what David Anderson posted in his answer to the other SO issue you referenced above - hopefully this will get you started.

-- Ken

package com.scaleunlimited.flinksnippets;

import java.util.PriorityQueue;

import java.util.Random;

import org.apache.flink.api.common.state.ValueState;

import org.apache.flink.api.common.state.ValueStateDescriptor;

import org.apache.flink.api.common.typeinfo.TypeHint;

import org.apache.flink.api.common.typeinfo.TypeInformation;

import org.apache.flink.configuration.Configuration;

import org.apache.flink.streaming.api.TimeCharacteristic;

import org.apache.flink.streaming.api.TimerService;

import org.apache.flink.streaming.api.datastream.DataStream;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import org.apache.flink.streaming.api.functions.KeyedProcessFunction;

import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;

import org.apache.flink.util.Collector;

import org.junit.Test;

public class MergeAndSortStreamsTest {

/**
 * Builds a local two-slot pipeline that unions two event sources ("A" and "B"),
 * keys the merged stream by {@code Event.getKey()}, passes it through
 * {@code SortByTimestampFunction} and prints the result.
 *
 * <p>Timestamps and watermarks are assigned on each source stream individually,
 * before the union.
 *
 * @throws Exception if job execution fails
 */
@Test
public void testMergeAndSort() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(2);
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // The element type must be declared; with a raw DataStream the keyBy lambda
    // below would see Object and could not call getKey().
    DataStream<Event> streamA = env.addSource(new EventSource("A"))
            .assignTimestampsAndWatermarks(new EventTSWAssigner());

    DataStream<Event> streamB = env.addSource(new EventSource("B"))
            .assignTimestampsAndWatermarks(new EventTSWAssigner());

    streamA.union(streamB)
            .keyBy(r -> r.getKey())
            .process(new SortByTimestampFunction())
            .print();

    env.execute();
}

private static class Event implements Comparable {

private String _label;

private long _timestamp;

public Event(String label, long timestamp) {

_label = label;

_timestamp = timestamp;

}

public String getLabel() {

return _label;

}

public void setLabel(String label) {

_label = label;

}

public String getKey() {

return "1";

}

public long getTimestamp() {

return _timestamp;

}

public void setTimestamp(long timestamp) {

_timestamp = timestamp;

}

@Override

public String toString() {

return String.format("%s @ %d", _label, _timestamp);

}

@Override

public int compareTo(Event o) {

return Long.compare(_timestamp, o._timestamp);

}

}

/**
 * Timestamp/watermark assigner that reads the timestamp stored in each
 * {@code Event}. It extends {@code AscendingTimestampExtractor}, so it is meant
 * for streams whose timestamps are ascending within one source.
 */
@SuppressWarnings("serial")
private static class EventTSWAssigner extends AscendingTimestampExtractor<Event> {

    /**
     * @param element the event being processed
     * @return the timestamp carried by {@code element}
     */
    @Override
    public long extractAscendingTimestamp(Event element) {
        return element.getTimestamp();
    }
}

@SuppressWarnings("serial")

private static class SortByTimestampFunction extends KeyedProcessFunction {

private ValueState> queueState = null;

@Override

public void open(Configuration config) {

ValueStateDescriptor> descriptor = new ValueStateDescriptor<>(

// state name

"sorted-events",

// type information of state

TypeInformation.of(new TypeHint>() {

}));

queueState = getRuntimeContext().getState(descriptor);

}

@Override

public void processElement(Event event, Context context, Collector out) throws Exception {

TimerService timerService = context.timerService();

long currentWatermark = timerService.currentWatermark();

System.out.format("processElement called with watermark %d\n", currentWatermark);

if (context.timestamp() > currentWatermark) {

PriorityQueue queue = queueState.value();

if (queue == null) {

queue = new PriorityQueue<>(10);

}

queue.add(event);

queueState.update(queue);

timerService.registerEventTimeTimer(event.getTimestamp());

}

}

@Override

public void onTimer(long timestamp, OnTimerContext context, Collector out) throws Exception {

PriorityQueue queue = queueState.value();

long watermark = context.timerService().currentWatermark();

System.out.format("onTimer called with watermark %d\n", watermark);

Event head = queue.peek();

while (head != null && head.getTimestamp() <= watermark) {

out.collect(head);

queue.remove(head);

head = queue.peek();

}

}

}

/**
 * Parallel test source that emits up to 100 {@code Event}s labelled with a fixed
 * prefix. Timestamps start near the current wall-clock time and increase by a
 * pseudo-random step; the source sleeps so that each event is emitted no earlier
 * than its own timestamp.
 */
@SuppressWarnings("serial")
private static class EventSource extends RichParallelSourceFunction<Event> {

    private String _prefix;

    private transient Random _rand;

    // volatile because cancel() is expected to be called from a thread other than
    // the one executing run(); without it the loop might never observe the change.
    private transient volatile boolean _running;

    private transient int _numEvents;

    /**
     * @param prefix label given to every event produced by this source
     */
    public EventSource(String prefix) {
        _prefix = prefix;
    }

    /** Seeds the random generator from the prefix and the subtask index. */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        _rand = new Random(_prefix.hashCode() + getRuntimeContext().getIndexOfThisSubtask());
    }

    /** Asks the emit loop in {@link #run} to stop. */
    @Override
    public void cancel() {
        _running = false;
    }

    /**
     * Emits events until 100 have been produced or the source is cancelled.
     *
     * @param context receives the generated events
     * @throws Exception if the thread is interrupted while sleeping
     */
    @Override
    public void run(SourceContext<Event> context) throws Exception {
        _running = true;
        _numEvents = 0;

        long timestamp = System.currentTimeMillis() + _rand.nextInt(10);

        while (_running && (_numEvents < 100)) {
            // Wait until wall-clock time has caught up with the event's timestamp.
            long deltaTime = timestamp - System.currentTimeMillis();
            if (deltaTime > 0) {
                Thread.sleep(deltaTime);
            }

            context.collect(new Event(_prefix, timestamp));
            _numEvents++;

            // Advance by 5..14 ms (nextInt(10) yields 0..9), so timestamps are
            // strictly increasing within one source instance.
            timestamp += (5 + _rand.nextInt(10));
        }
    }
}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值