Trigger
Trigger(触发器)决定窗口(由窗口分配器生成)何时可以交给窗口函数处理。每个WindowAssigner都带有一个默认的Trigger。如果默认触发器不符合您的需求,可以使用trigger(…)方法指定自定义触发器。
Trigger接口有五个方法,使触发器能够对不同的事件做出反应:
- onElement()对于添加到窗口中的每个元素,都会调用该方法。
- onEventTime()当注册的事件时间计时器触发时,将调用该方法。
- onProcessingTime()当注册的处理时间计时器触发时,将调用该方法。
- 该onMerge()方法与有状态触发器相关,并且在两个触发器的相应窗口合并时(例如,在使用会话窗口时)合并两个触发器的状态。
- 最终,该clear()方法执行删除相应窗口后所需的任何操作。
关于上述方法,需要注意两件事:
1)前三个方法通过返回一个TriggerResult来决定如何处理触发它们的事件。该动作可以是以下之一:
- CONTINUE:不做任何操作,
- FIRE:触发计算,
- PURGE:清除窗口中的元素,
- FIRE_AND_PURGE:触发计算并随后清除窗口中的元素。
2)这些方法中的任何一种都可以用于注册处理或事件时间计时器以用于将来的操作。
package com.baizhi.jsy.trigger
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.AllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.{GlobalWindows, ProcessingTimeSessionWindows, SlidingProcessingTimeWindows, TumblingProcessingTimeWindows}
import org.apache.flink.streaming.api.windowing.triggers.CountTrigger
import org.apache.flink.streaming.api.windowing.windows.{GlobalWindow, TimeWindow}
import org.apache.flink.util.Collector
/**
 * Streaming job: reads text lines from a socket, splits them into words, places all words
 * in one global window with a `CountTrigger.of(4)` trigger attached, and evaluates the
 * window with [[UserDefineGlobalAllWindowFunction]].
 */
object FlinkWindowProcessGlobalWithTrigger {
  def main(args: Array[String]): Unit = {
    // 1. Obtain the stream execution environment.
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Source: text lines read from host "Centos", port 9999.
    val lines = streamEnv.socketTextStream("Centos", 9999)

    // 3. Split every line on whitespace.
    val words = lines.flatMap(ln => ln.split("\\s+"))

    // 4. Non-keyed global window; the trigger decides when the window is evaluated.
    val windowed = words
      .windowAll(GlobalWindows.create())
      .trigger(CountTrigger.of(4))

    // 5. Evaluate the window and attach the print sink.
    windowed.apply(new UserDefineGlobalAllWindowFunction).print()

    // 6. Submit the job.
    streamEnv.execute("Global Window Stream WordCount")
  }
}
/**
 * Window function for the global-window example: dumps the window and the elements it
 * currently holds to standard output.
 *
 * Nothing is sent to `out`, so the stream produced by this function carries no records.
 */
class UserDefineGlobalAllWindowFunction extends AllWindowFunction[String,String,GlobalWindow] {
  /**
   * @param window the window being evaluated
   * @param input  the elements handed to this evaluation
   * @param out    downstream collector (not used here)
   */
  override def apply(window: GlobalWindow,
                     input: Iterable[String],
                     out: Collector[String]): Unit = {
    // Join the elements with '|' so the whole window fits on one line.
    val joined = input.toList.mkString("|")
    println("window:" + window + "\t" + joined)
  }
}
/**
 * Abstract base class for window triggers.
 *
 * <p>The three callbacks that return a {@link TriggerResult} ({@code onElement},
 * {@code onProcessingTime}, {@code onEventTime}) decide, through that return value,
 * whether the window is ready.
 */
public abstract class Trigger<T, W extends Window> implements Serializable {
/**
 * Called for every element that falls into the current window.
 *
 * @param element the element that arrived
 * @param timestamp the time at which the element arrived
 * @param window the window the element belongs to
 * @param ctx a context object, typically used to register timer
 *            (processing-time / event-time) callbacks
 */
public abstract TriggerResult onElement(T element, long timestamp, W window,
TriggerContext ctx) throws Exception;
/**
 * Callback for processing-time timers.
 *
 * @param time the time at which the timer fired
 * @param window the window for which the timer fired
 * @param ctx a context object, typically used to register timer
 *            (processing-time / event-time) callbacks
 */
public abstract TriggerResult onProcessingTime(long time, W window, TriggerContext
ctx) throws Exception;
/**
 * Callback for event-time timers.
 *
 * @param time the time at which the timer fired
 * @param window the window for which the timer fired
 * @param ctx a context object, typically used to register timer
 *            (processing-time / event-time) callbacks
 */
public abstract TriggerResult onEventTime(long time, W window, TriggerContext ctx)
throws Exception;
/**
 * Called when several windows are merged into one window, for example with session
 * windows; see {@link org.apache.flink.streaming.api.windowing.assigners.WindowAssigner}.
 *
 * <p>This default implementation always throws {@link UnsupportedOperationException}.
 *
 * @param window the new window resulting from the merge
 * @param ctx a context object, typically used to register timer
 *            (processing-time / event-time) callbacks and to access state
 */
public void onMerge(W window, OnMergeContext ctx) throws Exception {
throw new UnsupportedOperationException("This trigger does not support merging.");
}
/**
 * Performs whatever is needed once the window has been removed, for example deleting
 * timers or state.
 */
public abstract void clear(W window, TriggerContext ctx) throws Exception;
}
关于上述方法,需要注意两件事:
1)前三个方法通过返回TriggerResult来决定窗口是否就绪。
/**
 * Result returned by a trigger callback. Each constant is built from two flags:
 * whether the window fires and whether its elements are purged.
 */
public enum TriggerResult {
/**
 * Do not fire and do not remove any elements.
 */
CONTINUE(false, false),
/**
 * Fire the window, then remove the elements in the window.
 */
FIRE_AND_PURGE(true, true),
/**
 * Fire the window but keep the window's elements.
 */
FIRE(true, false),
/**
 * Do not fire the window; discard the window and remove its elements.
 */
PURGE(false, true);
private final boolean fire;// whether the window fires
private final boolean purge;// whether the window's elements are removed
// The remaining members are elided in this excerpt.
...
}
2)这些方法中的任何一个都可以用来注册处理时间或事件时间计时器,以便执行将来的操作。
案例使用
trigger自己写触发器
package com.baizhi.jsy.trigger
import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.api.common.state.ReducingStateDescriptor
import org.apache.flink.streaming.api.windowing.triggers.{Trigger, TriggerResult}
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow
import org.apache.flink.streaming.api.scala._
/**
 * Count-based trigger for global windows: every element increments a counter kept in
 * trigger state, and once the counter reaches `maxCount` the window is fired and purged
 * and the counter is reset.
 *
 * @param maxCount number of elements that must be added before the window fires
 */
class UserDefineCountTrigger(maxCount:Long) extends Trigger[String,GlobalWindow]{
  /** Descriptor of the reducing state that holds the element count; values are summed. */
  val rsd: ReducingStateDescriptor[Long] = new ReducingStateDescriptor[Long](
    "rsd",
    new ReduceFunction[Long] {
      override def reduce(t: Long, t1: Long): Long = t + t1
    },
    createTypeInformation[Long])

  /**
   * Counts the incoming element and decides whether the window is ready.
   *
   * @return `FIRE_AND_PURGE` once `maxCount` elements were counted, otherwise `CONTINUE`
   */
  override def onElement(t: String, l: Long, w: GlobalWindow, triggerContext: Trigger.TriggerContext): TriggerResult = {
    val state = triggerContext.getPartitionedState(rsd)
    state.add(1L)
    if (state.get() >= maxCount) {
      // Reset the counter so the next batch starts from zero.
      state.clear()
      TriggerResult.FIRE_AND_PURGE
    } else {
      TriggerResult.CONTINUE
    }
  }

  // This trigger registers no timers. Returning CONTINUE (instead of `???`, which throws
  // NotImplementedError) keeps the job alive should a timer callback arrive anyway.
  override def onProcessingTime(l: Long, w: GlobalWindow, triggerContext: Trigger.TriggerContext): TriggerResult =
    TriggerResult.CONTINUE

  override def onEventTime(l: Long, w: GlobalWindow, triggerContext: Trigger.TriggerContext): TriggerResult =
    TriggerResult.CONTINUE

  /** Releases the counter state; the original only looked the state up without clearing it. */
  override def clear(w: GlobalWindow, triggerContext: Trigger.TriggerContext): Unit = {
    println("-------------")
    triggerContext.getPartitionedState(rsd).clear()
  }
}
package com.baizhi.jsy.trigger
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.AllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.{GlobalWindows, ProcessingTimeSessionWindows, SlidingProcessingTimeWindows, TumblingProcessingTimeWindows}
import org.apache.flink.streaming.api.windowing.triggers.CountTrigger
import org.apache.flink.streaming.api.windowing.windows.{GlobalWindow, TimeWindow}
import org.apache.flink.util.Collector
/**
 * Streaming job: same pipeline shape as the built-in count-trigger example, but the global
 * window is driven by the user-defined `UserDefineCountTrigger(4)`.
 */
object FlinkWindowProcessGlobalWithTrigger {
  def main(args: Array[String]): Unit = {
    // 1. Obtain the stream execution environment.
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Source: text lines read from host "Centos", port 9999.
    val lines = streamEnv.socketTextStream("Centos", 9999)

    // 3. Split lines into words and gather them in a single non-keyed global window
    //    whose evaluation is controlled by the custom trigger.
    val windowed = lines
      .flatMap(ln => ln.split("\\s+"))
      .windowAll(GlobalWindows.create())
      .trigger(new UserDefineCountTrigger(4))

    // 4. Evaluate the window and attach the print sink.
    windowed.apply(new UserDefineGlobalAllWindowFunction).print()

    // 5. Submit the job.
    streamEnv.execute("Global Window Stream WordCount")
  }
}
/**
 * Prints the evaluated global window together with its elements to standard output.
 *
 * The collector `out` is never written to, so no records leave this function.
 */
class UserDefineGlobalAllWindowFunction extends AllWindowFunction[String,String,GlobalWindow] {
  /**
   * @param window the window being evaluated
   * @param input  the elements handed to this evaluation
   * @param out    downstream collector (not used here)
   */
  override def apply(window: GlobalWindow,
                     input: Iterable[String],
                     out: Collector[String]): Unit = {
    val elements = input.toList
    // One line per evaluation: the window, a tab, then the '|'-separated elements.
    val line = "window:" + window + "\t" + elements.mkString("|")
    println(line)
  }
}
Evictors(剔除器)
Flink的窗口模型允许除了WindowAssigner和Trigger之外还指定一个可选的Evictor。可以使用evictor(…)方法来完成此操作。Evictors可以在触发器触发后,应用Window Function之前或之后从窗口中删除元素。
/**
 * Interface for removing elements from a window, with one hook before and one hook after
 * the windowing function is invoked.
 */
public interface Evictor<T, W extends Window> extends Serializable {
/**
 * Called before the windowing function is invoked.
 *
 * @param elements all elements currently in the window
 * @param size the total number of elements currently in the window
 * @param window The {@link Window}
 * @param evictorContext the context object of the Evictor
 */
void evictBefore(Iterable<TimestampedValue<T>> elements, int size, W window,
EvictorContext evictorContext);
/**
 * Called after the windowing function has been invoked.
 *
 * @param elements The elements currently in the pane.
 * @param size The current number of elements in the pane.
 * @param window The {@link Window}
 * @param evictorContext The context for the Evictor
 */
void evictAfter(Iterable<TimestampedValue<T>> elements, int size, W window,
EvictorContext evictorContext);
}
evictBefore()包含要在窗口函数之前应用的剔除逻辑,而evictAfter()包含要在窗口函数之后应用的剔除逻辑。在应用窗口函数之前被剔除的元素将不会被窗口函数处理。
Flink自带三个预先实现的剔除器(Evictor),分别是:
CountEvictor 保留指定个数
从窗口中保留用户指定数量的元素,并从窗口缓冲区的开头丢弃其余的元素。
保留三个元素案例
package com.baizhi.jsy.evictors
import java.text.SimpleDateFormat
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.AllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.{GlobalWindows, ProcessingTimeSessionWindows, SlidingProcessingTimeWindows, TumblingProcessingTimeWindows}
import org.apache.flink.streaming.api.windowing.evictors.CountEvictor
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.triggers.CountTrigger
import org.apache.flink.streaming.api.windowing.windows.{GlobalWindow, TimeWindow}
import org.apache.flink.util.Collector
/**
 * Streaming job: splits socket text into words, groups them into 5-second tumbling
 * processing-time windows with a `CountEvictor.of(3)` attached, and evaluates each window
 * with [[UserDefineGlobalAllWindowFunction]].
 */
object FlinkWindowProcessGlobalWithEvictor {
  def main(args: Array[String]): Unit = {
    // 1. Obtain the stream execution environment.
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Source: text lines read from host "Centos", port 9999.
    val lines = streamEnv.socketTextStream("Centos", 9999)

    // 3. Split every line on whitespace.
    val words = lines.flatMap(ln => ln.split("\\s+"))

    // 4. Non-keyed tumbling window of 5 seconds with the count evictor attached.
    val windowed = words
      .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(5)))
      .evictor(CountEvictor.of(3))

    // 5. Evaluate the window and attach the print sink.
    windowed.apply(new UserDefineGlobalAllWindowFunction).print()

    // 6. Submit the job.
    streamEnv.execute("Global Window Stream WordCount")
  }
}
/**
 * Prints the start and end of the evaluated time window (formatted as HH:mm:ss) followed
 * by the window's elements.
 *
 * The collector `out` is never written to, so no records leave this function.
 */
class UserDefineGlobalAllWindowFunction extends AllWindowFunction[String,String,TimeWindow] {
  /**
   * @param window the time window being evaluated
   * @param input  the elements handed to this evaluation
   * @param out    downstream collector (not used here)
   */
  override def apply(window: TimeWindow,
                     input: Iterable[String],
                     out: Collector[String]): Unit = {
    val clock = new SimpleDateFormat("HH:mm:ss")
    val startText = clock.format(window.getStart)
    val endText = clock.format(window.getEnd)
    // Elements are joined with '|' so each window is reported on a single line.
    val joined = input.toList.mkString("|")
    println("时间:" + startText + "\t" + endText + "\t" + joined)
  }
}
/**
 * Keeps at most {@code maxCount} elements: when the buffer holds more, elements are removed
 * from the start of the iteration order until only {@code maxCount} remain.
 *
 * @param elements the buffered window elements
 * @param size the number of elements in {@code elements}
 * @param ctx the evictor context (not used here)
 */
private void evict(Iterable<TimestampedValue<Object>> elements, int size,
        EvictorContext ctx) {
    // Nothing to drop while the buffer is within the limit.
    if (size <= maxCount) {
        return;
    }

    Iterator<TimestampedValue<Object>> cursor = elements.iterator();
    int visited = 0;
    while (cursor.hasNext()) {
        cursor.next();
        visited++;
        // Once the surplus has been removed, stop at the first element to keep.
        if (visited > size - maxCount) {
            return;
        }
        cursor.remove();
    }
}
DeltaEvictor
采用DeltaFunction和阈值,计算窗口缓冲区中最后一个元素与其余每个元素之间的增量,并删除增量大于或等于阈值的元素。
/**
 * Removes every element whose delta to the last buffered element, as computed by
 * {@code deltaFunction}, is greater than or equal to {@code threshold}.
 *
 * @param elements the buffered window elements
 * @param size the number of elements in {@code elements} (not used here)
 * @param ctx the evictor context (not used here)
 */
private void evict(Iterable<TimestampedValue<T>> elements, int size, EvictorContext
        ctx) {
    final TimestampedValue<T> newest = Iterables.getLast(elements);
    final Iterator<TimestampedValue<T>> cursor = elements.iterator();
    while (cursor.hasNext()) {
        final TimestampedValue<T> candidate = cursor.next();
        // Compare each element against the last one in the buffer.
        final boolean beyondThreshold =
                deltaFunction.getDelta(candidate.getValue(), newest.getValue()) >= this.threshold;
        if (beyondThreshold) {
            cursor.remove();
        }
    }
}
TimeEvictor
以毫秒为单位的时间间隔(interval)作为参数,对于给定的窗口,它会在窗口元素中找到最大时间戳max_ts,并删除所有时间戳小于max_ts-interval的元素,即只保留最近一段时间间隔内的数据。(注意:下面的实现中使用的比较是 <=,时间戳等于max_ts-interval的元素也会被删除。)
/**
 * Removes every element whose timestamp is at or below
 * {@code maxTimestamp - windowSize}, where {@code maxTimestamp} is the largest timestamp
 * in the buffer. Does nothing when {@code hasTimestamp(elements)} is false.
 *
 * @param elements the buffered window elements
 * @param size the number of elements in {@code elements} (not used here)
 * @param ctx the evictor context (not used here)
 */
private void evict(Iterable<TimestampedValue<Object>> elements, int size,
        EvictorContext ctx) {
    if (hasTimestamp(elements)) {
        // The cutoff is measured back from the largest timestamp in the buffer.
        final long newest = getMaxTimestamp(elements);
        final long cutoff = newest - windowSize;

        final Iterator<TimestampedValue<Object>> cursor = elements.iterator();
        while (cursor.hasNext()) {
            final TimestampedValue<Object> candidate = cursor.next();
            if (candidate.getTimestamp() <= cutoff) {
                cursor.remove();
            }
        }
    }
}
/**
 * Reports whether the first element of {@code elements} has a timestamp.
 *
 * @param elements the buffered window elements
 * @return the first element's {@code hasTimestamp()} value, or {@code false} when
 *         {@code elements} is empty
 */
private boolean hasTimestamp(Iterable<TimestampedValue<Object>> elements) {
    final Iterator<TimestampedValue<Object>> cursor = elements.iterator();
    // Only the first element is inspected.
    return cursor.hasNext() && cursor.next().hasTimestamp();
}
/**
 * Finds the largest timestamp among the given elements.
 *
 * @param elements the buffered window elements
 * @return the maximum timestamp, or {@code Long.MIN_VALUE} when {@code elements} is empty
 */
private long getMaxTimestamp(Iterable<TimestampedValue<Object>> elements) {
    long newest = Long.MIN_VALUE;
    for (TimestampedValue<Object> candidate : elements) {
        newest = Math.max(newest, candidate.getTimestamp());
    }
    return newest;
}
用户自定义evictor
UserDefineEvictor
package com.baizhi.jsy.evictors
import java.text.SimpleDateFormat
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.AllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.{GlobalWindows, ProcessingTimeSessionWindows, SlidingProcessingTimeWindows, TumblingProcessingTimeWindows}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.{GlobalWindow, TimeWindow}
import org.apache.flink.util.Collector
/**
 * Streaming job: puts raw socket lines into sliding processing-time windows (size 4 s,
 * slide 2 s), attaches `UserDefineEvictor(false, "error")`, and evaluates each window with
 * [[UserDefineSlidingAllWindowFunction]].
 */
object FlinkWindowProcessSlidingWithEvictor {
  def main(args: Array[String]): Unit = {
    // 1. Obtain the stream execution environment.
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Source: text lines read from host "Centos", port 9999.
    val lines = streamEnv.socketTextStream("Centos", 9999)

    // 3. Non-keyed sliding window with the user-defined evictor attached.
    val slidingAssigner = SlidingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2))
    val windowed = lines
      .windowAll(slidingAssigner)
      .evictor(new UserDefineEvictor(false, "error"))

    // 4. Evaluate the window and attach the print sink.
    windowed.apply(new UserDefineSlidingAllWindowFunction).print()

    // 5. Submit the job.
    streamEnv.execute("Sliding Window Stream WordCount")
  }
}
/**
 * Prints the start and end of the evaluated time window (formatted as HH:mm:ss) followed
 * by the window's elements.
 *
 * The collector `out` is never written to, so no records leave this function.
 */
class UserDefineSlidingAllWindowFunction extends AllWindowFunction[String,String,TimeWindow] {
  /**
   * @param window the time window being evaluated
   * @param input  the elements handed to this evaluation
   * @param out    downstream collector (not used here)
   */
  override def apply(window: TimeWindow,
                     input: Iterable[String],
                     out: Collector[String]): Unit = {
    val clock = new SimpleDateFormat("HH:mm:ss")
    // Window bounds first, then the '|'-separated elements, all tab-delimited.
    val startText = clock.format(window.getStart)
    val endText = clock.format(window.getEnd)
    val joined = input.toList.mkString("|")
    println("时间:" + startText + "\t" + endText + "\t" + joined)
  }
}
package com.baizhi.jsy.evictors;
import org.apache.flink.streaming.api.windowing.evictors.Evictor;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.runtime.operators.windowing.TimestampedValue;
import java.util.Iterator;
/**
 * Evictor that removes every window element whose value contains a configured substring.
 * A flag selects whether the removal runs before or after the window function.
 */
public class UserDefineEvictor implements Evictor<String, TimeWindow> {
    /** {@code true}: evict in {@link #evictAfter}; {@code false}: evict in {@link #evictBefore}. */
    private final boolean isEvictorafter;
    /** Substring that marks an element for removal; {@code null} means nothing is evicted. */
    private final String excludeContent;

    /**
     * @param isEvictorafter whether eviction happens after the window function; a
     *                       {@code null} value is treated as {@code false}
     * @param excludeContent substring that marks elements for removal; may be {@code null},
     *                       in which case no element is removed
     */
    public UserDefineEvictor(Boolean isEvictorafter, String excludeContent) {
        // Unbox defensively: a null flag would otherwise throw NPE on first use.
        this.isEvictorafter = Boolean.TRUE.equals(isEvictorafter);
        this.excludeContent = excludeContent;
    }

    /** Runs the eviction before the window function when the flag is {@code false}. */
    @Override
    public void evictBefore(Iterable<TimestampedValue<String>> iterable, int i, TimeWindow timeWindow, EvictorContext evictorContext) {
        if (!isEvictorafter) {
            evict(iterable, i, timeWindow, evictorContext);
        }
    }

    /** Runs the eviction after the window function when the flag is {@code true}. */
    @Override
    public void evictAfter(Iterable<TimestampedValue<String>> iterable, int i, TimeWindow timeWindow, EvictorContext evictorContext) {
        if (isEvictorafter) {
            evict(iterable, i, timeWindow, evictorContext);
        }
    }

    /** Removes every element whose value contains {@code excludeContent}. */
    private void evict(Iterable<TimestampedValue<String>> iterable, int i, TimeWindow timeWindow, EvictorContext evictorContext) {
        // String.contains(null) throws NPE, so without a filter there is nothing to do.
        if (excludeContent == null) {
            return;
        }
        for (Iterator<TimestampedValue<String>> iterator = iterable.iterator(); iterator.hasNext();) {
            String value = iterator.next().getValue();
            // Elements with a null value cannot match and are kept.
            if (value != null && value.contains(excludeContent)) {
                iterator.remove();
            }
        }
    }
}