Flink的窗口模型允许在WindowAssigner和Trigger之外再指定一个可选的Evictor,可以使用evictor(…)方法来完成此操作。Evictor可以在触发器触发之后、应用Window Function之前或之后从窗口中删除元素。
/**
 * Removes elements from a window pane around the invocation of the windowing function.
 *
 * @param <T> type of the elements held in the window
 * @param <W> type of {@link Window} this evictor is applied to
 */
public interface Evictor<T, W extends Window> extends Serializable {
/**
* Called before the windowing function is invoked.
*
* @param elements all elements currently in the window
* @param size total number of elements currently in the window
* @param window The {@link Window}
* @param evictorContext the context object for the Evictor
*/
void evictBefore(Iterable<TimestampedValue<T>> elements, int size, W window,
EvictorContext evictorContext);
/**
* Called after the windowing function has been invoked.
*
* @param elements The elements currently in the pane.
* @param size The current number of elements in the pane.
* @param window The {@link Window}
* @param evictorContext The context for the Evictor
*/
void evictAfter(Iterable<TimestampedValue<T>> elements, int size, W window,
EvictorContext evictorContext);
}
evictBefore()包含要在窗口函数之前应用的剔除逻辑,而evictAfter()包含要在窗口函数之后应用的剔除逻辑。在应用窗口函数之前被剔除的元素将不会被窗口函数处理。
Flink内置了三个预先实现的Evictor(剔除器),分别是:
CountEvictor:在窗口中保留用户指定数量的元素,并从窗口缓冲区的开头丢弃其余的元素。
/**
 * Trims the window down to at most {@code maxCount} elements by removing
 * elements from the front of the iteration order.
 *
 * @param elements the elements currently buffered in the window
 * @param size     number of elements currently buffered
 * @param ctx      evictor context (not used here)
 */
private void evict(Iterable<TimestampedValue<Object>> elements, int size,
        EvictorContext ctx) {
    // Nothing to drop while the window holds no more than maxCount elements.
    if (size <= maxCount) {
        return;
    }

    // Number of leading elements that exceed the allowed count.
    final long surplus = size - maxCount;

    Iterator<TimestampedValue<Object>> cursor = elements.iterator();
    int visited = 0;
    while (cursor.hasNext()) {
        cursor.next();
        visited++;
        if (visited > surplus) {
            // The surplus has been removed; every remaining element is kept.
            return;
        }
        cursor.remove();
    }
}
DeltaEvictor
采用DeltaFunction和阈值,计算窗口缓冲区中最后一个元素与其余每个元素之间的增量,并删除增量大于或等于阈值的元素。
/**
 * Removes every element whose delta to the last element of the window is
 * greater than or equal to the configured threshold.
 *
 * @param elements the elements currently buffered in the window
 * @param size     number of elements currently buffered (not used here)
 * @param ctx      evictor context (not used here)
 */
private void evict(Iterable<TimestampedValue<T>> elements, int size,
        EvictorContext ctx) {
    // The last buffered element is the reference every other element is compared to.
    final TimestampedValue<T> reference = Iterables.getLast(elements);

    final Iterator<TimestampedValue<T>> cursor = elements.iterator();
    while (cursor.hasNext()) {
        final TimestampedValue<T> candidate = cursor.next();
        // Keep the candidate while its delta to the reference stays below the threshold.
        if (!(deltaFunction.getDelta(candidate.getValue(), reference.getValue())
                >= this.threshold)) {
            continue;
        }
        cursor.remove();
    }
}
TimeEvictor
以毫秒为单位的时间间隔(interval)作为参数。对于给定的窗口,它会在窗口元素中找到最大时间戳max_ts,并删除所有时间戳小于或等于max_ts-interval的元素,即只保留最近一段时间间隔内的数据。
/**
 * Removes every element whose timestamp is at or below
 * {@code (largest timestamp in the window) - windowSize}.
 * Does nothing when {@code hasTimestamp(elements)} reports false.
 *
 * @param elements the elements currently buffered in the window
 * @param size     number of elements currently buffered (not used here)
 * @param ctx      evictor context (not used here)
 */
private void evict(Iterable<TimestampedValue<Object>> elements, int size,
        EvictorContext ctx) {
    if (hasTimestamp(elements)) {
        // The newest timestamp in the window anchors the retention interval.
        final long newest = getMaxTimestamp(elements);
        final long cutoff = newest - windowSize;

        Iterator<TimestampedValue<Object>> cursor = elements.iterator();
        while (cursor.hasNext()) {
            TimestampedValue<Object> candidate = cursor.next();
            if (candidate.getTimestamp() <= cutoff) {
                cursor.remove();
            }
        }
    }
}
/**
 * Reports whether the first element of {@code elements} carries a timestamp.
 * Only the first element is inspected; an empty iterable yields {@code false}.
 *
 * @param elements the elements currently buffered in the window
 * @return {@code true} if a first element exists and it has a timestamp
 */
private boolean hasTimestamp(Iterable<TimestampedValue<Object>> elements) {
    Iterator<TimestampedValue<Object>> cursor = elements.iterator();
    return cursor.hasNext() && cursor.next().hasTimestamp();
}
/**
 * Finds the largest timestamp among {@code elements}.
 *
 * @param elements the elements currently buffered in the window
 * @return the largest timestamp seen, or {@link Long#MIN_VALUE} if there are no elements
 */
private long getMaxTimestamp(Iterable<TimestampedValue<Object>> elements) {
    long newest = Long.MIN_VALUE;
    for (TimestampedValue<Object> candidate : elements) {
        final long timestamp = candidate.getTimestamp();
        if (timestamp > newest) {
            newest = timestamp;
        }
    }
    return newest;
}
UserDefineEvictor(自定义剔除器)
/**
 * Custom {@link Evictor} that removes every window element whose string value
 * contains a configured substring.
 *
 * <p>Eviction runs either before or after the window function, never both,
 * depending on the flag given at construction time.
 */
public class UserDefineEvictor implements Evictor<String, TimeWindow> {
    /** When true, eviction runs after the window function; otherwise before it. */
    private final boolean isEvictorAfter;
    /** Elements whose value contains this substring are removed from the window. */
    private final String excludeContent;

    /**
     * @param isEvictorAfter {@code true} to evict after the window function,
     *                       {@code false} (or {@code null}) to evict before it
     * @param excludeContent substring that marks an element for removal; must not be null
     * @throws IllegalArgumentException if {@code excludeContent} is null
     */
    public UserDefineEvictor(Boolean isEvictorAfter, String excludeContent) {
        // Fail at construction time instead of with a NullPointerException from
        // String.contains(null) while a window is being evaluated.
        if (excludeContent == null) {
            throw new IllegalArgumentException("excludeContent must not be null");
        }
        // Store a primitive so a null flag cannot blow up on auto-unboxing later;
        // null falls back to false, i.e. evict before the window function.
        this.isEvictorAfter = Boolean.TRUE.equals(isEvictorAfter);
        this.excludeContent = excludeContent;
    }

    /** Evicts matching elements before the window function, unless configured to evict after it. */
    @Override
    public void evictBefore(Iterable<TimestampedValue<String>> elements, int size,
            TimeWindow window, EvictorContext evictorContext) {
        if (!isEvictorAfter) {
            evict(elements, size, window, evictorContext);
        }
    }

    /** Evicts matching elements after the window function, if configured to do so. */
    @Override
    public void evictAfter(Iterable<TimestampedValue<String>> elements, int size,
            TimeWindow window, EvictorContext evictorContext) {
        if (isEvictorAfter) {
            evict(elements, size, window, evictorContext);
        }
    }

    /**
     * Walks the window contents and removes each element whose value contains
     * {@code excludeContent}.
     */
    private void evict(Iterable<TimestampedValue<String>> elements, int size,
            TimeWindow window, EvictorContext evictorContext) {
        for (Iterator<TimestampedValue<String>> iterator = elements.iterator();
                iterator.hasNext();) {
            TimestampedValue<String> element = iterator.next();
            String value = element.getValue();
            // Prints every element the evictor inspects (kept from the original demo).
            System.out.println(value);
            // Remove elements that contain the excluded content; a null value cannot match.
            if (value != null && value.contains(excludeContent)) {
                iterator.remove();
            }
        }
    }
}
/**
 * Demo job: reads lines from a socket, groups them into sliding processing-time
 * windows, evicts lines containing "error" before the window function runs,
 * and hands the remaining lines to [[UserDefineSlidingWindowFunction]].
 */
object FlinkSlidingWindowWithUserDefineEvictor {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Source: one record per line received from the socket.
    val lines = env.socketTextStream("CentOS", 9999)

    // Sliding processing-time windows built from Time.seconds(4) and Time.seconds(2).
    val assigner = SlidingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2))

    // Transformation pipeline: window -> evict -> window function -> print sink.
    lines
      .windowAll(assigner)
      .evictor(new UserDefineEvictor(false, "error"))
      .apply(new UserDefineSlidingWindowFunction)
      .print()

    // Submit the streaming job.
    env.execute("Sliding Window Stream WordCount")
  }
}
/**
 * Window function that prints the window bounds (formatted as HH:mm:ss) and the
 * window contents to standard output.
 *
 * Nothing is written to the collector, so downstream operators receive no records.
 */
class UserDefineSlidingWindowFunction
  extends AllWindowFunction[String, String, TimeWindow] {

  /**
   * @param window the window being evaluated
   * @param input  the elements remaining in the window
   * @param out    output collector (not used here)
   */
  override def apply(window: TimeWindow,
                     input: Iterable[String],
                     out: Collector[String]): Unit = {
    val formatter = new SimpleDateFormat("HH:mm:ss")
    val bounds = "window:" + formatter.format(window.getStart) + "\t" + formatter.format(window.getEnd)
    val contents = input.toList.mkString(" | ")
    println(bounds + " " + contents)
  }
}