有关Flink窗口计算中的剔除器(Evictors)

Flink的窗口模型允许在WindowAssigner和Trigger之外再指定一个可选的Evictor,可以使用evictor(…)方法来完成此操作。Evictor可以在触发器触发之后、窗口函数(Window Function)执行之前或之后从窗口中删除元素。

/**
 * Callback interface with two hooks that are invoked around the windowing function:
 * one before it is called and one after it is called. Both hooks receive the
 * elements of the window, their count, the window itself and an evictor context.
 */
public interface Evictor<T, W extends Window> extends Serializable {
 /**
  * Called before the windowing function is invoked.
  *
  * @param elements All elements currently in the window.
  * @param size The total number of elements currently in the window.
  * @param window The {@link Window}
  * @param evictorContext The context object for the Evictor
  */
 void evictBefore(Iterable<TimestampedValue<T>> elements, int size, W window,
EvictorContext evictorContext);
 /**
  * Called after the windowing function has been invoked.
  *
  * @param elements The elements currently in the pane.
  * @param size The current number of elements in the pane.
  * @param window The {@link Window}
  * @param evictorContext The context for the Evictor
  */
 void evictAfter(Iterable<TimestampedValue<T>> elements, int size, W window,
EvictorContext evictorContext);
}

evictBefore()包含要在窗口函数执行之前应用的剔除逻辑,而evictAfter()包含要在窗口函数执行之后应用的剔除逻辑。在窗口函数执行之前被剔除的元素不会被窗口函数处理。
Flink内置了三个预先实现的剔除器(Evictor),分别是:
CountEvictor:在窗口中保留用户指定数量的元素,并从窗口缓冲区的开头丢弃其余的元素。

/**
 * Trims the window contents down to at most {@code maxCount} elements by removing
 * the surplus from the front of the iteration order.
 *
 * @param elements the elements currently in the window
 * @param size the number of elements in {@code elements}
 * @param ctx the evictor context (not used here)
 */
private void evict(Iterable<TimestampedValue<Object>> elements, int size, EvictorContext ctx) {
    // Already within the limit: nothing to remove.
    if (size <= maxCount) {
        return;
    }

    // Number of leading elements that exceed the limit.
    final long surplus = size - maxCount;
    int visited = 0;
    final Iterator<TimestampedValue<Object>> cursor = elements.iterator();
    while (cursor.hasNext()) {
        cursor.next();
        visited++;
        // Once the surplus has been consumed, every remaining element is kept.
        if (visited > surplus) {
            return;
        }
        cursor.remove();
    }
}

DeltaEvictor
采用DeltaFunction和阈值,计算窗口缓冲区中最后一个元素与其余每个元素之间的增量,并删除增量大于或等于阈值的元素。

 /**
  * Removes every element whose delta to the last element of the window is at least
  * {@code threshold}, as computed by {@code deltaFunction}.
  *
  * @param elements the elements currently in the window
  * @param size the number of elements in {@code elements} (not used here)
  * @param ctx the evictor context (not used here)
  */
 private void evict(Iterable<TimestampedValue<T>> elements, int size, EvictorContext ctx) {
     // The last element is the reference point every element is compared against.
     final TimestampedValue<T> reference = Iterables.getLast(elements);
     final Iterator<TimestampedValue<T>> cursor = elements.iterator();
     while (cursor.hasNext()) {
         final TimestampedValue<T> candidate = cursor.next();
         final boolean reachesThreshold =
                 deltaFunction.getDelta(candidate.getValue(), reference.getValue()) >= this.threshold;
         if (reachesThreshold) {
             cursor.remove();
         }
     }
 }

TimeEvictor
以毫秒为单位的时间间隔(interval)作为参数。对于给定的窗口,它会在窗口元素中找到最大时间戳max_ts,并删除所有时间戳小于或等于max_ts-interval的元素,即只保留最近一段时间间隔内的数据。

/**
 * Removes every element whose timestamp is at or before
 * {@code getMaxTimestamp(elements) - windowSize}. Does nothing when
 * {@code hasTimestamp(elements)} reports false.
 *
 * @param elements the elements currently in the window
 * @param size the number of elements in {@code elements} (not used here)
 * @param ctx the evictor context (not used here)
 */
private void evict(Iterable<TimestampedValue<Object>> elements, int size, EvictorContext ctx) {
    if (hasTimestamp(elements)) {
        // Cutoff is measured back from the largest timestamp found in the window.
        final long cutoff = getMaxTimestamp(elements) - windowSize;
        final Iterator<TimestampedValue<Object>> cursor = elements.iterator();
        while (cursor.hasNext()) {
            if (cursor.next().getTimestamp() <= cutoff) {
                cursor.remove();
            }
        }
    }
}
/**
 * Reports whether the first element of {@code elements} has a timestamp.
 *
 * @param elements the elements to inspect
 * @return the first element's {@code hasTimestamp()} result, or {@code false} when
 *         there are no elements
 */
private boolean hasTimestamp(Iterable<TimestampedValue<Object>> elements) {
    final Iterator<TimestampedValue<Object>> cursor = elements.iterator();
    // Only the first element is consulted; later elements are not checked.
    return cursor.hasNext() && cursor.next().hasTimestamp();
}
/**
 * Finds the largest timestamp among {@code elements}.
 *
 * @param elements the elements to scan
 * @return the maximum timestamp, or {@code Long.MIN_VALUE} when there are no elements
 */
private long getMaxTimestamp(Iterable<TimestampedValue<Object>> elements) {
    long newest = Long.MIN_VALUE;
    for (TimestampedValue<Object> entry : elements) {
        final long timestamp = entry.getTimestamp();
        if (timestamp > newest) {
            newest = timestamp;
        }
    }
    return newest;
}

UserDefineEvictor(自定义剔除器)

/**
 * Custom {@link Evictor} for {@code String} elements in a {@link TimeWindow} that removes
 * every element whose value contains a configured substring.
 *
 * <p>The eviction runs in exactly one of the two hooks, selected at construction time:
 * in {@link #evictBefore} when {@code isEvictorAfter} is false, in {@link #evictAfter}
 * when it is true.
 */
public class UserDefineEvictor implements Evictor<String, TimeWindow> {
    /** True to evict in {@code evictAfter}, false to evict in {@code evictBefore}. */
    private final boolean isEvictorAfter;
    /** Substring that marks an element for removal; {@code null} means nothing is removed. */
    private final String excludeContent;

    /**
     * @param isEvictorAfter whether to evict after (true) or before (false) the window
     *                       function; {@code null} is treated as false
     * @param excludeContent substring that marks an element for removal; may be {@code null},
     *                       in which case no element is removed
     */
    public UserDefineEvictor(Boolean isEvictorAfter, String excludeContent) {
        // Resolve the boxed flag once so a null argument cannot cause an unboxing
        // NullPointerException later inside the hooks.
        this.isEvictorAfter = Boolean.TRUE.equals(isEvictorAfter);
        this.excludeContent = excludeContent;
    }

    @Override
    public void evictBefore(Iterable<TimestampedValue<String>> elements, int size,
            TimeWindow window, EvictorContext evictorContext) {
        if (!isEvictorAfter) {
            evict(elements, size, window, evictorContext);
        }
    }

    @Override
    public void evictAfter(Iterable<TimestampedValue<String>> elements, int size,
            TimeWindow window, EvictorContext evictorContext) {
        if (isEvictorAfter) {
            evict(elements, size, window, evictorContext);
        }
    }

    /**
     * Prints each element's value to standard output and removes the elements whose value
     * contains {@code excludeContent}.
     */
    private void evict(Iterable<TimestampedValue<String>> elements, int size,
            TimeWindow window, EvictorContext evictorContext) {
        for (Iterator<TimestampedValue<String>> iterator = elements.iterator();
                iterator.hasNext(); ) {
            TimestampedValue<String> element = iterator.next();
            String value = element.getValue();
            // Trace every inspected element, whether or not it ends up being removed.
            System.out.println(value);
            // String.contains(null) throws, and a null value cannot contain anything,
            // so both cases are treated as "no match".
            if (excludeContent != null && value != null && value.contains(excludeContent)) {
                iterator.remove();
            }
        }
    }
}
/** Entry point of a streaming job that applies `UserDefineEvictor` to sliding
  * processing-time windows over lines read from a socket.
  */
object FlinkSlidingWindowWithUserDefineEvictor {
  def main(args: Array[String]): Unit = {
    val environment = StreamExecutionEnvironment.getExecutionEnvironment
    // Source: text lines from host "CentOS", port 9999.
    val lines = environment.socketTextStream("CentOS", 9999)

    // Windows of 4 seconds that slide every 2 seconds, over the whole (non-keyed) stream.
    val slidingWindows =
      lines.windowAll(SlidingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2)))
    // `false` selects eviction before the window function; "error" is the substring to drop.
    val evictedWindows = slidingWindows.evictor(new UserDefineEvictor(false, "error"))

    evictedWindows
      .apply(new UserDefineSlidingWindowFunction)
      .print()

    // Submit the job for execution.
    environment.execute("Sliding Window Stream WordCount")
  }
}
/** Window function that prints one line per window to standard output: the window's
  * start and end formatted as `HH:mm:ss`, followed by the window's elements joined
  * with " | ". Nothing is emitted to the collector.
  */
class UserDefineSlidingWindowFunction extends AllWindowFunction[String, String, TimeWindow] {
  override def apply(window: TimeWindow,
                     input: Iterable[String],
                     out: Collector[String]): Unit = {
    val clock = new SimpleDateFormat("HH:mm:ss")
    val from = clock.format(window.getStart)
    val until = clock.format(window.getEnd)
    val joined = input.toList.mkString(" | ")
    println(s"window:$from\t$until $joined")
  }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值