Flink state应用--实现topN

利用Flink state实现topN

获取数据源和自定义 sink(下沉器)的实现在此不再赘述,本文主要解析 TopN 的核心代码。

  1. 获取数据流并转化成对象
// Parse every incoming record into a UserAction object.
DataStream<UserAction> dataStream = dataStreamSource
        .map(json -> JSONObject.parseObject(json, UserAction.class));
  2. 为乱序数据提取事件时间戳,并设置watermark
DataStream<UserAction> timedData = dataStream.assignTimestampsAndWatermarks(new UserActionTSExtractor())



public static class UserActionTSExtractor extends BoundedOutOfOrdernessTimestampExtractor<UserAction>{
   public UserActionTSExtractor<UserAction>{
       super(Time.seconds(MAX_EVENT_DELAY))
   }
   @Override
   public long extractTimestamp(UserAction userAction){
      return userAction.getTimeStamp()
   }
}
  3. 过滤出购买行为
DataStream<UserAction> filterData = timedData.filter(new FilterFunction<UserAction>{
   @Override
   public boolean filter(userAction userAction) throws Exception{
      return userAction.getBehavior().contains("buy")
   }
})
  4. 窗口统计购买数量
 DataStream<ItemBuyCount> windowedData = filterData.keyBy("itemId")
 .timeWindow(Time.minutes(60L),Time.minutes(5L))
 .aggregate(new CountAgg(), new WindowResultFunction())

//商品购买实体类
/**
 * Purchase count of a single item for a single window.
 *
 * <p>The fields stay public and the implicit no-argument constructor is kept,
 * because the stream of this type is keyed by field name
 * ({@code keyBy("windowEnd")}).
 */
public static class ItemBuyCount {
    /** Id of the item that was bought. */
    public long itemId;
    /** End timestamp of the window this count belongs to. */
    public long windowEnd;
    /** Number of purchases counted for the item in that window. */
    public long buyCount;

    /**
     * Builds an {@code ItemBuyCount} from its three values.
     *
     * @param itemId    id of the item
     * @param windowEnd end timestamp of the window
     * @param buyCount  number of purchases in the window
     * @return a new, fully populated instance
     */
    public static ItemBuyCount of(long itemId, long windowEnd, long buyCount) {
        ItemBuyCount result = new ItemBuyCount();
        result.itemId = itemId;
        result.windowEnd = windowEnd;
        result.buyCount = buyCount;
        return result;
    }
}

//count聚合函数
/**
 * Aggregate function that counts elements: the accumulator starts at zero,
 * grows by one per {@link UserAction}, and is returned unchanged as the result.
 */
public static class CountAgg implements AggregateFunction<UserAction, Long, Long> {

    /** Starts a fresh count at zero. */
    @Override
    public Long createAccumulator() {
        return Long.valueOf(0L);
    }

    /** Increments the running count; the element's content is not inspected. */
    @Override
    public Long add(UserAction userAction, Long acc) {
        final long next = acc + 1L;
        return next;
    }

    /** The accumulator itself is the result. */
    @Override
    public Long getResult(Long acc) {
        return acc;
    }

    /** Combines two partial counts by adding them. */
    @Override
    public Long merge(Long acc1, Long acc2) {
        return Long.sum(acc1, acc2);
    }
}



public static class WindowResultFunction implements WindowFunction<Long,ItemBuCount,Tuple,TimeWindow>{
   @Override 
   public void apply(Tuple key,TimeWindow window,Iterable<Long> aggregatationResult,Collector<ItemBuyCount> collector) throws Wxception{
       Long itemId = (Tuple<Long> key).f0;
       Long count = aggregationresult.iterator.next();
       collector.collect(ItemBuyCount.of(itemId,window.getEnd(),count))
   }
}
  5. 计算TopN
// Group the per-item counts by window end and emit the 3 best sellers of each window.
DataStream<List<ItemBuyCount>> topItems = windowedData
        .keyBy("windowEnd")
        .process(new TopNHotItems(3));
public static TopNHotItems extends KeyedProcessFunction<Tuple,ItemBuyCount,List<ItemBuyCount>>{
   private final int topSize;
   private ListState<ItemBuyCount> itemState;
   public TopNHotItems(int topSize){
     this.topSize = topSize;
   }
   @Override
   public void open(Configuration parameters) throws Exception{
      super.open(parameters);
      //注册状态
      ListStateDescriptor<ItemBuyCount> itemViewStateDesc = new ListStateDescriptor<ItemBuyCount>("itemState",ItemBuyCount.class);
      itemState = getRuntimeContext().getListState(itemViewStateDesc);
   }

   @Override
   public void processElement(ItemBuyCount input,Context context, Collector<List<ItemBuyCount>> collector) throws Exception{
       itemState.add(input);
       //注册 window+1的EventTime,当触发时候,说明已经收集了windowEnd的所有商品
       context.timerService().registerEventTimeTimer(input.windowEnd+1);
   }



@Override
public void onTimer(long timestamp,OnTimerContext ctx,Collector<List<ItemBuyCount>> out) throws Exception{
    List<ItemBuyCount> allItems = new ArrayList<ItemBuyCount>();
    for(ItemBuyCount item:itemState.get()){
         allItems.add(item);
    }
    //请空状态,释放空间
    itemState.clear();
    allItems.sort(new Comparator<ItemBuyCount>(){
         @Override
         public int compare(ItemBuyCount o1,ItembuyCount o2){return (int) o2.buyCount-o1.buyCount}
    });

    List<ItemBuyCount> itemBuyCounts = new ArrayList<>();
    for(int i=0;i<topSize;i++){
        ItemBuyCount currentItem = allItems.get(i);
        itemBuyCounts.add(currentItem);
    }

   out.collect(itemBuyCounts);
}
   
}

6. 最后把得到的流输出到自定义的sink中

  • 3
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值