利用Flink state实现topN
获取数据源和自定义下沉器(sink)的部分此处暂不赘述,本文主要解析topN的核心代码。
- 获取数据流并转化成对象
// Deserialize every incoming JSON string into a UserAction instance.
DataStream<UserAction> dataStream = dataStreamSource
        .map(value -> JSONObject.parseObject(value, UserAction.class));
- 从乱序数据中抽取事件时间戳,并设置watermark
DataStream<UserAction> timedData = dataStream.assignTimestampsAndWatermarks(new UserActionTSExtractor())
public static class UserActionTSExtractor extends BoundedOutOfOrdernessTimestampExtractor<UserAction>{
public UserActionTSExtractor<UserAction>{
super(Time.seconds(MAX_EVENT_DELAY))
}
@Override
public long extractTimestamp(UserAction userAction){
return userAction.getTimeStamp()
}
}
- 过滤出购买行为
DataStream<UserAction> filterData = timedData.filter(new FilterFunction<UserAction>{
@Override
public boolean filter(userAction userAction) throws Exception{
return userAction.getBehavior().contains("buy")
}
})
- 窗口统计购买数量
DataStream<ItemBuyCount> windowedData = filterData.keyBy("itemId")
.timeWindow(Time.minutes(60L),Time.minutes(5L))
.aggregate(new CountAgg(), new WindowResultFunction())
/**
 * Purchase count of one item within one window.
 *
 * <p>The fields stay public and the implicit no-argument constructor is kept, so the
 * class can still be addressed by field name (for example {@code keyBy("windowEnd")}).
 */
public static class ItemBuyCount {
    /** Identifier of the item. */
    public long itemId;
    /** End timestamp of the window this count belongs to. */
    public long windowEnd;
    /** Number of purchases counted for the item in that window. */
    public long buyCount;

    /**
     * Static factory that builds a fully populated instance.
     *
     * @param itemId    identifier of the item
     * @param windowEnd end timestamp of the window
     * @param buyCount  number of purchases in the window
     * @return a new {@code ItemBuyCount} holding the given values
     */
    public static ItemBuyCount of(long itemId, long windowEnd, long buyCount) {
        ItemBuyCount result = new ItemBuyCount();
        result.itemId = itemId;
        result.windowEnd = windowEnd;
        result.buyCount = buyCount;
        return result;
    }
}
/**
 * Counting aggregate: the accumulator is the number of {@link UserAction} records seen
 * so far, and the result is that same number.
 */
public static class CountAgg implements AggregateFunction<UserAction, Long, Long> {

    /** Starts every aggregation at zero. */
    @Override
    public Long createAccumulator() {
        return 0L;
    }

    /** Increments the running count by one; the record's content is not inspected. */
    @Override
    public Long add(UserAction userAction, Long accumulator) {
        return accumulator + 1L;
    }

    /** The accumulator already is the final count. */
    @Override
    public Long getResult(Long accumulator) {
        return accumulator;
    }

    /** Combines two partial counts by adding them. */
    @Override
    public Long merge(Long left, Long right) {
        return Long.sum(left, right);
    }
}
public static class WindowResultFunction implements WindowFunction<Long,ItemBuCount,Tuple,TimeWindow>{
@Override
public void apply(Tuple key,TimeWindow window,Iterable<Long> aggregatationResult,Collector<ItemBuyCount> collector) throws Wxception{
Long itemId = (Tuple<Long> key).f0;
Long count = aggregationresult.iterator.next();
collector.collect(ItemBuyCount.of(itemId,window.getEnd(),count))
}
}
- 计算TopN
// Group the per-item counts by the window they belong to and rank the top 3 of each window.
DataStream<List<ItemBuyCount>> topItems = windowedData
        .keyBy("windowEnd")
        .process(new TopNHotItems(3));
public static TopNHotItems extends KeyedProcessFunction<Tuple,ItemBuyCount,List<ItemBuyCount>>{
private final int topSize;
private ListState<ItemBuyCount> itemState;
public TopNHotItems(int topSize){
this.topSize = topSize;
}
@Override
public void open(Configuration parameters) throws Exception{
super.open(parameters);
//注册状态
ListStateDescriptor<ItemBuyCount> itemViewStateDesc = new ListStateDescriptor<ItemBuyCount>("itemState",ItemBuyCount.class);
itemState = getRuntimeContext().getListState(itemViewStateDesc);
}
@Override
public void processElement(ItemBuyCount input,Context context, Collector<List<ItemBuyCount>> collector) throws Exception{
itemState.add(input);
//注册 window+1的EventTime,当触发时候,说明已经收集了windowEnd的所有商品
context.timerService().registerEventTimeTimer(input.windowEnd+1);
}
@Override
public void onTimer(long timestamp,OnTimerContext ctx,Collector<List<ItemBuyCount>> out) throws Exception{
List<ItemBuyCount> allItems = new ArrayList<ItemBuyCount>();
for(ItemBuyCount item:itemState.get()){
allItems.add(item);
}
//请空状态,释放空间
itemState.clear();
allItems.sort(new Comparator<ItemBuyCount>(){
@Override
public int compare(ItemBuyCount o1,ItembuyCount o2){return (int) o2.buyCount-o1.buyCount}
});
List<ItemBuyCount> itemBuyCounts = new ArrayList<>();
for(int i=0;i<topSize;i++){
ItemBuyCount currentItem = allItems.get(i);
itemBuyCounts.add(currentItem);
}
out.collect(itemBuyCounts);
}
}
- 最后把得到的流输出到自定义的sink中