求每分钟里点击量前3的热门商品 Flink java代码

求每分钟里点击量前3的热门商品

数据示例:

UserBehavior1.csv

用户ID,商品ID,商品类目ID,用户行为,发生时间
58,16,5,fav,1569866397000
834,22,0,buy,1569866397000
56,33,0,cart,1569866397000
162,43,1,pv,1569866397000

由于数据量较大,此处仅展示部分示例数据。

HotItems.java

package com.xxxxx.flink.demo.topGoods;

import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;

import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import scala.Int;

import javax.annotation.Nullable;
import java.io.File;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Iterator;

/**
 * Flink streaming job that prints the top 3 most-clicked items for each sliding
 * event-time window.
 *
 * <p>Pipeline, in order:
 * <ol>
 *   <li>read {@code data/UserBehavior1.csv} line by line;</li>
 *   <li>parse each line into a {@code UserBehavior};</li>
 *   <li>assign event-time timestamps from the record's {@code timestamp} field;</li>
 *   <li>keep only records whose behavior is {@code "pv"};</li>
 *   <li>key by {@code itemId} and count records per 60-minute window sliding every 5 minutes;</li>
 *   <li>key the per-item counts by {@code windowEnd} and rank them with {@link TopNHotItems};</li>
 *   <li>print the formatted ranking.</li>
 * </ol>
 */
public class HotItems {

    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {

        // Create the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Process records by event time rather than processing time.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // Parallelism 1 keeps the console output in order; per the original author,
        // changing it does not affect the correctness of the result.
        env.setParallelism(1);

        DataStreamSource<String> textFile = env.readTextFile("data/UserBehavior1.csv");

        // Parse each CSV line: userId,itemId,categoryId,behavior,timestamp.
        // NOTE(review): a line with fewer than 5 fields or a non-numeric field
        // (e.g. a header row, if the file has one) will throw and fail the job.
        SingleOutputStreamOperator<UserBehavior> ds = textFile.map(new MapFunction<String, UserBehavior>() {
            @Override
            public UserBehavior map(String value) throws Exception {
                String[] fields = value.split(",");
                // parseLong/parseInt return primitives directly; valueOf would box
                // and then immediately unbox.
                long userId = Long.parseLong(fields[0]);
                long itemId = Long.parseLong(fields[1]);
                int categoryId = Integer.parseInt(fields[2]);
                String behavior = fields[3];
                long timestamp = Long.parseLong(fields[4]);
                return new UserBehavior(userId, itemId, categoryId, behavior, timestamp);
            }
        });

        // Extract the event timestamp and generate watermarks. The extractor used
        // here expects timestamps to arrive in ascending order.
        SingleOutputStreamOperator<UserBehavior> outputStreamOperator = ds.assignTimestampsAndWatermarks(new AscendingTimestampExtractor<UserBehavior>() {
            @Override
            public long extractAscendingTimestamp(UserBehavior userBehavior) {
                // The record's own time is returned unchanged; the sample rows
                // carry 13-digit (epoch-millisecond) values.
                return userBehavior.timestamp;
            }
        });

        // Keep only click ("pv") events.
        SingleOutputStreamOperator<UserBehavior> filterOutputStream = outputStreamOperator.filter(new FilterFunction<UserBehavior>() {
            @Override
            public boolean filter(UserBehavior userBehavior) throws Exception {
                // Constant-first equals tolerates a null behavior field.
                return "pv".equals(userBehavior.behavior);
            }
        });

        // Group by item id.
        KeyedStream<UserBehavior, Tuple> keyedStream = filterOutputStream.keyBy("itemId");

        // Every 5 minutes, evaluate the most recent 60 minutes of data.
        WindowedStream<UserBehavior, Tuple, TimeWindow> windowedStream = keyedStream.timeWindow(Time.minutes(60), Time.minutes(5));

        // Count clicks per item per window. The count is aggregated incrementally so
        // that only one running Long is kept per key and window, instead of buffering
        // every UserBehavior until the window fires (each record belongs to 12
        // overlapping windows with this size/slide). The window function then only
        // attaches the key and the window end to the pre-aggregated count.
        SingleOutputStreamOperator<ItemViewCount> apply = windowedStream.aggregate(
                new AggregateFunction<UserBehavior, Long, Long>() {
                    @Override
                    public Long createAccumulator() {
                        return 0L;
                    }

                    @Override
                    public Long add(UserBehavior value, Long accumulator) {
                        return accumulator + 1;
                    }

                    @Override
                    public Long getResult(Long accumulator) {
                        return accumulator;
                    }

                    @Override
                    public Long merge(Long a, Long b) {
                        return a + b;
                    }
                },
                new WindowFunction<Long, ItemViewCount, Tuple, TimeWindow>() {
                    @Override
                    public void apply(Tuple key, TimeWindow window, Iterable<Long> input, Collector<ItemViewCount> out) throws Exception {
                        Long itemId = key.getField(0);
                        // The iterable holds exactly the single aggregated count.
                        long count = input.iterator().next();
                        out.collect(ItemViewCount.of(itemId, window.getEnd(), count));
                    }
                });

        // Regroup the per-item counts by the window they belong to.
        KeyedStream<ItemViewCount, Tuple> windowEnd = apply.keyBy("windowEnd");

        // Rank the items of each window and format the top 3 as text.
        SingleOutputStreamOperator<String> process = windowEnd.process(new TopNHotItems(3));

        process.print();

        env.execute("Hot Items Job");
    }

}

TopNHotItems.java

package com.xxxxx.flink.demo.topGoods;

import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;


/**
 * Computes the top-N most-clicked items of a window.
 *
 * <p>The stream is keyed by the window-end timestamp; every incoming
 * {@code ItemViewCount} is buffered in keyed list state, and an event-time timer
 * at {@code windowEnd + 1} triggers the ranking. The output is one formatted
 * multi-line string per fired timer.
 */
public class TopNHotItems extends KeyedProcessFunction<Tuple, ItemViewCount, String> {

    /** Maximum number of items listed in each ranking. */
    private final int topSize;

    /**
     * @param topSize how many of the highest-count items to include in the output
     */
    public TopNHotItems(int topSize) {
        this.topSize = topSize;
    }

    // Buffers the per-item click counts of the current key (window end) until the
    // timer fires and the ranking can be computed.
    private ListState<ItemViewCount> itemState;

    /**
     * Registers the keyed list state used to buffer incoming counts.
     *
     * @param parameters configuration passed through to the superclass
     * @throws Exception if the superclass initialisation fails
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        ListStateDescriptor<ItemViewCount> itemsStateDesc = new ListStateDescriptor<>(
                "itemState-state",
                ItemViewCount.class);
        itemState = getRuntimeContext().getListState(itemsStateDesc);
    }

    /**
     * Buffers one per-item count and schedules the ranking for its window.
     *
     * @param input     click count of one item in one window
     * @param context   gives access to the timer service
     * @param collector unused here; results are emitted from {@link #onTimer}
     * @throws Exception if state access fails
     */
    @Override
    public void processElement(
            ItemViewCount input,
            Context context,
            Collector<String> collector) throws Exception {

        // Debug trace of every incoming element (kept from the original demo).
        System.err.println(input);
        // Cache every element in state.
        itemState.add(input);

        // Register an event-time timer at windowEnd + 1. It fires once the watermark
        // passes that time, i.e. when all counts belonging to this windowEnd have
        // been received. NOTE(review): this is called once per element; repeated
        // registrations for the same key and timestamp are expected to coalesce
        // into a single timer per Flink's timer semantics.
        context.timerService().registerEventTimeTimer(input.windowEnd + 1);
    }

    /**
     * Ranks the buffered counts, clears the state and emits the formatted top-N.
     *
     * @param timestamp the timer's firing time ({@code windowEnd + 1})
     * @param ctx       timer context (unused)
     * @param out       receives the formatted ranking string
     * @throws Exception if state access fails or the sleep is interrupted
     */
    @Override
    public void onTimer(
            long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {

        // Total clicks across all items in this window. Accumulated as long: the
        // previous int accumulator silently narrowed on += and could overflow.
        long totalClicks = 0;
        // Copy all buffered counts out of state.
        List<ItemViewCount> allItems = new ArrayList<>();

        for (ItemViewCount item : itemState.get()) {
            totalClicks += item.viewCount;
            allItems.add(item);
        }

        // Drop the buffered data for this window to free state.
        itemState.clear();
        // Sort by click count, highest first. Long.compare avoids the wrong sign
        // that casting a long difference to int can produce.
        allItems.sort((a, b) -> Long.compare(b.viewCount, a.viewCount));

        // Format the ranking as a string for printing.
        StringBuilder result = new StringBuilder();
        result.append("=============================================\n");
        // timestamp - 1 recovers the window end the timer was registered for.
        result.append("时间: ").append(new Timestamp(timestamp - 1));
        result.append("  总点击量: ").append(totalClicks).append("\n");

        for (int i = 0; i < allItems.size() && i < topSize; i++) {
            ItemViewCount currentItem = allItems.get(i);
            // Example line -> No1:  商品ID=12224  浏览量=2413
            // Ranks are 1-based, matching the example above.
            result.append("No").append(i + 1).append(":")
                    .append("  商品ID=").append(currentItem.itemId)
                    .append("  浏览量=").append(currentItem.viewCount)
                    .append("\n");
        }
        result.append("=============================================\n\n");

        // Throttle the output to simulate a live, scrolling result. This blocks the
        // calling thread for one second per fired timer; demo-only behaviour.
        Thread.sleep(1000);

        out.collect(result.toString());
    }
}
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值