53.Spark大型电商项目-用户访问session分析-top10活跃session之计算top10品类被各session点击的次数

目录

代码

UserVisitSessionAnalyzeSpark.java


本篇文章记录用户访问session分析-top10活跃session之计算top10品类被各session点击的次数。

代码

UserVisitSessionAnalyzeSpark.java

/**
     * Computes how many times each session clicked each of the top-10 categories.
     *
     * <p>The work done here is:
     * <ol>
     *   <li>extract the category id of every entry in {@code top10CategoryList} and
     *       parallelize the ids as a {@code <categoryId, categoryId>} pair RDD;</li>
     *   <li>group the detail rows by session id and count, per session, the clicks on
     *       each category, emitting {@code <categoryId, "sessionId,count">};</li>
     *   <li>join the two RDDs so that only the top-10 categories remain.</li>
     * </ol>
     *
     * <p>The joined RDD is built but not consumed further inside this method.
     *
     * @param sc                 Spark context used to parallelize the top-10 category ids
     * @param taskId             id of the current task; not read by the code in this method
     * @param top10CategoryList  top-10 categories; the second tuple element is a
     *                           {@code |}-delimited string from which the field named
     *                           {@code Constants.FIELD_CATEGORY_ID} is parsed as a long
     * @param session2detailRDD  {@code <sessionId, detail row>} pairs; column 6 of each row
     *                           is read as the clicked category id
     */

    private static void getTop10Session(JavaSparkContext sc, long taskId,
                                        List<Tuple2<CategorySortKey, String>> top10CategoryList,
                                        JavaPairRDD<String, Row> session2detailRDD) {

        // Position of the clicked-category-id column inside a detail row.
        final int clickCategoryIdIndex = 6;

        /*
         * Step 1: turn the top-10 category list into a <categoryId, categoryId> RDD
         * so it can be joined against the per-session click counts below.
         */
        List<Tuple2<Long, Long>> top10CategoryIdList =
                new ArrayList<Tuple2<Long, Long>>(top10CategoryList.size());

        for (Tuple2<CategorySortKey, String> category : top10CategoryList) {
            // parseLong yields the primitive directly; no intermediate Long is boxed.
            long categoryId = Long.parseLong(StringUtils.getFieldFromConcatString(
                    category._2, "\\|", Constants.FIELD_CATEGORY_ID));
            top10CategoryIdList.add(new Tuple2<Long, Long>(categoryId, categoryId));
        }

        JavaPairRDD<Long, Long> top10CategoryIdRDD = sc.parallelizePairs(top10CategoryIdList);

        /*
         * Step 2: count how many times each session clicked each category.
         */
        JavaPairRDD<String, Iterable<Row>> sessionid2detailsRDD =
                session2detailRDD.groupByKey();

        JavaPairRDD<Long, String> categoryid2sessionCountRDD = sessionid2detailsRDD.flatMapToPair(
                new PairFlatMapFunction<Tuple2<String, Iterable<Row>>, Long, String>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public Iterator<Tuple2<Long, String>> call(Tuple2<String, Iterable<Row>> tuple)
                            throws Exception {
                        String sessionid = tuple._1;

                        // categoryId -> number of clicks within this session
                        Map<Long, Long> categoryCountMap = new HashMap<Long, Long>();
                        for (Row row : tuple._2) {
                            // Row.getLong fails on a null column, so rows without a
                            // click category are skipped instead of aborting the job.
                            if (row.isNullAt(clickCategoryIdIndex)) {
                                continue;
                            }
                            long categoryid = row.getLong(clickCategoryIdIndex);
                            // NOTE(review): Long.MAX_VALUE appears to be a "no click"
                            // sentinel written by the data producer - confirm upstream.
                            if (categoryid == Long.MAX_VALUE) {
                                continue;
                            }
                            Long previous = categoryCountMap.get(categoryid);
                            categoryCountMap.put(categoryid, previous == null ? 1L : previous + 1L);
                        }

                        // Emit one <categoryId, "sessionId,count"> pair per clicked category.
                        List<Tuple2<Long, String>> list =
                                new ArrayList<Tuple2<Long, String>>(categoryCountMap.size());
                        for (Map.Entry<Long, Long> categoryCountEntry : categoryCountMap.entrySet()) {
                            long categoryid = categoryCountEntry.getKey();
                            long count = categoryCountEntry.getValue();
                            String value = sessionid + "," + count;
                            list.add(new Tuple2<Long, String>(categoryid, value));
                        }
                        return list.iterator();
                    }
                }
        );

        /*
         * Step 3: the join keeps only the top-10 categories; the mapToPair then drops
         * the duplicated category id, leaving <categoryId, "sessionId,count">.
         */
        JavaPairRDD<Long, String> top10CategorySessionCountRDD = top10CategoryIdRDD
                .join(categoryid2sessionCountRDD)
                .mapToPair(new PairFunction<Tuple2<Long, Tuple2<Long, String>>, Long, String>() {

                    private static final long serialVersionUID = 1L;

                    @Override
                    public Tuple2<Long, String> call(Tuple2<Long, Tuple2<Long, String>> tuple)
                            throws Exception {
                        return new Tuple2<Long, String>(tuple._1, tuple._2._2);
                    }
                });
    }

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值