目录
UserVisitSessionAnalyzeSpark.java
本篇文章将记录用户访问session分析-top10热门品类之计算各品类点击、下单和支付的次数。
代码
UserVisitSessionAnalyzeSpark.java
/** * 获取Top10的品类 * @param filteredSessionid2AggrInfoRDD * @param session2actionRDD */ private static void getTop10Category(JavaPairRDD<String, String> filteredSessionid2AggrInfoRDD, JavaPairRDD<String, Row> session2actionRDD) { JavaPairRDD<String,Row> sessionid2detailRDD = filteredSessionid2AggrInfoRDD .join(session2actionRDD) .mapToPair( new PairFunction<Tuple2<String, Tuple2<String, Row>>, String, Row>() { private static final long serialVersionUID = 1L; @Override public Tuple2<String, Row> call(Tuple2<String, Tuple2<String, Row>> tuple) throws Exception { return new Tuple2<String,Row>(tuple._1,tuple._2._2); } }); // 获取session访问过的所有品类id // 访问过:指的是,点击过、下单过、支付过的品类 JavaPairRDD<Long,Long> categoryidRDD = sessionid2detailRDD.flatMapToPair( new PairFlatMapFunction<Tuple2<String, Row>, Long, Long>() { private static final long serialVersionUID = 1L; @Override public Iterator<Tuple2<Long, Long>> call(Tuple2<String, Row> tuple) throws Exception { Row row = tuple._2; List<Tuple2<Long,Long>> list = new ArrayList<>(); Long clickCategoryId = Long.valueOf(row.getLong(6)); long maxid = 10240L; if (clickCategoryId != maxid){ list.add(new Tuple2<Long,Long>(clickCategoryId,clickCategoryId)); } String orderCategoryIds = row.getString(8); if (orderCategoryIds != null){ String[] orderCategoryIdsSplited = orderCategoryIds.split(","); for (String orderCategory: orderCategoryIdsSplited){ list.add(new Tuple2<Long,Long>(Long.valueOf(orderCategory),Long.valueOf(orderCategory))); } } String payCategoryIds = row.getString(10); if (payCategoryIds != null){ String[] payCategoryIdsSplited = payCategoryIds.split(","); for (String payCategoryId : payCategoryIdsSplited){ list.add(new Tuple2<Long,Long>(Long.valueOf(payCategoryId),Long.valueOf(payCategoryId))); } } return list.iterator(); } } ); /** * 第二步:计算各品类的点击、下单和支付的次数 */ // 访问明细中,其中三种访问行为是:点击、下单和支付 // 分别来计算各品类点击、下单和支付的次数,可以先对访问明细数据进行过滤 // 分别过滤出点击、下单和支付行为,然后通过map、reduceByKey等算子来进行计算 // 计算各个品类的点击次数 JavaPairRDD<Long, Long> clickCategoryId2CountRDD 
= getClickCategoryId2CountRDD(sessionid2detailRDD); // 计算各个品类的下单次数 JavaPairRDD<Long, Long> orderCategoryId2CountRDD = getOrderCategoryId2CountRDD(sessionid2detailRDD); // 计算各个品类的支付次数 JavaPairRDD<Long, Long> payCategoryId2CountRDD = getPayCategoryId2CountRDD(sessionid2detailRDD); }
/**
 * Builds an RDD of (click category id, number of clicks).
 *
 * <p>Rows are first narrowed down to click actions, then mapped to {@code (categoryId, 1)}
 * and summed per category with {@code reduceByKey}.
 *
 * @param sessionid2detailRDD pair RDD whose value is an action-detail row; column 6 is read
 *                            as the click category id
 * @return pair RDD mapping each clicked category id to its click count
 */
private static JavaPairRDD<Long, Long> getClickCategoryId2CountRDD(
        JavaPairRDD<String, Row> sessionid2detailRDD) {
    JavaPairRDD<String, Row> clickActionRDD = sessionid2detailRDD.filter(
            new Function<Tuple2<String, Row>, Boolean>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Boolean call(Tuple2<String, Row> tuple) throws Exception {
                    Row row = tuple._2;
                    // The previous test boxed a primitive and compared it with null, which is
                    // always true (or throws when the column is NULL), so nothing was filtered.
                    if (row.isNullAt(6)) {
                        return false;
                    }
                    // NOTE(review): 10240L is the value getTop10Category skips when collecting
                    // click category ids; presumably a "no click" placeholder - confirm.
                    long noClickCategoryId = 10240L;
                    return row.getLong(6) != noClickCategoryId;
                }
            });

    // Emit one (categoryId, 1) pair per click action.
    JavaPairRDD<Long, Long> clickCategoryIdRDD = clickActionRDD.mapToPair(
            new PairFunction<Tuple2<String, Row>, Long, Long>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<Long, Long> call(Tuple2<String, Row> tuple) throws Exception {
                    long clickCategoryId = tuple._2.getLong(6);
                    return new Tuple2<Long, Long>(clickCategoryId, 1L);
                }
            });

    // Sum the ones per category id.
    JavaPairRDD<Long, Long> clickCategoryId2CountRDD = clickCategoryIdRDD.reduceByKey(
            new Function2<Long, Long, Long>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Long call(Long v1, Long v2) throws Exception {
                    return v1 + v2;
                }
            });

    return clickCategoryId2CountRDD;
}
/**
 * Builds an RDD of (order category id, number of orders).
 *
 * <p>Rows without order information are dropped; every id in the comma-separated order
 * category list then contributes {@code (categoryId, 1)}, and the ones are summed per
 * category with {@code reduceByKey}.
 *
 * @param sessionid2detailRDD pair RDD whose value is an action-detail row; column 8 is read
 *                            as a comma-separated list of ordered category ids
 * @return pair RDD mapping each ordered category id to its order count
 */
private static JavaPairRDD<Long, Long> getOrderCategoryId2CountRDD(
        JavaPairRDD<String, Row> sessionid2detailRDD) {
    // Keep only rows that actually carry order category ids.
    JavaPairRDD<String, Row> orderAction = sessionid2detailRDD.filter(
            new Function<Tuple2<String, Row>, Boolean>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Boolean call(Tuple2<String, Row> tuple) throws Exception {
                    return tuple._2.getString(8) != null;
                }
            });

    JavaPairRDD<Long, Long> orderCategoryIdRDD = orderAction.flatMapToPair(
            new PairFlatMapFunction<Tuple2<String, Row>, Long, Long>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Iterator<Tuple2<Long, Long>> call(Tuple2<String, Row> tuple)
                        throws Exception {
                    Row row = tuple._2;
                    String orderCategoryIds = row.getString(8);
                    String[] orderCategoryIdsSplited = orderCategoryIds.split(",");

                    List<Tuple2<Long, Long>> list = new ArrayList<Tuple2<Long, Long>>();
                    for (String orderCategoryid : orderCategoryIdsSplited) {
                        // The value must be 1, not the id itself: reduceByKey adds the values,
                        // so emitting the id would produce a sum of ids instead of a count.
                        list.add(new Tuple2<Long, Long>(Long.valueOf(orderCategoryid), 1L));
                    }
                    return list.iterator();
                }
            });

    // Sum the ones per category id.
    JavaPairRDD<Long, Long> orderCategoryId2CountRDD = orderCategoryIdRDD.reduceByKey(
            new Function2<Long, Long, Long>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Long call(Long v1, Long v2) throws Exception {
                    return v1 + v2;
                }
            });

    return orderCategoryId2CountRDD;
}
/**
 * Builds an RDD of (pay category id, number of payments).
 *
 * <p>Rows without payment information are dropped; every id in the comma-separated pay
 * category list then contributes {@code (categoryId, 1)}, and the ones are summed per
 * category with {@code reduceByKey}.
 *
 * @param sessionid2detailRDD pair RDD whose value is an action-detail row; column 10 is read
 *                            as a comma-separated list of paid category ids
 * @return pair RDD mapping each paid category id to its payment count
 */
private static JavaPairRDD<Long, Long> getPayCategoryId2CountRDD(
        JavaPairRDD<String, Row> sessionid2detailRDD) {
    // Keep only rows that actually carry pay category ids. The check has to look at
    // column 10 (the column split below); testing column 8 let rows with a null column 10
    // through, which then failed on split(), and dropped pay rows whose column 8 was null.
    JavaPairRDD<String, Row> payAction = sessionid2detailRDD.filter(
            new Function<Tuple2<String, Row>, Boolean>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Boolean call(Tuple2<String, Row> tuple) throws Exception {
                    return tuple._2.getString(10) != null;
                }
            });

    JavaPairRDD<Long, Long> payCategoryIdRDD = payAction.flatMapToPair(
            new PairFlatMapFunction<Tuple2<String, Row>, Long, Long>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Iterator<Tuple2<Long, Long>> call(Tuple2<String, Row> tuple)
                        throws Exception {
                    Row row = tuple._2;
                    String payCategoryIds = row.getString(10);
                    String[] payCategoryIdsSplited = payCategoryIds.split(",");

                    List<Tuple2<Long, Long>> list = new ArrayList<Tuple2<Long, Long>>();
                    for (String payCategoryid : payCategoryIdsSplited) {
                        // The value must be 1, not the id itself: reduceByKey adds the values,
                        // so emitting the id would produce a sum of ids instead of a count.
                        list.add(new Tuple2<Long, Long>(Long.valueOf(payCategoryid), 1L));
                    }
                    return list.iterator();
                }
            });

    // Sum the ones per category id.
    JavaPairRDD<Long, Long> payCategoryId2CountRDD = payCategoryIdRDD.reduceByKey(
            new Function2<Long, Long, Long>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Long call(Long v1, Long v2) throws Exception {
                    return v1 + v2;
                }
            });

    return payCategoryId2CountRDD;
}