目录
UserVisitSessionAnlyzeSpark.java
本篇文章记录“用户访问session分析”模块中top10热门品类功能的第一步:获取符合条件的session访问过的所有品类。
代码
UserVisitSessionAnlyzeSpark.java
/** * 获取top10热门品类 * @param filteredSessionid2AggrInfoRDD * @param sessionid2actionRDD */ private static void getTop10Category( JavaPairRDD<String, String> filteredSessionid2AggrInfoRDD, JavaPairRDD<String, Row> sessionid2actionRDD) { /** * 第一步:获取符合条件的session访问过的所有品类 */ // 获取符合条件的session的访问明细 JavaPairRDD<String, Row> sessionid2detailRDD = filteredSessionid2AggrInfoRDD .join(sessionid2actionRDD) .mapToPair(new PairFunction<Tuple2<String,Tuple2<String,Row>>, String, Row>() { private static final long serialVersionUID = 1L; @Override public Tuple2<String, Row> call( Tuple2<String, Tuple2<String, Row>> tuple) throws Exception { return new Tuple2<String, Row>(tuple._1, tuple._2._2); } }); // 获取session访问过的所有品类id // 访问过:指的是,点击过、下单过、支付过的品类 JavaPairRDD<Long, Long> categoryidRDD = sessionid2detailRDD.flatMapToPair( new PairFlatMapFunction<Tuple2<String,Row>, Long, Long>() { private static final long serialVersionUID = 1L; @Override public Iterable<Tuple2<Long, Long>> call( Tuple2<String, Row> tuple) throws Exception { Row row = tuple._2; List<Tuple2<Long, Long>> list = new ArrayList<Tuple2<Long, Long>>(); Long clickCategoryId = row.getLong(6); if(clickCategoryId != null) { list.add(new Tuple2<Long, Long>(clickCategoryId, clickCategoryId)); } String orderCategoryIds = row.getString(8); if(orderCategoryIds != null) { String[] orderCategoryIdsSplited = orderCategoryIds.split(","); for(String orderCategoryId : orderCategoryIdsSplited) { list.add(new Tuple2<Long, Long>(Long.valueOf(orderCategoryId), Long.valueOf(orderCategoryId))); } } String payCategoryIds = row.getString(10); if(payCategoryIds != null) { String[] payCategoryIdsSplited = payCategoryIds.split(","); for(String payCategoryId : payCategoryIdsSplited) { list.add(new Tuple2<Long, Long>(Long.valueOf(payCategoryId), Long.valueOf(payCategoryId))); } } return list; } }); }