目录
UserVisitSessionAnalyzeSpark.java
本篇文章记录用户访问session分析模块中,top10热门品类功能的二次排序步骤。
代码
UserVisitSessionAnalyzeSpark.java
/**
 * Step 5: map each record of {@code categoryid2countRDD} to a
 * {@code <CategorySortKey, countInfo>} pair, then sort the pairs by key in
 * descending order (secondary sort).
 *
 * The click, order and pay counts are read out of the concatenated count
 * string and packed into a {@code CategorySortKey}; the original count string
 * is carried along unchanged as the value. The resulting order is whatever
 * {@code CategorySortKey}'s comparison defines (presumably click count first,
 * then order count, then pay count; confirm against that class).
 */
JavaPairRDD<CategorySortKey, String> sortKey2countRDD = categoryid2countRDD.mapToPair(
        new PairFunction<Tuple2<Long, String>, CategorySortKey, String>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<CategorySortKey, String> call(Tuple2<Long, String> tuple) throws Exception {
                String countInfo = tuple._2;

                // Java literal "\\|" is the regex \| (a literal pipe). The previous
                // literal "\\\\|" was the regex \\| ("a backslash OR the empty string"),
                // which cannot split on '|'.
                // NOTE(review): assumes StringUtils.getFieldFromConcatString treats the
                // delimiter as a regex (e.g. String.split); confirm against the helper.
                String delimiter = "\\|";

                // parseLong yields the primitive directly (no box/unbox round trip);
                // it throws NumberFormatException if a field is missing or non-numeric.
                long clickCount = Long.parseLong(
                        StringUtils.getFieldFromConcatString(countInfo, delimiter, Constants.FIELD_CLICK_COUNT));
                long orderCount = Long.parseLong(
                        StringUtils.getFieldFromConcatString(countInfo, delimiter, Constants.FIELD_ORDER_COUNT));
                long payCount = Long.parseLong(
                        StringUtils.getFieldFromConcatString(countInfo, delimiter, Constants.FIELD_PAY_COUNT));

                CategorySortKey categorySortKey = new CategorySortKey(clickCount, orderCount, payCount);
                return new Tuple2<CategorySortKey, String>(categorySortKey, countInfo);
            }
        });

// sortByKey(false): ascending = false, i.e. descending order by sort key.
JavaPairRDD<CategorySortKey, String> sortedCategoryCountRDD = sortKey2countRDD.sortByKey(false);