/**
 * Picks the sessions with the highest click counts for one category, inserts each of them
 * through the {@code ITop10SessionDAO}, and returns their session ids.
 *
 * <p>Each element of {@code tuple._2} is read as {@code "sessionid,count"}: the text before the
 * first comma is the session id and the second comma-separated field is parsed as a
 * {@code long} click count. At most ten sessions are kept; on equal counts the one seen
 * earlier stays ranked ahead.
 *
 * @param tuple the category id ({@code _1}) paired with that category's
 *              {@code "sessionid,count"} strings ({@code _2})
 * @return one {@code (sessionid, sessionid)} pair per kept session, highest count first
 * @throws Exception declared by the overridden signature; a malformed element surfaces as
 *                   {@code ArrayIndexOutOfBoundsException} or {@code NumberFormatException}
 */
@Override
public Iterable<Tuple2<String, String>> call(
        Tuple2<Long, Iterable<String>> tuple)
        throws Exception {
    long categoryid = tuple._1;
    Iterator<String> iterator = tuple._2.iterator();

    // Ranking buffers kept in descending count order. Counts are stored already parsed so
    // that each input string is split and parsed exactly once.
    final int capacity = 10;
    String[] topSessionIds = new String[capacity];
    long[] topCounts = new long[capacity];
    int filled = 0;

    while (iterator.hasNext()) {
        String sessionCount = iterator.next();
        String[] fields = sessionCount.split(",");
        long count = Long.parseLong(fields[1]);
        String sessionid = fields[0];

        // Find the first slot whose count is strictly smaller; equal counts do not displace
        // an entry that is already ranked.
        int pos = 0;
        while (pos < filled && count <= topCounts[pos]) {
            pos++;
        }
        if (pos == capacity) {
            // Buffer is full and this session does not beat any ranked entry.
            continue;
        }

        // Shift the tail one slot to the right; when the buffer is full the last entry
        // falls off the end.
        int tailEnd = Math.min(filled, capacity - 1);
        System.arraycopy(topSessionIds, pos, topSessionIds, pos + 1, tailEnd - pos);
        System.arraycopy(topCounts, pos, topCounts, pos + 1, tailEnd - pos);
        topSessionIds[pos] = sessionid;
        topCounts[pos] = count;
        if (filled < capacity) {
            filled++;
        }
    }

    List<Tuple2<String, String>> list = new ArrayList<Tuple2<String, String>>(filled);
    if (filled == 0) {
        // Nothing ranked: skip the DAO lookup entirely, as no row would be inserted.
        return list;
    }

    // The DAO lookup does not depend on the row, so it is done once for all inserts.
    // (The original comments describe the insert target as a MySQL table.)
    ITop10SessionDAO top10SessionDAO = DAOFactory.getTop10SessionDAO();
    for (int i = 0; i < filled; i++) {
        Top10Session top10Session = new Top10Session();
        top10Session.setTaskid(taskid);
        top10Session.setCategoryid(categoryid);
        top10Session.setSessionid(topSessionIds[i]);
        top10Session.setClickCount(topCounts[i]);
        top10SessionDAO.insert(top10Session);

        // The session id is emitted as both key and value of the returned pair.
        list.add(new Tuple2<String, String>(topSessionIds[i], topSessionIds[i]));
    }
    return list;
}
});
// Spark e-commerce analysis: top-10 sessions by click count per category (top-N algorithm)
// Latest recommended article published on 2022-04-09 20:31:37