1. Accumulator 累加代码:
/**
 * Spark accumulator parameter that keeps all session statistics in a single string of the
 * form {@code name1=count1|name2=count2|...}, so one accumulator can carry the session total
 * together with every visit-length and step-length bucket.
 *
 * <p>Two kinds of right-hand operand are supported by both {@link #addAccumulator} and
 * {@link #addInPlace}:
 * <ul>
 *   <li>a bare field name (no {@code '='}), meaning "add 1 to that field";</li>
 *   <li>another concatenated {@code name=count|...} string, meaning "add its counts to mine".
 *       Spark's {@code AccumulableParam} contract uses {@code addInPlace} to merge two
 *       accumulated values, so this form must be summed field by field instead of being
 *       looked up as a field name (which would silently discard the partial result).</li>
 * </ul>
 */
public class SessionAggrStatAccumulator implements AccumulatorParam<String> {
    private static final long serialVersionUID = 6311074555136039130L;

    /** Regex used to split the concatenated string into {@code name=count} fields. */
    private static final String FIELD_DELIMITER = "\\|";

    /**
     * Builds the initial accumulator value: every tracked field present with a count of 0.
     *
     * @param v the initial value handed in by Spark; not used
     * @return the concatenated string with all counters set to 0
     */
    @Override
    public String zero(String v) {
        return Constants.SESSION_COUNT + "=0|"
                + Constants.TIME_PERIOD_1s_3s + "=0|"
                + Constants.TIME_PERIOD_4s_6s + "=0|"
                + Constants.TIME_PERIOD_7s_9s + "=0|"
                + Constants.TIME_PERIOD_10s_30s + "=0|"
                + Constants.TIME_PERIOD_30s_60s + "=0|"
                + Constants.TIME_PERIOD_1m_3m + "=0|"
                + Constants.TIME_PERIOD_3m_10m + "=0|"
                + Constants.TIME_PERIOD_10m_30m + "=0|"
                + Constants.TIME_PERIOD_30m + "=0|"
                + Constants.STEP_PERIOD_1_3 + "=0|"
                + Constants.STEP_PERIOD_4_6 + "=0|"
                + Constants.STEP_PERIOD_7_9 + "=0|"
                + Constants.STEP_PERIOD_10_30 + "=0|"
                + Constants.STEP_PERIOD_30_60 + "=0|"
                + Constants.STEP_PERIOD_60 + "=0";
    }

    /**
     * Adds one new element to the accumulated value.
     *
     * @param v1 the current concatenated counter string
     * @param v2 the field name to increment (or a concatenated string to merge)
     * @return the updated concatenated counter string
     */
    @Override
    public String addAccumulator(String v1, String v2) {
        return add(v1, v2);
    }

    /**
     * Merges two accumulated values.
     *
     * @param v1 the current concatenated counter string
     * @param v2 another concatenated counter string (or a single field name to increment)
     * @return the updated concatenated counter string
     */
    @Override
    public String addInPlace(String v1, String v2) {
        return add(v1, v2);
    }

    /**
     * Shared accumulation logic: picks between "increment one field" and "merge two strings"
     * based on whether {@code v2} contains a {@code '='}.
     */
    private String add(String v1, String v2) {
        // Nothing accumulated yet: the other operand becomes the value.
        if (StringUtils.isEmpty(v1)) {
            return v2;
        }
        // Nothing to add.
        if (StringUtils.isEmpty(v2)) {
            return v1;
        }
        // A '=' means v2 is itself a name=count|... string, i.e. a partial result to merge.
        if (v2.indexOf('=') >= 0) {
            return merge(v1, v2);
        }
        return increment(v1, v2);
    }

    /** Adds 1 to the counter named {@code field} inside {@code concat}; unknown fields are ignored. */
    private String increment(String concat, String field) {
        // presumably returns null when the field is absent -- verify against StringUtils
        String oldValue = StringUtils.getFieldFromConcatString(concat, FIELD_DELIMITER, field);
        if (oldValue == null) {
            return concat;
        }
        long newValue = Long.parseLong(oldValue) + 1;
        return StringUtils.setFieldInConcatString(
                concat, FIELD_DELIMITER, field, String.valueOf(newValue));
    }

    /** Adds every count found in {@code other} to the matching counter in {@code concat}. */
    private String merge(String concat, String other) {
        String merged = concat;
        for (String pair : other.split(FIELD_DELIMITER)) {
            int eq = pair.indexOf('=');
            if (eq <= 0) {
                // Not a name=count pair; skip it.
                continue;
            }
            String field = pair.substring(0, eq);
            String delta = pair.substring(eq + 1);
            String current = StringUtils.getFieldFromConcatString(merged, FIELD_DELIMITER, field);
            if (current != null) {
                long sum = Long.parseLong(current) + Long.parseLong(delta);
                merged = StringUtils.setFieldInConcatString(
                        merged, FIELD_DELIMITER, field, String.valueOf(sum));
            }
        }
        return merged;
    }
}
------------------------------------------------------------
2. session随机抽取经典代码:
// Extraction plan: date -> (hour -> distinct random session indexes to pick within that hour).
final Map<String,Map<String, List<Integer>>> dateHourExtractMap =
        new HashMap<String, Map<String, List<Integer>>>();
Random random = new Random();
// Everything that depends on a single day is handled inside this loop.
for(Map.Entry<String,Map<String,Long>> dateHourCountEntry : dateHourCountMap.entrySet()){
    String date = dateHourCountEntry.getKey();
    Map<String, Long> hourCountMap = dateHourCountEntry.getValue();
    // Total number of sessions for this day (sum over all hours).
    long sessionCount = 0L;
    for(long hourCount : hourCountMap.values()){
        sessionCount += hourCount;
    }
    Map<String, List<Integer>> hourExtractMap = dateHourExtractMap.get(date);
    if(hourExtractMap == null){
        hourExtractMap = new HashMap<String, List<Integer>>();
        dateHourExtractMap.put(date,hourExtractMap);
    }
    // Everything that depends on a single hour is handled inside this loop.
    for(Map.Entry<String, Long> hourCountEntry : hourCountMap.entrySet()){
        String hour = hourCountEntry.getKey();
        long count = hourCountEntry.getValue();
        // Sessions to extract for this hour = (hour's share of the day's sessions) * daily quota.
        // The division is done in double so it does not truncate to 0, and the (int) cast must
        // wrap the WHOLE product: a cast binds tighter than '*', so casting only the ratio
        // would truncate it to 0 before the multiplication.
        int hourExtractNumber = (int)(((double)count / (double)sessionCount)
                * extractNumberPerDay);
        // Never ask for more sessions than the hour actually has.
        if(hourExtractNumber > count){
            hourExtractNumber = (int)count;
        }
        // Fetch (or create) the list holding this hour's random indexes.
        List<Integer> extractIndexList = hourExtractMap.get(hour);
        if(extractIndexList == null){
            extractIndexList = new ArrayList<Integer>();
            hourExtractMap.put(hour,extractIndexList);
        }
        // Draw hourExtractNumber distinct indexes in [0, count); redraw on duplicates.
        // Terminates because hourExtractNumber <= count.
        for(int i = 0; i<hourExtractNumber; i++){
            int extractIndex;
            do {
                extractIndex = random.nextInt((int)count);
            } while (extractIndexList.contains(extractIndex));
            extractIndexList.add(extractIndex);
        }
    }
} // closes the per-day loop (this brace was missing from the snippet)
--------------------------------------------------------------
3. 获取各个品类对应的点击、下单和支付的次数
// Joins the category id RDD with the per-category click counts and encodes each row as a
// "categoryid=<id>|clickCount=<n>"-style string keyed by category id (the exact field names
// come from Constants.FIELD_CATEGORY_ID / Constants.FIELD_CLICK_COUNT).
// NOTE(review): this excerpt stops after the click-count step -- there is no return statement
// or closing brace, and orderCategory2CountRDD / payCategory2CountRDD are not used in the
// visible part. Presumably the order and pay joins follow in the full source; confirm there.
private static JavaPairRDD<Long, String> joinCategoryAndData(
JavaPairRDD<Long, Long> categoryidRDD,
JavaPairRDD<Long, Long> clickCategory2CountRDD,
JavaPairRDD<Long, Long> orderCategory2CountRDD,
JavaPairRDD<Long, Long> payCategory2CountRDD) {
// Left outer join keeps every category id; the click count is an Optional because a
// category may have no entry in clickCategory2CountRDD.
final JavaPairRDD<Long, Tuple2<Long, Optional<Long>>> tmpJoinRDD =
categoryidRDD.leftOuterJoin(clickCategory2CountRDD);
// Turn each joined row into (categoryid, concatenated "field=value|field=value" string).
JavaPairRDD<Long, String> tmpMapRDD = tmpJoinRDD.mapToPair(
new PairFunction<Tuple2<Long, Tuple2<Long, Optional<Long>>>, Long, String>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2<Long, String> call(
Tuple2<Long, Tuple2<Long, Optional<Long>>> tuple) throws Exception {
long categoryid = tuple._1;
Optional<Long> optional = tuple._2._2;
// Default to 0 clicks when the join found no matching click count.
long clickCount = 0L;
if (optional.isPresent()){
clickCount = optional.get();
}
String value = Constants.FIELD_CATEGORY_ID + "=" + categoryid + "|" +
Constants.FIELD_CLICK_COUNT + "=" + clickCount;
return new Tuple2<Long, String>(categoryid,value);
}
});
4.二次排序代码:
public class CategorySortKey implements Ordered<CategorySortKey>, Serializable {
private static final long serialVersionUID = -6007890914324789180L;
// The fields that take part in the ordering.
private long clickCount;
private long orderCount;
private long payCount;

/**
 * Creates a sort key from the three counts.
 *
 * @param clickCount value stored in the click-count field
 * @param orderCount value stored in the order-count field
 * @param payCount   value stored in the pay-count field
 */
public CategorySortKey(long clickCount, long orderCount,long payCount){
    this.payCount = payCount;
    this.orderCount = orderCount;
    this.clickCount = clickCount;
}

// Accessors, grouped per field.

/** @return the click count */
public long getClickCount() {
    return this.clickCount;
}

/** @param clickCount the new click count */
public void setClickCount(long clickCount) {
    this.clickCount = clickCount;
}

/** @return the order count */
public long getOrderCount() {
    return this.orderCount;
}

/** @param orderCount the new order count */
public void setOrderCount(long orderCount) {
    this.orderCount = orderCount;
}

/** @return the pay count */
public long getPayCount() {
    return this.payCount;
}

/** @param payCount the new pay count */
public void setPayCount(long payCount) {
    this.payCount = payCount;
}
// Comparison operators against another key (greater, greater-or-equal, less, less-or-equal).
/**
 * Tells whether this key is strictly greater than {@code that}, comparing click count first,
 * then order count, then pay count; a later field is only consulted when all earlier ones tie.
 *
 * @param that the key to compare against
 * @return {@code true} if this key is strictly greater than {@code that}
 */
@Override
public boolean $greater(CategorySortKey that) {
    final long otherClicks = that.getClickCount();
    if (clickCount != otherClicks) {
        return clickCount > otherClicks;
    }
    final long otherOrders = that.getOrderCount();
    if (orderCount != otherOrders) {
        return orderCount > otherOrders;
    }
    // Clicks and orders tie: the pay count decides.
    return payCount > that.getPayCount();
}
@Override
public boolean $greater$eq(CategorySortKey that) {