//第一步,给RDD中的每个key都打上一个随机前缀。JavaPairRDDrandomPrefixRdd=rdd.mapToPair(newPairFunction,String,Long>(){privatestaticfinallon...
// 第一步,给RDD中的每个key都打上一个随机前缀。
JavaPairRDD randomPrefixRdd = rdd.mapToPair(
new PairFunction, String, Long>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2 call(Tuple2 tuple)throws Exception
{
Random random = new Random();
int prefix = random.nextInt(10);
return new Tuple2(prefix + "_" + tuple._1, tuple._2);
}
});
// 第二步,对打上随机前缀的key进行局部聚合。
JavaPairRDD localAggrRdd = randomPrefixRdd.reduceByKey(
new Function2() {
private static final long serialVersionUID = 1L;
@Override
public Long call(Long v1, Long v2) throws Exception {
return v1 + v2;
}
});
// 第三步,去除RDD中每个key的随机前缀。
JavaPairRDD removedRandomPrefixRdd = localAggrRdd.mapToPair(
new PairFunction, Long, Long>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2 call(Tuple2 tuple)
throws Exception {
long originalKey = Long.valueOf(tuple._1.split("_")[1]);
return new Tuple2(originalKey, tuple._2);
}
});
// 第四步,对去除了随机前缀的RDD进行全局聚合。
JavaPairRDD globalAggrRdd = removedRandomPrefixRdd.reduceByKey(
new Function2() {
private static final long serialVersionUID = 1L;
@Override
public Long call(Long v1, Long v2) throws Exception {
return v1 + v2;
}
});
展开