// Create an RDD holding the integers 1 to 10, spread over 2 partitions.
val rdd5 = sc.parallelize(1 to 10, 2)
// For every non-empty partition, emit a single ("part_<index>", elements) pair.
// Elements are prepended while folding, so each list ends up in reverse
// iteration order; an empty partition emits nothing.
rdd5.mapPartitionsWithIndex { (partIdx, iter) =>
  if (iter.hasNext) {
    // The key depends only on the partition index, so build it once.
    val partName = "part_" + partIdx
    val reversedElems = iter.foldLeft(List.empty[Int])((acc, elem) => elem :: acc)
    Iterator.single(partName -> reversedElems)
  } else {
    Iterator.empty[(String, List[Int])]
  }
}.collect()
// Array[(String, List[Int])] = Array((part_0,List(5, 4, 3, 2, 1)), (part_1,List(10, 9, 8, 7, 6)))
/**
 * def aggregate[U: ClassTag](zeroValue: U)(seqOp: (U, T) => U, combOp: (U, U) => U): U
 *
 * How the result is computed: seqOp (here, addition) is first folded over the
 * elements of each partition, starting from zeroValue:
 *   part_0: zeroValue + 1 + 2 + 3 + 4 + 5
 *   part_1: zeroValue + 6 + 7 + 8 + 9 + 10
 * combOp (here, also addition) then merges the per-partition results, again
 * starting from zeroValue:
 *   zeroValue + part_0 + part_1
 */
rdd5.aggregate(0)(
  (acc, elem) => acc + elem,
  (left, right) => left + right
)
//res23: Int = 55
// Sum all elements with fold, using 0 as the zero value and addition as the
// single combining function.
rdd5.fold(0)(_ + _)
//res24: Int = 55