// Test 1: mapPartitionsWithIndex — keep only the data in partition 1
package test.wyh.wordcount
import org.apache.spark.{SparkConf, SparkContext}
object TestMapIndex {

  /** Entry point: demonstrates `mapPartitionsWithIndex` by collecting and
    * printing only the elements that live in partition 1 of a two-partition
    * RDD (partition indices start at 0).
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Set up a local Spark context for this demo.
    val conf    = new SparkConf().setMaster("local").setAppName("TestIndexPartitionApp")
    val context = new SparkContext(conf)

    // Four elements distributed over exactly two partitions.
    val source = context.makeRDD(List(1, 2, 3, 4), 2)

    // Pass partition 1's iterator through unchanged; every other
    // partition contributes an empty iterator (i.e. nothing).
    val secondPartitionOnly = source.mapPartitionsWithIndex { (partitionIndex, data) =>
      if (partitionIndex == 1) data else Iterator.empty
    }

    secondPartitionOnly.collect().foreach(println)

    // Release the Spark connection.
    context.stop()
  }
}