代码如下：
package rdd
import org.apache.spark.{SparkContext, SparkConf}
/**
* Created by 汪本成 on 2016/7/2.
*/
/**
 * Small local Spark demo of `join` and `zipWithIndex` on pair RDDs.
 *
 * Builds two single-partition RDDs of (Int, Int) pairs, prints their inner
 * join by key, then prints every element of the first RDD paired with its index.
 */
object rddJoin {

  /**
   * Entry point: runs the demo against a local master and always stops the
   * SparkContext afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("rddJoin").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val rdd1 = sc.parallelize(Array((1, 21), (2, 42), (3, 41)), 1)
      val rdd2 = sc.parallelize(Array((3, 4), (4, 41)), 1)

      // Inner join on the key: only key 3 occurs in both inputs,
      // so the single result is (3,(41,4)).
      val rdd3 = rdd1.join(rdd2)
      rdd3.foreach(println)

      // Pair every element of rdd1 with its index and print the pairs.
      rdd1.zipWithIndex().foreach(println)
    } finally {
      // Stop the context even when a job fails, so its resources are released.
      sc.stop()
    }
  }
}
运行结果如下：
16/07/02 22:42:13 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 19 ms
16/07/02 22:42:13 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
16/07/02 22:42:13 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
(3,(41,4))
16/07/02 22:42:13 INFO Executor: Finished task 0.0 in sta