import org.apache.spark.{SparkConf, SparkContext}
/**
 * Demonstrates the RDD `collect` action. (Originally created by liupeng, 2017/6/16.)
 *
 * `collect` serializes the entire distributed RDD back to the driver as a
 * local Array. For large datasets this moves everything over the network and
 * can OOM the driver — prefer the `foreach` action (runs on executors) when
 * only a per-element side effect is needed.
 */
object A_collect {
  // Required on Windows so Hadoop/Spark can locate winutils.exe under this dir.
  System.setProperty("hadoop.home.dir", "F:\\hadoop-2.6.5")

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("collect_test").setMaster("local")
    val sc = new SparkContext(conf)

    // Sample data: five integers, doubled on the executors.
    val numbers = sc.parallelize(List(1, 2, 3, 4, 5)).map(_ * 2)

    // Pull the RDD to the driver and print each element locally.
    val doubledNumbers: Array[Int] = numbers.collect()
    doubledNumbers.foreach(println)

    // Stop the context so resources are released and the JVM can exit cleanly.
    sc.stop()
  }
}
运行结果 (program output):
2
4
6
8
10