import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by zangtt on 17-3-9.
*/
object test {
  /** Demonstrates `RDD.map`: the identity function is applied element-wise,
    * so the collected result is still the four inner arrays, unflattened.
    * Prints "4 3" (4 inner arrays; the third array's second element is 3).
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Spark Pi").setMaster("local")
    val spark = new SparkContext(conf)
    // val, not var: the reference is never reassigned
    val s = Array(Array(1, 1), Array(2, 2), Array(3, 3), Array(4, 4))
    val rdd = spark.parallelize(s, 2)
    // map keeps one output element per input element — here the identity,
    // so collect() returns Array[Array[Int]] of length 4
    val mapped = rdd.map(x => x).collect()
    println(mapped.length + " " + mapped(2)(1))
    spark.stop() // release the local SparkContext (was leaked before)
  }
}
Output: 4 3    (the collected value is still the nested arrays (1,1),(2,2),(3,3),(4,4))
import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by zangtt on 17-3-9.
*/
object test {
  /** Demonstrates `RDD.flatMap`: each inner array is expanded into its
    * individual elements, so collect() yields 8 Ints instead of 4 arrays.
    * Prints "8 1" (8 flattened elements; the first is 1).
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Spark Pi").setMaster("local")
    val spark = new SparkContext(conf)
    // val, not var: the reference is never reassigned
    val s = Array(Array(1, 1), Array(2, 2), Array(3, 3), Array(4, 4))
    val rdd = spark.parallelize(s, 2)
    // flatMap concatenates the per-element collections: 1,1,2,2,3,3,4,4
    val flattened = rdd.flatMap(x => x).collect()
    println(flattened.length + " " + flattened(0))
    spark.stop() // release the local SparkContext (was leaked before)
  }
}
Output: 8 1    (the flattened elements are 1,1,2,2,3,3,4,4 — the original note dropped one 3)
flatMap:先map再flat(扁平化)
另外,flatMap 的函数需要为每个输入元素返回一个可遍历的集合(TraversableOnce,例如 Array、Seq 或 Iterator),这些集合的元素会被拼接进结果 RDD