Spark 中很常用的两个 transformation 算子 map 和 flatMap，很多人都不是特别清楚它们之间的区别，今天就简单来说一下：
我们先来看下面一个demo:
package spark
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Minimal demo contrasting the `map` and `flatMap` RDD transformations.
 *
 * Both transformations are applied to the same RDD of `"key=value"` strings
 * with the same splitting function, and the resulting elements are printed
 * so the difference in element type is visible.
 */
object TestDemo {

  /**
   * Builds a `SparkContext` with master `"local"`, prints the source RDD,
   * then the result of `map`, then the result of `flatMap`, with a dashed
   * separator line between the three sections.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Spark Streaming Jason").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val rdd = sc.parallelize(Array("a=b", "c=d", "e=f"))
      rdd.foreach(println)
      println("-----------------------------------------------")
      // map yields exactly one output element per input element, so every
      // element here is the whole Array[String] produced by split; join it
      // with "," to make it printable.
      rdd.map(_.split("=")).foreach(parts => println(parts.mkString(",")))
      println("-----------------------------------------------")
      // flatMap flattens the arrays produced by split, so every element is
      // already a single String. Print it as-is: calling mkString(",") on a
      // String would insert a comma between its characters.
      rdd.flatMap(_.split("=")).foreach(println)
    } finally {
      // Release the context even if one of the jobs above throws.
      sc.stop()
    }
  }
}
运行后的输出如下: