/**
 * Word-count demo that runs three small Spark jobs, each counting word
 * occurrences for a differently shaped in-memory input, and prints the
 * resulting `(word, count)` pairs.
 */
object Test02 {

  /**
   * Builds a Spark context named "test" with master "local", runs the three
   * word-count jobs, and always stops the context afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // Case 1: each element is a sentence; split on runs of one or more spaces.
      val arr = sc.parallelize(Array("hello mysql hadoop", "mysql hello"))
      val res = arr.flatMap(_.split(" +")).map((_, 1)).reduceByKey(_ + _)
      // NOTE(review): RDD.foreach runs the closure where the partition is processed;
      // with the "local" master set above the output should appear on this console.
      res.foreach(println(_))

      // Case 2: each element is already an array of words; flatten to single words.
      val arr1 = sc.parallelize(
        Array(Array("hello", "mysql", "hadoop"), Array("hello", "world", "hadoop"))
      )
      val res1 = arr1.flatMap(_.iterator).map((_, 1)).reduceByKey(_ + _)
      res1.foreach(println(_))

      // Case 3: each element is an array holding comma-separated strings; the inner
      // flatMap splits every string on "," before the outer flatMap flattens the arrays.
      val arr2 = sc.parallelize(Array(Array("hello,mysql,hadoop"), Array("hello,world,hadoop")))
      val res2 = arr2.flatMap(_.flatMap(_.split(","))).map((_, 1)).reduceByKey(_ + _)
      res2.foreach(println(_))
    } finally {
      // Stop the context even when one of the jobs above throws, so it is not left running.
      sc.stop()
    }
  }
}
object Test02 { def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("test").setMaster("local") val sc = new SparkContext(conf) val arr = sc.parallelize(Array("hello mysql hadoop", "mysql hello")) .