// Top 1: GroupBy
/**
 * Word count implemented with `groupBy`.
 *
 * Reads `datas/1.txt`, splits every line on spaces, groups identical words
 * together, and reports each word's count as the size of its group.
 * Note: `groupBy` shuffles every individual word occurrence; this is the
 * least efficient of the three variants and is shown for comparison.
 */
object WordCount_GroupBy {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("WordCount_GroupBy")
    val sc: SparkContext = new SparkContext(conf)

    // Read the input file and flatten each line into individual words.
    val lines: RDD[String] = sc.textFile("datas/1.txt")
    val words: RDD[String] = lines.flatMap(line => line.split(" "))

    // Group occurrences under the word itself as the key.
    val grouped: RDD[(String, Iterable[String])] = words.groupBy(identity)

    // A word's count is just the number of occurrences in its group.
    val counts: RDD[(String, Int)] = grouped.map {
      case (word, occurrences) => (word, occurrences.size)
    }

    counts.collect().foreach(println)
    sc.stop()
  }
}
// Top 2: GroupByKey
/**
 * Word count implemented with `groupByKey`.
 *
 * Reads `datas/1.txt`, pairs every word with an initial count of 1,
 * gathers all the 1s per word with `groupByKey`, and sums them.
 * Note: like `groupBy`, this shuffles every `(word, 1)` pair without
 * map-side combining; shown for comparison with the other variants.
 */
object WordCount_GroupByKey {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("WordCount_GroupByKey")
    val sc: SparkContext = new SparkContext(conf)

    // Read the input file and flatten each line into individual words.
    val lines: RDD[String] = sc.textFile("datas/1.txt")
    val words: RDD[String] = lines.flatMap(line => line.split(" "))

    // Tag every occurrence with a count of 1.
    val pairs: RDD[(String, Int)] = words.map(word => (word, 1))

    // Collect all the 1s belonging to each word...
    val grouped: RDD[(String, Iterable[Int])] = pairs.groupByKey()

    // ...and sum them to obtain the total count per word.
    val counts: RDD[(String, Int)] = grouped.map {
      case (word, ones) => (word, ones.sum)
    }

    counts.collect().foreach(println)
    sc.stop()
  }
}
// Top 3: Reduce
object WordCount_Reduce {
def main(args: Array[String]): Unit = {
val sc: SparkContext = new SparkContext(new SparkConf().setMaster("local[*]").setAppName(