// Approach 7: map to (word, 1) pairs + countByKey
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
object WordCount07 {
  // Word count using map to (word, 1) pairs followed by the countByKey action.
  def main(args: Array[String]): Unit = {
    val session = SparkSession
      .builder()
      .appName("countByKey")
      .master("local[2]")
      .getOrCreate()
    val context = session.sparkContext
    // Read the input file and flatten each line into individual words.
    val words: RDD[String] = context
      .textFile("data/thatgirl.txt")
      .flatMap(line => line.split(" "))
    // Pair every word with an initial count of 1.
    val pairs: RDD[(String, Int)] = words.map(word => (word, 1))
    // countByKey is an action: it requires key-value data and returns the
    // per-key counts to the driver as a local Map.
    val counts: collection.Map[String, Long] = pairs.countByKey()
    counts.foreach(println)
    context.stop()
  }
}
// Approach 8: countByValue
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
object WordCount08 {
  // Word count using the countByValue action directly on the word RDD.
  def main(args: Array[String]): Unit = {
    val session = SparkSession
      .builder()
      .appName("countByValue")
      .master("local[2]")
      .getOrCreate()
    val context = session.sparkContext
    // Read the input file and flatten each line into individual words.
    val words: RDD[String] = context
      .textFile("data/thatgirl.txt")
      .flatMap(line => line.split(" "))
    // countByValue is an action: it counts how many times each distinct
    // element occurs and returns the result to the driver as a local Map.
    val counts: collection.Map[String, Long] = words.countByValue()
    counts.foreach(println)
    context.stop()
  }
}