鄙人学习笔记
reduce方式:
// reduce-wordcount
// reduce-wordcount: turn every word into a single-entry mutable Map,
// then merge all the maps pairwise with RDD.reduce and print the result.
//
// @param words RDD of individual words (one word per element)
// @param sc    SparkContext — unused here; kept so the signature matches
//              the other *_wordcount variants in this file
def reduce_wordcount(words: RDD[String], sc: SparkContext): Unit = {
  // Format conversion: word ==> mutable Map(word -> 1)
  // (mutable so the reduce step below can accumulate in place)
  val mpWords = words.map { word =>
    scala.collection.mutable.Map[String, Long]((word, 1L))
  }
  // Merge mp2 into mp1 entry by entry and return mp1 as the accumulator.
  // NOTE(review): mutating a reduce argument in place is accepted here
  // because each map was freshly created per element above — confirm no
  // other code retains a reference to these maps.
  val wordCount = mpWords.reduce((mp1, mp2) => {
    mp2.foreach { case (word, count) =>
      // If mp1 lacks the word, start from 0; otherwise add to its count.
      val newCount = mp1.getOrElse(word, 0L) + count
      mp1.update(word, newCount)
    }
    mp1
  })
  println(s"reduce_wordcount:${wordCount}")
}
aggregate方式:
// aggregate-wordcount
def aggregate_wordcount(words:RDD[String],sc:SparkContext):Unit={
val wordCount = words.aggregate(scala.collection.mutable.Map[String,Long]())(
//分区内计算
(mp, word) => {