package com.project.bigdata
object WordCount {
  /** Classic WordCount implemented with in-memory Scala collections,
    * mirroring the MapReduce phases: map -> shuffle (group) -> reduce.
    * Prints each (word, count) pair to stdout.
    */
  def main(args: Array[String]): Unit = {
    // Sample input lines. The irregular spacing and the trailing empty token
    // in line 3 are deliberate, to exercise trim/filter below.
    // NOTE(review): "hadopp" looks like a typo of "hadoop", but it is runtime
    // sample data — changing it would change the output counts, so it is kept.
    val lineList = List(
      "hadoop, spark,hadopp,hive",
      "spark,spark ,mapreduce",
      "hive ,spark ,hive, "
    )

    // Map phase: split each line on ',', trim surrounding whitespace,
    // drop empty tokens, then pair every word with an initial count of 1.
    // (Original code shadowed `line` inside the split's map; renamed for clarity.)
    val mapperWords: List[(String, Int)] =
      lineList
        .flatMap(_.split(","))
        .map(_.trim)
        .filterNot(_.isEmpty)
        .map(word => (word, 1))

    // Shuffle phase: group the (word, 1) pairs by word.
    val groupWords: Map[String, List[(String, Int)]] =
      mapperWords.groupBy(_._1)

    // Reduce phase: within each group, sum the per-occurrence counts.
    val result: Map[String, Int] =
      groupWords.map { case (word, pairs) =>
        word -> pairs.map(_._2).sum
      }

    // Emit results; Map iteration order is unspecified.
    result.foreach(println)
  }
}
// Scala 中实现 MapReduce 对数据进行处理，案例 WordCount
// (Implementing MapReduce-style data processing in Scala: the WordCount example.)
// 最新推荐文章于 2024-08-12 16:25:25 发布