// WordCount introductory example
object WordCount {

  /** Counts word occurrences across the given lines.
   *
   *  Tokenizes each line on runs of whitespace, counts each distinct word,
   *  and returns the (word, count) pairs sorted by count, most frequent first.
   *
   *  @param lines input lines; words are separated by whitespace
   *  @return list of (word, count) pairs in descending order of count
   */
  def wordCount(lines: List[String]): List[(String, Int)] =
    lines
      .flatMap(_.split("\\s+"))   // tokenize; "\\s+" is robust to double/trailing spaces, unlike split(" ")
      .filter(_.nonEmpty)         // drop the empty token split() emits for leading whitespace
      .map((_, 1))                // pair each word with 1: (hadoop, 1), ...
      .groupBy(_._1)              // group pairs by word: Map(hadoop -> List((hadoop,1), (hadoop,1)), ...)
      .map { case (word, pairs) => (word, pairs.size) } // per-word count = group size
      .toList
      .sortBy(-_._2)              // descending by count; clearer than sortBy(...).reverse

  def main(args: Array[String]): Unit = {
    val lines = List("hadoop spark fink tomcat hive ", "hadoop scala scala spark zookeeper", "scala spark")
    println(wordCount(lines))
  }
}