目录
- 1.待处理数据
- 2.scala版 wordcount 程序之整体分析
- 3.scala版 wordcount 程序之分步分析
1. 待处理数据
// Sample input: each element is one line of space-separated words.
// Declared with `val` so the snippet is valid Scala on its own; a bare
// `lines = ...` is an assignment to an undefined name and does not compile.
val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")
2.scala版 wordcount 程序之整体分析
处理程序
// Word count over `lines`, as one expression; yields (word, count) pairs
// ordered from the highest count to the lowest.
val result = lines
  .flatMap(line => line.split(" "))                                   // break every line into words
  .map(word => (word, 1))                                             // pair each word with an initial count of 1
  .groupBy { case (word, _) => word }                                 // collect the pairs that share a word
  .mapValues(pairs => pairs.foldLeft(0)((acc, pair) => acc + pair._2)) // add up the 1s within each group
  .toList                                                             // Map -> List so it can be ordered
  .sortBy { case (_, count) => count }                                // ascending by count (stable sort)
  .reverse                                                            // flip to descending
3.scala版 wordcount 程序之分步分析
scala> val r1 = lines.flatMap(_.split(" "))
r1: List[String] = List(hello, tom, hello, jerry, hello, jerry, hello, kitty)
scala> val r3 = r1.map((_,1))
r3: List[(String, Int)] = List((hello,1), (tom,1), (hello,1), (jerry,1), (hello,1), (jerry,1), (hello,1), (kitty,1))
scala> val r4 = r3.groupBy(_._1)
r4: scala.collection.immutable.Map[String,List[(String, Int)]] = Map(tom -> List((tom,1)), kitty -> List((kitty,1)), jerry -> List((jerry,1), (jerry,1)), hello -> List((hello,1), (hello,1), (hello,1), (hello,1)))
scala> val r5 = r4.mapValues(_.foldLeft(0)(_+_._2))
r5: scala.collection.immutable.Map[String,Int] = Map(tom -> 1, kitty -> 1, jerry -> 2, hello -> 4)
scala> val r6 = r5.toList
r6: List[(String, Int)] = List((tom,1), (kitty,1), (jerry,2), (hello,4))
scala> val r7 = r6.sortBy(_._2)
r7: List[(String, Int)] = List((tom,1), (kitty,1), (jerry,2), (hello,4))
scala> val r8 = r7.reverse
r8: List[(String, Int)] = List((hello,4), (jerry,2), (kitty,1), (tom,1))