// Word count over a small in-memory data set, built step by step so each
// intermediate shape can be inspected. The final result is printed as one
// (word, count) tuple per line, most frequent word first.
val array = Array("hello word spark","hello scala","scala spark","hello scala")

// Tokenize each line. Splitting on runs of whitespace (instead of a single
// literal space) and dropping empty tokens keeps leading or repeated blanks
// and tabs from being counted as an empty "word".
val arr1: Array[Array[String]] =
  array.map(_.split("\\s+").filter(_.nonEmpty)) // [[hello, word, spark], [hello, scala], ...]

// Flatten the per-line token arrays into one array of words.
val arr2: Array[String] = arr1.flatten // [hello, word, spark, hello, scala, ...]

// Pair every word with an initial count of 1.
val arr3: Array[(String, Int)] = arr2.map(word => (word, 1)) // [(hello,1), (word,1), ...]

// Group the pairs by their word.
val arr4: Map[String, Array[(String, Int)]] =
  arr3.groupBy { case (word, _) => word } // hello -> [(hello,1), (hello,1), (hello,1)], ...

// For each word, sum the counts (second tuple element) of its group.
val arr5: Map[String, Int] = arr4.map {
  case (word, pairs) => (word, pairs.map(_._2).sum)
}

// Final result: descending by count. The word itself is the secondary key so
// that words with equal counts come out in a stable, alphabetical order rather
// than in whatever order the Map happens to iterate.
val resArr: List[(String, Int)] =
  arr5.toList.sortBy { case (word, count) => (-count, word) }
resArr.foreach(println)
输出: