/**
 * Weighted word-count example.
 *
 * Every input line is paired with a weight (how many times that line is
 * considered to occur). The total for a word is the sum of the weights of
 * the lines it appears in, counted once per occurrence of the word.
 */
object $18_WordCountHight {

  /**
   * Computes the weighted count of every word in `lines`.
   *
   * @param lines pairs of (text, weight); the text is split into words on whitespace
   * @return a map from each distinct word to the sum of the weights of its occurrences;
   *         empty when `lines` is empty or contains only blank text
   */
  def weightedWordCount(lines: Seq[(String, Int)]): Map[String, Int] = {
    // 1. Split each line into words and flatten, pairing every word with its line's weight.
    //    e.g. ("Hello Scala", 2) => (Hello,2), (Scala,2)
    val weightedWords = lines.flatMap { case (text, weight) =>
      // Split on runs of whitespace and drop empty tokens, so leading or repeated
      // blanks do not produce a bogus "" word.
      text.split("\\s+").filter(_.nonEmpty).map(word => (word, weight))
    }

    // 2. Group the pairs by word:
    //    Hello -> List((Hello,4), (Hello,3), (Hello,2), (Hello,1)), ...
    // 3. Sum the weights inside each group.
    weightedWords
      .groupBy(_._1)
      .map { case (word, pairs) => word -> pairs.map(_._2).sum }
  }

  def main(args: Array[String]): Unit = {
    val tupleList = List(
      ("Hello Scala Spark World", 4),
      ("Hello Scala Spark", 3),
      ("Hello Scala", 2),
      ("Hello", 1)
    )

    val result = weightedWordCount(tupleList)

    // 4. Show the result. It is a Map, so the print order is not guaranteed; the pairs are
    //    (Hello,10), (Scala,9), (Spark,7), (World,4)
    result.foreach(println)
  }
}
// Article title: WordCountHight in Scala
// Page footer: latest recommended article, published 2024-07-20 19:36:22