Scala 里的 WordCount 案例

7.7.5 普通 WordCount 案例

在这里插入图片描述

package chapter07


/** Basic word count.
  *
  * Splits a list of sentences into words, counts how often each word occurs and
  * prints the three most frequent words. Every intermediate result is printed so
  * that each step of the pipeline can be inspected.
  */
object TestWordCount__简单版 {
  /** Runs the word-count pipeline on a fixed sample and prints each stage.
    *
    * @param args command-line arguments (ignored)
    */
  def main(args: Array[String]): Unit = {
    // Visual divider printed between the stages.
    val separator = "------------------------------------------------------------------------------------------"

    val stringList = List("Hello Scala Hbase kafka", "Hello Scala Hbase", " Hello Scala", "Hello")

    // 1. Break every sentence into words. Splitting on runs of whitespace and dropping
    //    empty tokens keeps the leading blank of " Hello Scala" from being counted as a word.
    val wordList: List[String] = stringList.flatMap(_.split("\\s+")).filter(_.nonEmpty)
    println(wordList)
    println(separator)

    // 2. Put identical words together: word -> all of its occurrences.
    val wordSame: Map[String, List[String]] = wordList.groupBy(identity)
    println(wordSame)
    println(separator)

    // 3. Replace each group by its size: (word, occurrences) => (word, count).
    val wordToCount: Map[String, Int] = wordSame.map {
      case (word, occurrences) => (word, occurrences.size)
    }
    println(wordToCount)
    println(separator)

    // 4. Sort by count, highest first. Equal counts are ordered by word so the
    //    ranking does not depend on the iteration order of the map.
    val sortList: List[(String, Int)] = wordToCount.toList.sortBy {
      case (word, count) => (-count, word)
    }
    println(sortList)
    println(separator)

    // 5. Keep the three most frequent words.
    val resThreeList: List[(String, Int)] = sortList.take(3)
    println(resThreeList)
  }

}

7.7.6 复杂 WordCount 案例

TestWordCount__复杂版__方式01

package chapter07

/** Weighted word count, approach 1.
  *
  * Each input pair is (sentence, number of times the sentence occurred). The sentence
  * is repeated that many times, after which the ordinary word-count pipeline is applied
  * and the three most frequent words are printed.
  */
object TestWordCount__复杂版__方式01 {
  /** Runs the weighted word count on a fixed sample and prints the top three words.
    *
    * @param args command-line arguments (ignored)
    */
  def main(args: Array[String]): Unit = {
    val tupleList = List(("Hello Scala Spark World ", 4), ("Hello Scala Spark", 3), (" Hello Scala", 2), ("Hello", 1))

    // Expand every sentence to as many copies as its weight; the appended blank
    // keeps the last word of one copy apart from the first word of the next.
    val stringList: List[String] = tupleList.map {
      case (sentence, times) => (sentence + " ") * times
    }

    // Split on runs of whitespace and drop empty tokens: the repetition above and the
    // leading/trailing blanks in the data would otherwise yield "" as a counted word.
    val words: List[String] = stringList.flatMap(_.split("\\s+")).filter(_.nonEmpty)

    // Group identical words. Note that `words.groupBy(_)` is NOT a shorthand for this:
    // a lone placeholder expands to `x => words.groupBy(x)`, not to the identity function.
    val groupMap: Map[String, List[String]] = words.groupBy(identity)

    // (word, occurrences) => (word, count)
    val wordToCount: Map[String, Int] = groupMap.map {
      case (word, occurrences) => (word, occurrences.size)
    }

    // Highest count first; equal counts are ordered by word so the result is deterministic.
    val wordCountList: List[(String, Int)] = wordToCount.toList
      .sortBy { case (word, count) => (-count, word) }
      .take(3)

    println(wordCountList)
  }
}

TestWordCount__复杂版__方式02

package chapter07

/** Weighted word count, approach 2.
  *
  * Each input pair is (sentence, number of times the sentence occurred). Instead of
  * repeating the sentence, every word is paired with the sentence's weight and the
  * weights are summed per word. The resulting word -> total map is printed.
  */
object TestWordCount__复杂版__方式02 {
  /** Runs the weighted word count on a fixed sample and prints the total per word.
    *
    * @param args command-line arguments (ignored)
    */
  def main(args: Array[String]): Unit = {
    val tuples = List(("Hello Scala Hbase kafka",4),( "Hello Scala Hbase",3),(" Hello Scala",2),( "Hello",1))

    // 1. Pair every word with the weight of the sentence it came from.
    //    Empty tokens (e.g. from the leading blank in " Hello Scala") are dropped
    //    so that "" is not counted as a word.
    val wordToCountList: List[(String, Int)] = tuples.flatMap {
      case (sentence, weight) =>
        sentence.split("\\s+").toList.filter(_.nonEmpty).map(word => (word, weight))
    }

    // 2. Group the pairs by word:
    //    Hello -> List((Hello,4), (Hello,3), (Hello,2), (Hello,1))
    //    Scala -> List((Scala,4), (Scala,3), (Scala,2))
    //    Hbase -> List((Hbase,4), (Hbase,3))
    //    kafka -> List((kafka,4))
    val wordToTupleMap: Map[String, List[(String, Int)]] = wordToCountList.groupBy(_._1)

    // 3. Keep only the weights of each group: word -> List(weight, ...).
    //    A plain `map` is used instead of `mapValues`, which returns a lazy view
    //    (and no longer a Map) on Scala 2.13.
    val wordToCountMap: Map[String, List[Int]] = wordToTupleMap.map {
      case (word, pairs) => (word, pairs.map(_._2))
    }

    // 4. Sum the weights to get the total number of occurrences of each word.
    val wordToTotalCountMap: Map[String, Int] = wordToCountMap.map {
      case (word, weights) => (word, weights.sum)
    }
    println(wordToTotalCountMap)
  }

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值