【spark学习】spark的8种word count实现方式

wordCount

groupBy

/**
 * Word count via `groupBy`: groups every word with itself and uses the group size as the count.
 *
 * Each line is split on runs of whitespace. `String.split` yields an empty token for blank
 * lines and for lines that start with whitespace, so empty tokens are filtered out to avoid
 * counting `""` as a word.
 *
 * The counts are collected into a local `Map` and printed.
 *
 * @param rdd lines of text to count words in
 */
def wordCount1(rdd: RDD[String]): Unit = {
  val words: RDD[String] = rdd.flatMap(_.split("\\s+")).filter(_.nonEmpty)
  val wordGroupByItself: RDD[(String, Iterable[String])] = words.groupBy(word => word)
  val wordCount: Map[String, Int] = wordGroupByItself.mapValues(_.size).collect().toMap
  println(wordCount)
}

groupByKey

/**
 * Word count via `groupByKey`: pairs every word with 1, groups the pairs by word and
 * uses the number of grouped values as the count.
 *
 * Each line is split on runs of whitespace. `String.split` yields an empty token for blank
 * lines and for lines that start with whitespace, so empty tokens are filtered out to avoid
 * counting `""` as a word.
 *
 * The counts are collected into a local `Map` and printed.
 *
 * @param rdd lines of text to count words in
 */
def wordCount3(rdd: RDD[String]): Unit = {
  val words: RDD[String] = rdd.flatMap(_.split("\\s+")).filter(_.nonEmpty)
  val wordByOne: RDD[(String, Int)] = words.map(word => word -> 1)
  // `Iterable.size` is enough; copying each group into a List first only wastes memory.
  val wordCount: Map[String, Int] = wordByOne.groupByKey().mapValues(_.size).collect().toMap
  // Print the result; previously it was computed and then silently discarded.
  println(wordCount)
}

reduceByKey

/**
 * Word count via `reduceByKey`: pairs every word with 1 and sums the ones per word.
 *
 * Each line is split on runs of whitespace. `String.split` yields an empty token for blank
 * lines and for lines that start with whitespace, so empty tokens are filtered out to avoid
 * counting `""` as a word.
 *
 * The counts are collected into a local `Map` and printed.
 *
 * @param rdd lines of text to count words in
 */
def wordCount4(rdd: RDD[String]): Unit = {
  val words: RDD[String] = rdd.flatMap(_.split("\\s+")).filter(_.nonEmpty)
  val wordOne: RDD[(String, Int)] = words.map((_, 1))
  val wordCount: RDD[(String, Int)] = wordOne.reduceByKey(_ + _)
  val wordCountResult: Map[String, Int] = wordCount.collect().toMap
  println(wordCountResult)
}

aggregateByKey

/**
 * Word count via `aggregateByKey`: pairs every word with 1 and aggregates per word,
 * starting from zero and using addition for both merge functions.
 *
 * Each line is split on runs of whitespace. `String.split` yields an empty token for blank
 * lines and for lines that start with whitespace, so empty tokens are filtered out to avoid
 * counting `""` as a word.
 *
 * The counts are collected into a local `Map` and printed.
 *
 * @param rdd lines of text to count words in
 */
def wordCount5(rdd: RDD[String]): Unit = {
  val words: RDD[String] = rdd.flatMap(_.split("\\s+")).filter(_.nonEmpty)
  val wordOne: RDD[(String, Int)] = words.map((_, 1))
  // Zero value 0; the first function folds a value into the accumulator,
  // the second merges two accumulators.
  val wordCount: RDD[(String, Int)] = wordOne.aggregateByKey(0)(_ + _, _ + _)
  val wordCountResult: Map[String, Int] = wordCount.collect().toMap
  println(wordCountResult)
}

foldByKey

/**
 * Word count via `foldByKey`: pairs every word with 1 and folds the ones per word,
 * starting from zero and combining with addition.
 *
 * Each line is split on runs of whitespace. `String.split` yields an empty token for blank
 * lines and for lines that start with whitespace, so empty tokens are filtered out to avoid
 * counting `""` as a word.
 *
 * The counts are collected into a local `Map` and printed.
 *
 * @param rdd lines of text to count words in
 */
def wordCount6(rdd: RDD[String]): Unit = {
  val words: RDD[String] = rdd.flatMap(_.split("\\s+")).filter(_.nonEmpty)
  val wordOne: RDD[(String, Int)] = words.map((_, 1))
  val wordCount: RDD[(String, Int)] = wordOne.foldByKey(0)(_ + _)
  val wordCountResult: Map[String, Int] = wordCount.collect().toMap
  println(wordCountResult)
}

combineByKey

/**
 * Word count via `combineByKey`: pairs every word with 1 and combines the ones per word.
 *
 * Each line is split on runs of whitespace. `String.split` yields an empty token for blank
 * lines and for lines that start with whitespace, so empty tokens are filtered out to avoid
 * counting `""` as a word.
 *
 * The counts are collected into a local `Map` and printed.
 *
 * @param rdd lines of text to count words in
 */
def wordCount7(rdd: RDD[String]): Unit = {
  val words: RDD[String] = rdd.flatMap(_.split("\\s+")).filter(_.nonEmpty)
  val wordOne: RDD[(String, Int)] = words.map((_, 1))
  val wordCount: RDD[(String, Int)] = wordOne.combineByKey(
    // createCombiner: the first value seen for a key (the Int 1) becomes the accumulator.
    (count: Int) => count,
    // mergeValue: add a further value to the accumulator.
    (acc: Int, count: Int) => acc + count,
    // mergeCombiners: add two accumulators together.
    (left: Int, right: Int) => left + right
  )
  val wordCountResult: Map[String, Int] = wordCount.collect().toMap
  println(wordCountResult)
}

countByKey

/**
 * Word count via `countByKey`: pairs every word with 1 and counts the entries per key.
 *
 * Each line is split on runs of whitespace. `String.split` yields an empty token for blank
 * lines and for lines that start with whitespace, so empty tokens are filtered out to avoid
 * counting `""` as a word.
 *
 * `countByKey` returns a local `Map` directly, which is then printed.
 *
 * @param rdd lines of text to count words in
 */
def wordCount8(rdd: RDD[String]): Unit = {
  val words: RDD[String] = rdd.flatMap(_.split("\\s+")).filter(_.nonEmpty)
  val wordOne: RDD[(String, Int)] = words.map((_, 1))
  val wordCount: scala.collection.Map[String, Long] = wordOne.countByKey()
  println(wordCount)
}

countByValue

/**
 * Word count via `countByValue`: counts how many times each distinct word occurs.
 *
 * Each line is split on runs of whitespace. `String.split` yields an empty token for blank
 * lines and for lines that start with whitespace, so empty tokens are filtered out to avoid
 * counting `""` as a word.
 *
 * `countByValue` returns a local `Map` directly, which is then printed.
 *
 * @param rdd lines of text to count words in
 */
private def wordCount2(rdd: RDD[String]): Unit = {
  val words: RDD[String] = rdd.flatMap(_.split("\\s+")).filter(_.nonEmpty)
  val wordCount: collection.Map[String, Long] = words.countByValue()
  println(wordCount)
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值