WordCount with a Custom Accumulator
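Spark's AccumulatorV2[IN, OUT] lets you aggregate arbitrary value types across a cluster: tasks on the executors can only add to the accumulator, and only the driver reads its value. To count words we extend it with IN = String (a single word) and OUT = mutable.HashMap[String, Int], implementing the six required members: isZero, copy, reset, add, merge, and value. Spark copies the accumulator to each task, calls add per element, and merges the per-partition copies back on the driver. Updates are only guaranteed to be applied exactly once when made inside an action (such as foreach below); inside transformations they may be re-applied if a task is retried.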

package util

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.util.AccumulatorV2

import scala.collection.mutable

// Custom accumulator: IN = String (a single word),
// OUT = a map from each word to its count.
class MyAccumulator extends AccumulatorV2[String, mutable.HashMap[String, Int]] {
  private val _hashAcc = new mutable.HashMap[String, Int]()

  // The accumulator is "zero" when no word has been counted yet.
  override def isZero: Boolean = _hashAcc.isEmpty

  // Spark copies the accumulator for each task; every copy gets its own map.
  override def copy(): AccumulatorV2[String, mutable.HashMap[String, Int]] = {
    val newAcc = new MyAccumulator()
    newAcc._hashAcc ++= _hashAcc
    newAcc
  }

  override def reset(): Unit = _hashAcc.clear()

  // Runs on the executors: increment the count for one word.
  override def add(v: String): Unit = {
    _hashAcc.get(v) match {
      case None    => _hashAcc += ((v, 1))
      case Some(a) => _hashAcc += ((v, a + 1))
    }
  }

  // Runs on the driver: merge the per-partition maps into this one.
  // Match on the concrete class, because the type parameters of
  // AccumulatorV2 are erased at runtime and cannot be checked.
  override def merge(other: AccumulatorV2[String, mutable.HashMap[String, Int]]): Unit = {
    other match {
      case o: MyAccumulator =>
        for ((k, v) <- o.value) {
          _hashAcc.get(k) match {
            case None    => _hashAcc += ((k, v))
            case Some(a) => _hashAcc += ((k, v + a))
          }
        }
      case _ =>
        throw new UnsupportedOperationException(
          s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
    }
  }

  override def value: mutable.HashMap[String, Int] = _hashAcc
}

object WordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("demo")
    val sc = new SparkContext(conf)
    val line = sc.parallelize(List("a", "b", "c", "d", "e", "f", "a", "a", "b", "c"))
    val acc = new MyAccumulator()
    // Register the accumulator before it is used in a job.
    sc.register(acc, "test")
    // foreach is an action, so each update is applied exactly once.
    line.foreach(acc.add(_))
    // Print the merged counts, highest first.
    for ((k, v) <- acc.value.toList.sortBy(_._2).reverse) {
      println(k + "," + v)
    }
    sc.stop()
  }
}
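Running this locally prints the merged counts in descending order. With the sample data, a appears three times, b and c twice each, and d, e, f once each, so the output starts with a,3 followed by the pairs b,2 and c,2 and then the three singletons; the relative order of equal counts depends on the HashMap's iteration order, since sortBy only orders by count:

a,3
b,2
c,2
d,1
e,1
f,1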

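As a quick sanity check, the same counts can be computed with the classic shuffle-based word count and compared against the accumulator's result. A minimal sketch, assuming the sc, line, and acc values from the listing above, placed just before sc.stop():

// Classic shuffle-based word count for comparison.
val viaShuffle: Map[String, Int] =
  line.map((_, 1)).reduceByKey(_ + _).collect().toMap

// Both paths should agree on every count.
assert(viaShuffle == acc.value.toMap)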