object Spark04_Acc_WordCount {

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("spark")
    val sc: SparkContext = new SparkContext(conf)

    // `val` instead of `var`: the RDD reference is never reassigned.
    val rdd = sc.makeRDD(List("hello", "spark", "hello", "scala"))

    // Create the custom accumulator and register it with Spark so that the
    // per-partition copies are merged back into this instance on the driver.
    val wcAcc = new MyAccumulator
    sc.register(wcAcc, "wordCountAcc")

    // Add every word on the executor side; reading `value` is only
    // meaningful on the driver, after the action completes.
    rdd.foreach(word => wcAcc.add(word))

    println(wcAcc.value)

    // Release the SparkContext (was missing in the original).
    sc.stop()
  }

  /**
   * Custom accumulator implementing word count.
   *
   * Type parameters of [[AccumulatorV2]]:
   *   - IN  = String: each word fed to `add`
   *   - OUT = mutable.Map[String, Long]: word -> occurrence count
   */
  class MyAccumulator extends AccumulatorV2[String, mutable.Map[String, Long]] {

    // Running word -> count state; mutated by add/merge, cleared by reset.
    private var wcMap = mutable.Map[String, Long]()

    /** True while nothing has been accumulated yet (the zero state). */
    override def isZero: Boolean = wcMap.isEmpty

    /**
     * Returns a copy of this accumulator INCLUDING its current value, as the
     * AccumulatorV2 contract requires. (The original returned an empty
     * accumulator, which breaks the default `copyAndReset` semantics —
     * `copy()` followed by `reset()` must reproduce this accumulator's state
     * before the reset.)
     */
    override def copy(): AccumulatorV2[String, mutable.Map[String, Long]] = {
      val newAcc = new MyAccumulator
      newAcc.wcMap = wcMap.clone()
      newAcc
    }

    /** Clears the accumulated state back to zero. */
    override def reset(): Unit = wcMap.clear()

    /** Records one occurrence of `word`. */
    override def add(word: String): Unit =
      wcMap.update(word, wcMap.getOrElse(word, 0L) + 1L)

    /**
     * Merges a partition-local accumulator into this one. Spark calls this on
     * the driver with the copies that ran on the executors.
     */
    override def merge(other: AccumulatorV2[String, mutable.Map[String, Long]]): Unit =
      other.value.foreach { case (word, count) =>
        wcMap.update(word, wcMap.getOrElse(word, 0L) + count)
      }

    /** Current word -> count map (driver-side result). */
    override def value: mutable.Map[String, Long] = wcMap
  }
}
spark07-自定义累加器实现wordCount
最新推荐文章于 2024-08-05 18:29:14 发布
![](https://img-home.csdnimg.cn/images/20240711042549.png)