import org.apache.spark.util.AccumulatorV2
import scala.collection.mutable
/**
 * @author tianmin
 * @date 2020/3/3
 * @note Custom Spark accumulator: counts occurrences per String key.
 */
class SessionAccumulator extends AccumulatorV2[String, mutable.HashMap[String, Int]] {
  // Internal state: per-key occurrence counts accumulated on this instance.
  // NOTE(review): `value` exposes this mutable map directly (same as the
  // original implementation); callers must not mutate the returned map.
  val countMap = new mutable.HashMap[String, Int]()

  /** True when no key has been counted yet. */
  override def isZero: Boolean = countMap.isEmpty

  /** Returns an independent copy of this accumulator carrying the current counts. */
  override def copy(): AccumulatorV2[String, mutable.HashMap[String, Int]] = {
    val acc = new SessionAccumulator
    acc.countMap ++= this.countMap
    acc
  }

  /** Drops all accumulated counts, returning the accumulator to its zero state. */
  override def reset(): Unit = countMap.clear()

  /**
   * Increments the count for key `v`; unseen keys start from 0.
   * (Simplified: the original did a redundant contains-check plus insert
   * before the increment — a single getOrElse-based update is equivalent.)
   */
  override def add(v: String): Unit =
    countMap.update(v, countMap.getOrElse(v, 0) + 1)

  /**
   * Merges another accumulator of the same concrete type into this one,
   * summing the counts of matching keys.
   *
   * Fix: the original match was non-exhaustive — merging with any other
   * AccumulatorV2 subtype raised an opaque MatchError. Spark's built-in
   * accumulators throw UnsupportedOperationException in that case, so we
   * do the same. Also replaced the side-effect-only foldLeft with foreach,
   * which states the intent (in-place mutation) directly.
   */
  override def merge(other: AccumulatorV2[String, mutable.HashMap[String, Int]]): Unit = {
    other match {
      case acc: SessionAccumulator =>
        acc.countMap.foreach { case (k, v) =>
          countMap.update(k, countMap.getOrElse(k, 0) + v)
        }
      case _ =>
        throw new UnsupportedOperationException(
          s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
    }
  }

  /** Current per-key counts (the live internal map, not a defensive copy). */
  override def value: mutable.HashMap[String, Int] = this.countMap
}
// Spark custom accumulator
// (stray blog-page metadata commented out so the file compiles:
//  "最新推荐文章于 2022-11-05 21:04:36 发布")