自定义Accumulator:
Scala自定义accumulator代码:
import org.apache.spark.util.AccumulatorV2

/**
 * A custom AccumulatorV2 that concatenates String inputs, appending a "-"
 * separator after each added value. The empty string is the zero value.
 */
class MyAccumulator2 extends AccumulatorV2[String, String] {

  // Internal accumulated state; "" is the zero value.
  private var res = ""

  /** True when nothing has been accumulated yet. */
  override def isZero: Boolean = res == ""

  /** Appends one input value followed by a "-" separator. */
  override def add(v: String): Unit = res += v + "-"

  /** Folds another accumulator's state into this one, in place. */
  override def merge(other: AccumulatorV2[String, String]): Unit = other match {
    // Only accumulators of the same concrete type can be merged.
    case o: MyAccumulator2 => res += o.res
    case _ =>
      throw new UnsupportedOperationException(
        s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  /** Returns an independent copy carrying the current state. */
  override def copy(): MyAccumulator2 = {
    val newMyAcc = new MyAccumulator2
    newMyAcc.res = this.res
    newMyAcc
  }

  /** The accumulated value seen by the driver. */
  override def value: String = res

  /** Restores the zero (empty) state; isZero is true afterwards. */
  override def reset(): Unit = res = ""
}
Scala测试类:
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Driver that exercises MyAccumulator2: registers it with the SparkContext,
 * feeds it the strings "1".."8" from an RDD on the executors, and prints the
 * merged result on the driver.
 */
object Accumulator1 {

  // Fixed: use explicit `: Unit =` instead of deprecated procedure syntax.
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Accumulator1").setMaster("local")
    val sc = new SparkContext(conf)

    // Custom accumulators must be registered before use in a job.
    val myAcc = new MyAccumulator2
    sc.register(myAcc, "myAcc")
    //val acc = sc.longAccumulator("avg")

    val nums = Array("1", "2", "3", "4", "5", "6", "7", "8")
    val numsRdd = sc.parallelize(nums)

    // add() runs on executors; the driver reads the merged state via value.
    numsRdd.foreach(num => myAcc.add(num))

    // Fixed: print the accumulated value, not the accumulator object's
    // default toString.
    println(myAcc.value)

    sc.stop()
  }
}
Java自定义accumulator:
import org.apache.spark.util.AccumulatorV2; public class StrAccu extends AccumulatorV2<String,String> { private String str = ""; //Returns if this accumulator is zero value or not. // e.g. for a counter accumulator, 0 is zero value; for a list accumulator, Nil is zero value. public boolean isZero() { return str == ""; } //Creates a new copy of this accumulator. public AccumulatorV2<String, String> copy() { StrAccu newAccumulator = new StrAccu(); newAccumulator.str = this.str; return newAccumulator; } //Resets this accumulator, which is zero value. i.e. call isZero must return true. public void reset() { str = ""; } //Takes the inputs and accumulates. public void add(String v) { str += v + "_"; } //Merges another same-type accumulator into this one and update its state, i.e. this should be merge-in-place. public void merge(AccumulatorV2<String, String> other) { StrAccu o =(StrAccu)other; str += o.str; } //Defines the current value of this accumulator public String value() { return str; } }
Java测试类:
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

import java.util.Arrays;
import java.util.List;

/**
 * Driver that exercises StrAccu: registers it, feeds it the letters "A".."I"
 * from a 3-partition RDD on the executors, and prints the merged result.
 */
public class myStringAccu {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("AccumulatorTest").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            List<String> list = Arrays.asList("A", "B", "C", "D", "E", "F", "G", "H", "I");
            final JavaRDD<String> javaRDD = sc.parallelize(list, 3).cache();

            // Custom accumulators must be registered with the underlying
            // SparkContext before they are used in a job.
            final StrAccu sa = new StrAccu();
            sc.sc().register(sa, "sa");

            // add() runs on executors; the driver reads the merged state via value().
            javaRDD.foreach(new VoidFunction<String>() {
                public void call(String s) throws Exception {
                    sa.add(s);
                }
            });

            System.out.println(sa.value());
        } finally {
            // Fixed: the original never stopped the context, leaking it on exit.
            sc.stop();
        }
    }
}