Java version
package cn.spark.study.core;

import org.apache.spark.Accumulator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

import java.util.Arrays;
import java.util.List;

public class AccumulatorVariable {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("AccumulatorVariable").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Create an accumulator on the driver, initialized to 0.
        // Tasks on the executors can only add to it; only the driver can read its value.
        final Accumulator<Integer> accumulator = sc.accumulator(0);

        List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> numbers = sc.parallelize(numberList);

        // foreach is an action, so each element's update is applied exactly once.
        numbers.foreach(new VoidFunction<Integer>() {
            @Override
            public void call(Integer number) throws Exception {
                accumulator.add(number);
            }
        });

        // Read the accumulated sum back on the driver: prints 15.
        System.out.println(accumulator.value());

        sc.close();
    }
}
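Note: the Accumulator API used above is the pre-2.0 one and is deprecated in Spark 2.x, where AccumulatorV2 replaces it. Below is a minimal sketch of the same sum with the newer LongAccumulator; the class name AccumulatorV2Variable and the accumulator name "sum" are only illustrative.

package cn.spark.study.core;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.util.LongAccumulator;

import java.util.Arrays;

public class AccumulatorV2Variable {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("AccumulatorV2Variable").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // longAccumulator() lives on the underlying SparkContext; the name shows up in the web UI.
        final LongAccumulator accumulator = sc.sc().longAccumulator("sum");

        sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)).foreach(new VoidFunction<Integer>() {
            @Override
            public void call(Integer number) throws Exception {
                accumulator.add(number);
            }
        });

        // Prints 15 on the driver.
        System.out.println(accumulator.value());

        sc.close();
    }
}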
Python version
import os
import sys

os.environ['SPARK_HOME'] = '/opt/spark'
sys.path.append("/opt/spark/python")

from pyspark import SparkContext

if __name__ == '__main__':
    sc = SparkContext("local", "accumulator")

    # Create an accumulator on the driver, initialized to 0.
    sumact = sc.accumulator(0)

    numbers = [1, 2, 3, 4, 5]
    listRdd = sc.parallelize(numbers)

    # Each task adds its element; += maps to Accumulator.add().
    def f(num):
        global sumact
        sumact += num

    # foreach is an action, so each element's update is applied exactly once.
    listRdd.foreach(f)

    # Only the driver can read the accumulated value: prints 15.
    print(sumact.value)
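One caveat worth keeping in mind: accumulator updates made inside transformations such as map() are lazy and may be re-applied if a stage is recomputed, so Spark only guarantees exactly-once updates for code inside actions like foreach(). The sketch below illustrates this; the names counter and add_and_pass are just placeholders and not part of the original example.

from pyspark import SparkContext

if __name__ == '__main__':
    sc = SparkContext("local", "accumulator-caveat")
    counter = sc.accumulator(0)

    def add_and_pass(x):
        counter.add(x)  # update inside a transformation: not applied yet
        return x

    mapped = sc.parallelize([1, 2, 3, 4, 5]).map(add_and_pass)

    # No action has run yet, so the accumulator is still 0 here.
    print(counter.value)  # 0

    # The action triggers the job and applies the updates (they would be
    # applied again if this stage were ever recomputed).
    mapped.count()
    print(counter.value)  # 15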