package SparkStreaming;
import org.apache.spark.Accumulator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
import java.util.Iterator;
import java.util.Arrays;
import java.util.List;

public class totalization_device {

    public static void main(String[] args) {
        // Run Spark locally with two worker threads.
        SparkConf conf = new SparkConf()
                .setMaster("local[2]")
                .setAppName("totalization_device");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Define an accumulator, shared across tasks, to count input lines.
        Accumulator<Integer> accumulator = sc.accumulator(0);

        // Read the input file as an RDD of lines.
        JavaRDD<String> fileRDD = sc.textFile("E:/2018_cnic/learn/wordcount.txt");
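        // Side note: since Spark 2.0 the untyped Accumulator API above is
        // deprecated in favor of LongAccumulator. A minimal equivalent sketch,
        // assuming Spark 2.x (variable name "lineCount" is illustrative):
        //     LongAccumulator lineCount = sc.sc().longAccumulator("lines");
        //     ... lineCount.add(1); ... long n = lineCount.value();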
        // Split each line into words. The accumulator is bumped once per line,
        // so after the job runs it holds the number of lines processed.
        JavaRDD<String> fileRDD1 = fileRDD.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterator<String> call(String s) throws Exception {
                accumulator.add(1);
                return Arrays.asList(s.split(" ")).iterator();
            }
        });
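        // Caveat: accumulator updates made inside a transformation such as
        // flatMap can be applied more than once if a task is retried or a
        // stage is recomputed; Spark guarantees exactly-once accumulator
        // updates only for updates performed inside actions.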
        // Map each word to a (word, 1) pair.
        JavaPairRDD<String, Integer> pairRDD = fileRDD1.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) throws Exception {
                return new Tuple2<>(s, 1);
            }
        });
        // Sum the counts for each word.
        JavaPairRDD<String, Integer> reducebykeyRDD = pairRDD.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer integer, Integer integer2) throws Exception {
                return integer + integer2;
            }
        });
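        // On Java 8+, the two anonymous classes above can be collapsed into
        // lambdas; a minimal equivalent sketch ("counts" is a hypothetical name):
        //     JavaPairRDD<String, Integer> counts = fileRDD1
        //             .mapToPair(w -> new Tuple2<>(w, 1))
        //             .reduceByKey(Integer::sum);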
        // collect() is an action: it triggers the actual computation, so the
        // accumulator is only guaranteed to be populated after it returns.
        List<Tuple2<String, Integer>> collect = reducebykeyRDD.collect();
        for (Tuple2<String, Integer> tup : collect) {
            System.out.println(tup);
        }

        Integer num = accumulator.value();
        System.out.println("Total: " + num + " lines");
        sc.close();
    }
}
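// Example run, assuming a hypothetical wordcount.txt containing:
//     hello spark
//     hello world
// The job would print (hello,2), (spark,1), (world,1) in nondeterministic
// order, followed by "Total: 2 lines".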