package com.sparkproject.abc; import org.apache.spark.Accumulator; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.Function2; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.broadcast.Broadcast; import org.apache.spark.streaming.Durations; import org.apache.spark.streaming.Time; import org.apache.spark.streaming.api.java.JavaDStream; import org.apache.spark.streaming.api.java.JavaPairDStream; import org.apache.spark.streaming.api.java.JavaReceiverInputDStream; import org.apache.spark.streaming.api.java.JavaStreamingContext; import scala.Tuple2; import java.util.*; public class BroadAndAccu { public static void main(String[] args) { SparkConf conf = new SparkConf() .setAppName("BroadAndAccu") .setMaster("local[2]"); JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(20)); List<String> list = new ArrayList(Arrays.asList("hadoop","spark","kafka")); final Broadcast<List<String>> broadcast = jsc.sc().broadcast(list); final Accumulator<Integer> accumulator = jsc.sc().accumulator(0); JavaReceiverInputDStream<String> lines = jsc.socketTextStream("node",9999); JavaDStream<String> javaDStream = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String s) throws Exception { return Arrays.asList(s.split(" ")); } }); JavaPairDStream<String, Integer> javaPairDStream = javaDStream.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) throws Exception { return new Tuple2<String, Integer>(s,1); } }); javaPairDStream.foreach(new Function2<JavaPairRDD<String, Integer>, Time, Void>() { @Override public Void call(JavaPairRDD<String, Integer> v1, Time v2) throws Exception { v1.filter(new Function<Tuple2<String, Integer>, Boolean>() { @Override public 
Boolean call(Tuple2<String, Integer> v1) throws Exception { if(broadcast.getValue().contains(v1._1)) { accumulator.add(1); return false; }else { return true; } } }).collect(); System.out.println("累计过滤的个数:" + accumulator.value()); return null; } }); jsc.start(); jsc.awaitTermination(); } }
广播变量和累加器 (Broadcast variables and accumulators)
最新推荐文章于 2024-01-13 17:37:27 发布 (Latest recommended article published 2024-01-13 17:37:27)