Transform算子-使用Java开发
// Streaming word count that drops blacklisted punctuation tokens using DStream.transform.
// Lines are read from a socket on localhost:9999 in 2-second batches, split on single
// spaces, filtered against the blacklist, counted per batch and printed.
Logger.getLogger("org").setLevel(Level.WARN);
SparkConf conf = new SparkConf();
conf.setMaster("local[2]").setAppName("WordBlock");
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(2));

// Tokens to exclude from the counts. The keyed form is built once here, outside the
// transform function, because it never changes between batches. The RDD is referenced
// directly by the transform function: an RDD is a handle to a distributed dataset, not a
// plain value, so it is not wrapped in a Broadcast.
JavaRDD<String> blackRDD = jssc.sparkContext().parallelize(Arrays.asList("?", "!", ",", "."));
// The Boolean value is only a placeholder; just the keys are used for matching.
JavaPairRDD<String, Boolean> blacklistPairs = blackRDD.mapToPair(word -> new Tuple2<>(word, true));

JavaReceiverInputDStream<String> dStream = jssc.socketTextStream("localhost", 9999);
JavaPairDStream<String, Integer> wordOneStream = dStream
        .flatMap(line -> Arrays.asList(line.split(" ")).iterator())
        .mapToPair(word -> new Tuple2<>(word, 1));

// Per-batch step: keep every (word, 1) pair whose word is NOT a blacklist key, then keep
// only the word. subtractByKey preserves repeated occurrences of a word, so the counts
// computed afterwards are unaffected. The explicit Function type selects the
// single-argument transform overload unambiguously.
Function<JavaPairRDD<String, Integer>, JavaRDD<String>> dropBlacklisted =
        pairRDD -> pairRDD.subtractByKey(blacklistPairs).keys();

JavaPairDStream<String, Integer> result = wordOneStream
        .transform(dropBlacklisted)
        .mapToPair(word -> new Tuple2<>(word, 1))
        .reduceByKey((v1, v2) -> v1 + v2);
result.print();

// Always release the streaming context, including when start() or awaitTermination()
// exits with an exception.
try {
    jssc.start();
    jssc.awaitTermination();
} finally {
    jssc.stop();
}