package com.test;
import java.util.*;
import org.apache.spark.SparkConf;
import org.apache.spark.TaskContext;
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka010.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.api.java.Optional;
import scala.Tuple2;
public class Test5 {
public static void main(String[] args) throws InterruptedException {
// 接收数据的地址和端口
final JavaPairRDD[] lastRdd = new JavaPairRDD[1];
SparkConf conf = new SparkConf().setMaster("local").setAppName(
"streamingTest");
JavaSparkContext sc = new JavaSparkContext(conf);
sc.setLogLevel("ERROR");
sc.setCheckpointDir("./checkpoint");
JavaStreamingContext ssc = new JavaStreamingContext(sc,
Durations.seconds(10));
// kafka相关参数,必要!缺了会报错
Map kafkaParams = new HashMap<>();
kafkaParams.put("bootstrap.servers", "192.168.174.200:9092");
kafkaParams.put("key.deserializer", StringDeserializer.class);
kafkaParams.put("value.deserializer", StringDeserializer.class);
kafkaParams.put("group.id", "newgroup2");
kafkaParams.put("auto.offset.reset", "latest");
kafkaParams.put("enable.auto.commit", false);
Collection topics = Arrays.asList("test");
JavaInputDStream> stream = KafkaUtils
.createDirectStream(ssc, LocationStrategies.PreferConsistent(),
ConsumerStrategies. Subscribe(topics,
kafkaParams));
// 注意这边的stream里的参数本身是个ConsumerRecord对象
JavaPairDStream counts = stream
.flatMap(
x -> Arrays.asList(x.value().toString().split(" "))
.iterator())
.mapToPair(x -> new Tuple2(x, 1))
.reduceByKey((x, y) -> x + y);
//counts.print();
JavaPairDStream result = counts
.updateStateByKey(new Function2, Optional, Optional>() {
private static final long serialVersionUID = 1L;
@Override
public Optional call(List values,
Optional state) throws Exception {
/**
* values:经过分组最后 这个key所对应的value,如:[1,1,1,1,1]
* state:这个key在本次之前之前的状态
*/
Integer updateValue = 0;
if (state.isPresent()) {
updateValue = state.get();
}
for (Integer value : values) {
updateValue += value;
}
return Optional.of(updateValue);
}
});
result.print();
ssc.start();
ssc.awaitTermination();
ssc.close();
}
}