docker
docker pull sequenceiq/spark:1.6.0
docker run -it -p 8088:8088 -p 8042:8042 -p 4040:4040 -h sandbox sequenceiq/spark:1.6.0 bash

Port 8088 is the YARN ResourceManager UI, 8042 the NodeManager UI, and 4040 the Spark application UI used below.
maven
<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-actuator</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
            <exclusion>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
        <exclusions>
            <exclusion>
                <groupId>commons-logging</groupId>
                <artifactId>commons-logging</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.module</groupId>
        <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.module</groupId>
        <artifactId>jackson-module-jaxb-annotations</artifactId>
        <version>2.7.4</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-databind</artifactId>
        <version>2.7.4</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-annotations</artifactId>
        <version>2.7.4</version>
    </dependency>
</dependencies>
versions

<properties>
    <scala.binary.version>2.10</scala.binary.version>
    <spark.version>1.6.1</spark.version>
</properties>
The key point is to pull in the Scala build of Jackson (jackson-module-scala); without it, the job fails at startup with:
Exception in thread "main" java.lang.VerifyError: class com.fasterxml.jackson.module.scala.ser.ScalaIteratorSerializer overrides final method withResolved.(Lcom/fasterxml/jackson/databind/BeanProperty;Lcom/fasterxml/jackson/databind/jsontype/TypeSerializer;Lcom/fasterxml/jackson/databind/JsonSerializer;)Lcom/fasterxml/jackson/databind/ser/std/AsArraySerializerBase;
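This VerifyError is a symptom of mixed Jackson versions on the classpath: Spark 1.6 brings its own, older jackson-module-scala, which is binary-incompatible with the 2.7.x jackson-databind above, so the two are pinned to matching versions in the pom. `mvn dependency:tree` shows which Jackson artifacts Maven actually resolved; the snippet below (a hypothetical diagnostic, not part of the project) does the same check at runtime by printing which jars the conflicting classes are loaded from:

// Hypothetical diagnostic, not part of the project: locate the jars that
// the conflicting Jackson classes were actually loaded from.
public class JacksonClasspathCheck {
    public static void main(String[] args) throws ClassNotFoundException {
        String[] names = {
                "com.fasterxml.jackson.databind.ObjectMapper",
                "com.fasterxml.jackson.module.scala.ser.ScalaIteratorSerializer"
        };
        for (String name : names) {
            Class<?> clazz = Class.forName(name);
            System.out.println(name + " -> "
                    + clazz.getProtectionDomain().getCodeSource().getLocation());
        }
    }
}

If the two classes resolve to jars from different Jackson release lines, that mismatch is what triggers the VerifyError.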
streaming
import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ch.qos.logback.classic.Level;

import scala.Tuple2;

/**
 * Test input: nc -lk 9999
 * Spark UI:   http://192.168.0.102:4040
 */
public void start() {
    // Spring Boot routes logging through logback; raise the root level so
    // Spark's INFO chatter does not drown out the word counts.
    ch.qos.logback.classic.Logger root =
            (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME);
    root.setLevel(Level.WARN);
    // local[2]: one thread for the socket receiver, one for processing
    SparkConf sparkConf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    // Create a DStream that will connect to hostname:port, like localhost:9999
    JavaReceiverInputDStream<String> lines = streamingContext.socketTextStream("localhost", 9999);
    // Split each line into words
    JavaDStream<String> words = lines.flatMap(
            new FlatMapFunction<String, String>() {
                @Override public Iterable<String> call(String x) {
                    // LOGGER is the enclosing class's slf4j logger
                    LOGGER.debug("flatMap called -> [{}]", x);
                    return Arrays.asList(x.split(" "));
                }
            });
    // Count each word in each batch
    JavaPairDStream<String, Integer> pairs = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            });
    JavaPairDStream<String, Integer> wordCounts = pairs.reduceByKey(
            new Function2<Integer, Integer, Integer>() {
                @Override public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });
    // Print the first ten elements of each RDD generated in this DStream to the console
    wordCounts.print();
    streamingContext.start();            // Start the computation
    streamingContext.awaitTermination(); // Wait for the computation to terminate
}
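Since FlatMapFunction, PairFunction, and Function2 are single-method interfaces, the same pipeline reads more compactly with Java 8 lambdas (assuming the project compiles with Java 8); this sketch replaces the three anonymous classes above and adds no new behavior:

JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(x.split(" ")));
JavaPairDStream<String, Integer> wordCounts = words
        .mapToPair(s -> new Tuple2<>(s, 1)) // pair each word with a count of 1
        .reduceByKey((i1, i2) -> i1 + i2);  // sum the counts per word
wordCounts.print();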
run
Start netcat:
nc -lk 9999
Run the application, type some space-separated words into the netcat session, then open the Spark UI:
http://192.168.0.102:4040/
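If netcat is not available, a bare-bones Java stand-in for nc -lk 9999 could look like the sketch below (FakeNetcat is a hypothetical name, not part of the project); it accepts the Spark receiver's connection and forwards stdin line by line:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.ServerSocket;
import java.net.Socket;

// Hypothetical helper, not part of the project: a minimal `nc -lk 9999`.
// Accepts one connection from the Spark receiver and forwards stdin to it.
public class FakeNetcat {
    public static void main(String[] args) throws Exception {
        try (ServerSocket server = new ServerSocket(9999);
             Socket client = server.accept(); // blocks until the receiver connects
             PrintWriter out = new PrintWriter(client.getOutputStream(), true);
             BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in))) {
            String line;
            while ((line = stdin.readLine()) != null) {
                out.println(line); // each line becomes one record in the 1-second batch
            }
        }
    }
}

Typing hello world hello into either netcat or this helper should then print (hello,2) and (world,1) in the application console under the next batch header.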
docs