实现SparkStream类:两个版本二选一即可
实现SparkStream类(Scala版)
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Spark Streaming word-count demo.
 *
 * Reads text lines from a TCP socket, splits them into whitespace-separated
 * words and prints the word counts of every batch.
 */
object SparkStreamDemo {

  /**
   * Builds the streaming pipeline, starts it and blocks until it terminates.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // NOTE: a receiver-based source occupies one thread, so the local master
    // must provide at least two threads for any batch to be processed.
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("sparkStream")

    // Batch interval: input is collected and processed every 3 seconds.
    val streamingContext = new StreamingContext(conf, Seconds(3))

    // Socket source to read from.
    // NOTE: the host below is a masked placeholder; replace it with the address
    // of the machine on which the socket server (e.g. `nc -lk 7777`) is running.
    val socketLineStream: ReceiverInputDStream[String] =
      streamingContext.socketTextStream("192.168.**.**", 7777)
    // Alternative source: watch a local directory for new files.
    //val socketLineStream: DStream[String] = streamingContext.textFileStream("file:///D:/ideaProject/SparkStream/in")

    // Word count over the collected lines.
    // String.split yields "" for a blank line and a leading "" for a line that
    // starts with whitespace; drop those so that "" is never counted as a word.
    val wordStream: DStream[String] =
      socketLineStream.flatMap(_.split("\\s+")).filter(_.nonEmpty)
    val mapStream: DStream[(String, Int)] = wordStream.map(word => (word, 1))
    val wordcountStream: DStream[(String, Int)] = mapStream.reduceByKey(_ + _)

    // Print the result of each batch to the console.
    wordcountStream.print()

    // Start the receiver and keep the driver alive until the context stops.
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
实现SparkStream类(Java版)
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
 * Spark Streaming word-count demo (Java API).
 *
 * Reads text lines from a TCP socket, splits them into whitespace-separated
 * words and prints the word counts of every batch.
 */
public class SparkStreamJava {

    /**
     * Builds the streaming pipeline, starts it and blocks until it terminates.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // NOTE: a receiver-based source occupies one thread, so the local master
        // must provide at least two threads for any batch to be processed.
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("sparkStreamjava");

        // Batch interval: input is collected and processed every 3 seconds.
        JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(3));

        // NOTE: the host below is a masked placeholder; replace it with the address
        // of the machine on which the socket server (e.g. `nc -lk 7777`) is running.
        JavaReceiverInputDStream<String> lines = jsc.socketTextStream("192.168.**.**", 7777);

        // Split every line into words.
        JavaDStream<String> flatMap = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterator<String> call(String s) throws Exception {
                // String.split yields "" for a blank line and a leading "" for a
                // line that starts with whitespace; skip those so that "" is
                // never counted as a word.
                List<String> words = new ArrayList<String>();
                for (String word : s.split("\\s+")) {
                    if (!word.isEmpty()) {
                        words.add(word);
                    }
                }
                return words.iterator();
            }
        });

        // Pair every word with an initial count of 1.
        JavaPairDStream<String, Integer> mapToPair = flatMap.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) throws Exception {
                return new Tuple2<String, Integer>(s, 1);
            }
        });

        // Sum the counts of identical words within each batch.
        JavaPairDStream<String, Integer> reduceByKey = mapToPair.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer integer, Integer integer2) throws Exception {
                return integer + integer2;
            }
        });

        // Print the result of each batch to the console.
        reduceByKey.print();

        // Start the receiver and keep the driver alive until the context stops.
        jsc.start();
        try {
            jsc.awaitTermination();
        } catch (InterruptedException e) {
            // Restore the interrupt status so the interruption is not lost.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }
}
启动SparkStreaming
开启监听端口(在代码中指定的主机上执行,建议先于 SparkStreaming 程序启动)
- 启用命令:
nc -lk 7777
- 输入数据
hello world
hello java
hello spark
查看Streaming输出内容:若以上三行数据在同一个采集周期(3秒)内输入,控制台将打印该批次的统计结果,例如 (hello,3)、(world,1)、(java,1)、(spark,1)