public static void main(String[] args) { SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("JavaSparkStreamingHDFS"); JavaSparkContext sc = new JavaSparkContext(conf); JavaStreamingContext jsc = new JavaStreamingContext(sc, Durations.seconds(10)); //监听网络数据流 //JavaReceiverInputDStream<String> javaInputDStream = sc.socketTextStream("localhost", 9999); //JavaDStream<String> javaDStream = javaInputDStream.flatMap(s -> Arrays.asList(s.split(" ")).iterator()); /** * 监听hdfs指定目录中的数据 */ String directory = "hdfs://master:9000/input/spark/streaming"; JavaDStream<String> linesDStream = jsc.textFileStream(directory); JavaDStream<String> wordsDStream = linesDStream.flatMap(line -> { return Arrays.asList(line.split(" ")); }); JavaPairDStream<String, Integer> pairDStream = wordsDStream.mapToPair(word -> { return new Tuple2<String, Integer>(word, 1); }); JavaPairDStream<String, Integer> retDStream = pairDStream.reduceByKey((v1, v2) -> { return v1 + v2; }); retDStream.print(); jsc.start(); jsc.awaitTermination(); }
SparkStreaming之HDFS操作
最新推荐文章于 2024-01-04 06:30:00 发布