package com.qf.sparkStreaming.day01
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * NOTE:
 * context.textFileStream("hdfs://qianfeng01:8082/streaming/") must point at a DIRECTORY.
 * When SparkStreaming monitors an HDFS directory, new files must be written with
 * -put or -copyFromLocal (i.e. freshly created), NOT moved in with mv: moved files
 * keep their old modification timestamps, so the file stream will not pick them up.
 */
object _03SparkStreamingReadHdfs {

  /**
   * Streaming word count over an HDFS directory.
   *
   * Watches an HDFS directory in 10-second micro-batches; every new file that
   * appears is read as lines, split into words, and the per-batch word counts
   * are printed to stdout. Runs locally with two threads (`local[2]`) so one
   * thread can receive data while the other processes it.
   *
   * Blocks forever in awaitTermination(); stop externally or kill the process.
   */
  def main(args: Array[String]): Unit = {
    // 1. Spark configuration: local mode, 2 threads (receiver + processor).
    val conf: SparkConf = new SparkConf().setAppName("test").setMaster("local[2]")

    // 2. Streaming context with a 10-second batch interval.
    val context: StreamingContext = new StreamingContext(conf, Seconds(10))

    // 3. Monitor the HDFS directory (must be a directory, not a file).
    //    Only files newly created there (-put / copy, not mv) are detected,
    //    because detection is based on the file's modification timestamp.
    val lines: DStream[String] = context.textFileStream("hdfs://qianfeng01:8082/streaming/")

    // Split each line into words before counting. The original code mapped whole
    // lines to (line, 1), which counted duplicate LINES despite claiming to count
    // words — the flatMap/split fixes that so this is a true word count.
    val wordCount: DStream[(String, Int)] = lines
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)

    // 4. Print each batch's counts (print() shows at most 10 elements by default).
    wordCount.print()

    // 5. Start the streaming computation.
    context.start()

    // 6. Block the main thread so the job keeps running even before data arrives.
    context.awaitTermination()
  }
}