import org.apache.spark._
import org.apache.spark.streaming._
/**
 * Minimal Spark Streaming word-count job.
 *
 * Watches an HDFS directory for newly created text files, splits each line
 * on single spaces, counts word occurrences within every micro-batch,
 * writes the per-batch counts back to HDFS, and prints a sample to stdout.
 * Runs until externally terminated.
 */
object Stream {

  // NOTE(review): master URL and HDFS paths are hard-coded; consider taking
  // them from `args` so the job is portable across clusters.
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setMaster("spark://10.149.252.106:7077")
      .setAppName("Streaming")

    // Streaming context with a 10-second batch interval.
    // (The original comment claimed 20 seconds; the code uses Seconds(10).)
    val ssc = new StreamingContext(sparkConf, Seconds(10))

    // Monitor the HDFS input directory: only files created/moved into it
    // after the job starts are picked up by textFileStream.
    // val lines = ssc.textFileStream("file:///home/ubuntu/sparkJar/test")
    val lines = ssc.textFileStream("hdfs://10.149.252.106:9000/input")

    // Per-batch word count: tokenize on single spaces, then reduce by key.
    val words = lines.flatMap(_.split(" "))
    val wordCount = words.map(word => (word, 1)).reduceByKey(_ + _)

    // Persist each batch's counts under the output prefix (one directory per
    // batch interval) and print a sample of the counts to the driver log.
    wordCount.saveAsTextFiles("hdfs://10.149.252.106:9000/output/")
    wordCount.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
// Uses Spark Streaming to monitor files in HDFS and compute word counts on them.
// (Source article originally published 2022-10-01 00:38:59.)