package org.example
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Batch word count over files stored in HDFS.
 *
 * Reads text from `hdfs://bd01:9000/spark/stream`, splits each line on
 * spaces, counts occurrences of each word, and prints the (word, count)
 * pairs. Note: `fs.defaultFS` (configured in core-site.xml) must be
 * reachable from the machine running this driver.
 */
object Spark_Hdfs {
  def main(args: Array[String]): Unit = {
    // Silence Spark/Jetty logging so the word counts are readable.
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Immutable configuration: nothing here is ever reassigned, so use val.
    val conf = new SparkConf().setMaster("local[*]").setAppName("SparkStream_HDFS")
    // val ssc = new StreamingContext(conf, Seconds(6))  // streaming variant, kept for reference
    val sc = new SparkContext(conf)

    try {
      val lines  = sc.textFile("hdfs://bd01:9000/spark/stream")
      val words  = lines.flatMap(_.split(" "))
      val pairs  = words.map(word => (word, 1))
      val counts = pairs.reduceByKey(_ + _)

      // collect() brings results to the driver before printing; a bare
      // rdd.foreach(println) would print on the executors instead, which
      // only happens to appear locally because master is local[*].
      counts.collect().foreach(println)
    } finally {
      // Always release the SparkContext, even if the job fails.
      sc.stop()
    }
  }
}
Spark打印出文件流的信息。这里有几个要注意的点:一是写 HDFS 文件路径时要注意 fs.defaultFS(在 core-site.xml 中配置)必须能从外部访问通。