import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SQLContext, SaveMode}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object Output_01 {
  def main(args: Array[String]): Unit = {
    // Windows only: point hadoop.home.dir at a local Hadoop install (winutils)
    System.setProperty("hadoop.home.dir", "E:\\software\\bigdate\\hadoop-2.6.0-cdh5.15.0\\hadoop-2.6.0-cdh5.15.0")
    val conf = new SparkConf()
    conf.setMaster("local[2]") // one thread receives the data, the other processes it
    conf.setAppName("WordCountStreaming") // app name shown in the YARN resource manager
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    val streamingContext = new StreamingContext(sc, Seconds(5)) // 5-second micro-batches
    val sourceDS = streamingContext.socketTextStream("wangfutai", 7777)
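    // For a quick local test (assumption: netcat is available), run
    // `nc -lk 7777` on host "wangfutai" and type lines such as "1,alice,20".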
    /* Word-count variant, kept for reference:
    sourceDS.flatMap(_.split(" ")).map((_, 1))
      .reduceByKey(_ + _).saveAsTextFiles("E:\\sparkdata\\") */

    // Each input line is "id,name,age"; the output is saved in JSON format.
    sourceDS.map { line =>
      // Split once and build a Row matching the schema defined below
      val fields = line.split(",")
      Row(fields(0).toInt, fields(1), fields(2).toInt)
    }
    .foreachRDD { batchRDD =>
      // Schema: (id: Int, name: String, age: Int), all nullable
      val schema = StructType(List(
        StructField("id", IntegerType, true),
        StructField("name", StringType, true),
        StructField("age", IntegerType, true)))
      val df = sqlContext.createDataFrame(batchRDD, schema)
      df.registerTempTable("per")
      val res = sqlContext.sql("select * from per limit 2")
      // res.save("E:\\sparkdata")
      // Append mode adds new JSON part files for every batch
      res.write.format("json")
        .mode(SaveMode.Append).save("E:\\sparkdata")
    }
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
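
// Sketch of one test session (assumptions: the paths above exist and netcat is installed):
//   terminal 1: nc -lk 7777            then type e.g.  1,alice,20
//   terminal 2: run Output_01          each 5-second batch appends JSON part files
//               under E:\sparkdata, e.g. {"id":1,"name":"alice","age":20}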