流程:
- 创建maven项目
- 引入相应的依赖
- 编写代码:pull(拉)方式 或者 push(推)方式
- 发布
pull方式
import java.net.InetSocketAddress
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.flume.{FlumeUtils, SparkFlumeEvent}
//todo:利用sparkStreaming对接flume数据,实现单词计算------pull拉模式
object SparkStreamingFlume_Poll {
def main(args: Array[String]): Unit = {
//1、创建sparkConf
val sparkConf: SparkConf = new SparkConf().setAppName("SparkStreamingFlume_Poll").setMaster("local[2]")
//2、创建sparkContext
val sc = new SparkContext(sparkConf)
sc.setLogLevel("WARN")
//3、创建StreamingContext
val ssc = new StreamingContext(sc,Seconds(5))
//定义一个flume地址集合,可以同时接受多个flume的数据
val address=Seq(new InetSocketAddress("192.168.200.100",9999),new InetSocketAddress("192.168.200.101",9999))
//4、获取flume中数据
val stream: ReceiverInputDStream[SparkFlumeEvent] = FlumeUtils.createPollingStream(ssc,address,StorageLevel.MEMORY_AND_DISK_SER_2)
//5、从Dstream中获取flume中的数据 {"header":xxxxx "body":xxxxxx}
val lineDstream: DStream[String] = stream.map(x => new String(x.event.getBody.array()))
//6、切分每一行,每个单词计为1
val wordAndOne: DStream[(String, Int)] = lineDstream.flatMap(_.split(" ")).map((_,1))
//7、相同单词出现的次数累加
val result: DStream[(String, Int)] = wordAndOne.reduceByKey(_+_)
//8、打印输出
result.print()
//开启计算
ssc.start()
ssc.awaitTermination()
}
}
push方式
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.flume.{FlumeUtils, SparkFlumeEvent}
//todo:利用sparkStreaming对接flume数据,实现单词计数------Push推模式
object SparkStreamingFlume_Push {
def main(args: Array[String]): Unit = {
//1、创建sparkConf
val sparkConf: SparkConf = new SparkConf().setAppName("SparkStreamingFlume_Push").setMaster("local[2]")
//2、创建sparkContext
val sc = new SparkContext(sparkConf)
sc.setLogLevel("WARN")
//3、创建StreamingContext
val ssc = new StreamingContext(sc,Seconds(5))
//4、获取flume中的数据
val stream: ReceiverInputDStream[SparkFlumeEvent] = FlumeUtils.createStream(ssc,"192.168.11.123",9999)
//5、从Dstream中获取flume中的数据 {"header":xxxxx "body":xxxxxx}
val lineDstream: DStream[String] = stream.map(x => new String(x.event.getBody.array()))
//6、切分每一行,每个单词计为1
val wordAndOne: DStream[(String, Int)] = lineDstream.flatMap(_.split(" ")).map((_,1))
//7、相同单词出现的次数累加
val result: DStream[(String, Int)] = wordAndOne.reduceByKey(_+_)
//8、打印输出
result.print()
//开启计算
ssc.start()
ssc.awaitTermination()
}
}