Spark学习(6)- Spark Streaming整合Flume

流程:

  1. 创建maven项目
  2. 引入相应的依赖
  3. 编写代码:pull(拉)方式 或者 push(推)方式
  4. 发布

pull方式

 import java.net.InetSocketAddress
​
      import org.apache.spark.{SparkConf, SparkContext}
      import org.apache.spark.storage.StorageLevel
      import org.apache.spark.streaming.{Seconds, StreamingContext}
      import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
      import org.apache.spark.streaming.flume.{FlumeUtils, SparkFlumeEvent}
​
      /**
       * Word count over Flume events with Spark Streaming, pull (polling) mode:
       * the stream is created with `FlumeUtils.createPollingStream` against a
       * list of Flume addresses.
       */
      object SparkStreamingFlume_Poll {

        /**
         * Builds the streaming job, starts it and blocks until it terminates.
         *
         * @param args command-line arguments (unused)
         */
        def main(args: Array[String]): Unit = {
          // 1. Create the SparkConf. The master only defaults to local[2]; a master
          //    supplied at deploy time (e.g. spark-submit --master) is no longer overridden.
          val sparkConf: SparkConf = new SparkConf()
            .setAppName("SparkStreamingFlume_Poll")
            .setIfMissing("spark.master", "local[2]")

          // 2. Create the SparkContext and reduce log noise.
          val sc = new SparkContext(sparkConf)
          sc.setLogLevel("WARN")

          // 3. Create the StreamingContext with a 5-second batch interval.
          val ssc = new StreamingContext(sc, Seconds(5))

          // Flume addresses to poll; several agents can be consumed at once.
          val address = Seq(
            new InetSocketAddress("192.168.200.100", 9999),
            new InetSocketAddress("192.168.200.101", 9999)
          )

          // 4. Pull the events from Flume.
          val stream: ReceiverInputDStream[SparkFlumeEvent] =
            FlumeUtils.createPollingStream(ssc, address, StorageLevel.MEMORY_AND_DISK_SER_2)

          // 5. Extract the event body as text. Each event looks like {"header": ..., "body": ...}.
          //    Decode only the readable region of the buffer (array() would expose the whole
          //    backing array) and use an explicit charset instead of the platform default.
          //    duplicate() keeps the event's own buffer position untouched.
          val lineDstream: DStream[String] = stream.map { flumeEvent =>
            java.nio.charset.StandardCharsets.UTF_8
              .decode(flumeEvent.event.getBody.duplicate())
              .toString
          }

          // 6. Split each line on whitespace and pair every word with 1. Empty tokens
          //    (blank lines, leading or repeated spaces) are dropped so they are not counted.
          val wordAndOne: DStream[(String, Int)] =
            lineDstream
              .flatMap(_.split("\\s+"))
              .filter(_.nonEmpty)
              .map((_, 1))

          // 7. Sum the counts per word within each batch.
          val result: DStream[(String, Int)] = wordAndOne.reduceByKey(_ + _)

          // 8. Print the result of each batch.
          result.print()

          // Start the computation and wait for it to finish.
          ssc.start()
          ssc.awaitTermination()
        }
      }

push方式

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.flume.{FlumeUtils, SparkFlumeEvent}
​
/**
 * Word count over Flume events with Spark Streaming, push mode:
 * the stream is created with `FlumeUtils.createStream` on a fixed host and port.
 */
object SparkStreamingFlume_Push {

  /**
   * Builds the streaming job, starts it and blocks until it terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // 1. Create the SparkConf. The master only defaults to local[2]; a master
    //    supplied at deploy time (e.g. spark-submit --master) is no longer overridden.
    val sparkConf: SparkConf = new SparkConf()
      .setAppName("SparkStreamingFlume_Push")
      .setIfMissing("spark.master", "local[2]")

    // 2. Create the SparkContext and reduce log noise.
    val sc = new SparkContext(sparkConf)
    sc.setLogLevel("WARN")

    // 3. Create the StreamingContext with a 5-second batch interval.
    val ssc = new StreamingContext(sc, Seconds(5))

    // 4. Receive the events from Flume on the given host and port.
    val stream: ReceiverInputDStream[SparkFlumeEvent] =
      FlumeUtils.createStream(ssc, "192.168.11.123", 9999)

    // 5. Extract the event body as text. Each event looks like {"header": ..., "body": ...}.
    //    Decode only the readable region of the buffer (array() would expose the whole
    //    backing array) and use an explicit charset instead of the platform default.
    //    duplicate() keeps the event's own buffer position untouched.
    val lineDstream: DStream[String] = stream.map { flumeEvent =>
      java.nio.charset.StandardCharsets.UTF_8
        .decode(flumeEvent.event.getBody.duplicate())
        .toString
    }

    // 6. Split each line on whitespace and pair every word with 1. Empty tokens
    //    (blank lines, leading or repeated spaces) are dropped so they are not counted.
    val wordAndOne: DStream[(String, Int)] =
      lineDstream
        .flatMap(_.split("\\s+"))
        .filter(_.nonEmpty)
        .map((_, 1))

    // 7. Sum the counts per word within each batch.
    val result: DStream[(String, Int)] = wordAndOne.reduceByKey(_ + _)

    // 8. Print the result of each batch.
    result.print()

    // Start the computation and wait for it to finish.
    ssc.start()
    ssc.awaitTermination()
  }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值