Spark Streaming自定义接收器
需求:自定义接收器,通过Socket完成WordCount案例
1.自定义接收器类
package com.atguigu.bigdata.stream
import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver
/**
 * Custom Spark Streaming receiver that connects to a TCP socket and
 * stores each received text line into Spark for downstream processing.
 *
 * Stops reading when the stream ends, when the sentinel line "END" is
 * received, or when the receiver is asked to stop.
 *
 * @param host hostname of the socket server to connect to
 * @param port port of the socket server
 */
class MyReceiver(host:String,port:Int) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2){
  // Kept as a field so onStop() can close it and unblock the reading thread.
  var socket:Socket = _
  /** Connect and push lines into Spark until EOF, "END", or stop. */
  def receiver(): Unit ={
    try {
      // Initialize the socket connection
      socket = new Socket(host, port)
      // Read the stream line by line as UTF-8 text
      val reader = new BufferedReader(new InputStreamReader(socket.getInputStream, "utf-8"))
      // BUG FIX: the original `while((line = reader.readLine()) != null)` compared
      // the Unit result of a Scala assignment to null — always true — so
      // end-of-stream was never detected and store(null) could be reached.
      var line = reader.readLine()
      while (!isStopped() && line != null && !"END".equals(line)) {
        // Store the received line into Spark's data set
        store(line)
        line = reader.readLine()
      }
      reader.close()
    } catch {
      case e: java.io.IOException =>
        // Let Spark tear down and re-launch the receiver on connection failure,
        // as recommended by the Receiver contract.
        restart("Error receiving data from socket " + host + ":" + port, e)
    }
  }
  override def onStart(): Unit = {
    // Receive on a dedicated thread so onStart() returns immediately,
    // as required by the Receiver lifecycle.
    new Thread(new Runnable {
      override def run(): Unit = {
        receiver()
      }
    }).start()
  }
  override def onStop(): Unit = {
    // Closing the socket also unblocks a thread waiting in readLine().
    if(socket != null){
      socket.close()
      socket = null
    }
  }
}
2.调用
package com.atguigu.bigdata.stream
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Driver that wires the custom [[MyReceiver]] into a Spark Streaming
 * word-count job: lines from the socket are split on spaces, paired
 * with 1, and summed per word every batch.
 */
object StreamWordCountDefine {
  def main(args: Array[String]): Unit = {
    // Local-mode Spark configuration using all available cores
    val sparkConf: SparkConf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("StreamWordCountDefine")
    // One micro-batch every 5 seconds
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    // Plug the custom receiver into the streaming context
    val lines: ReceiverInputDStream[String] =
      ssc.receiverStream(new MyReceiver("hadoop101", 44444))
    // Classic word count: tokenize, pair each word with 1, sum per key
    val wordCounts: DStream[(String, Int)] = lines
      .flatMap(_.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
    // Print each batch's counts to stdout
    wordCounts.print()
    // Start the computation and block until termination
    ssc.start()
    ssc.awaitTermination()
  }
}
3.输入数据
4.输出结果: