需求:Spark Streaming 自定义接收器 Receiver,通过 socket 按行读入数据,实现 wordcount 案例
1.自定义MyReceiver类
package com.atguigu.bigdata.stream
import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver
/**
  * Custom Spark Streaming receiver that reads UTF-8 text lines from a TCP socket
  * and stores each line into Spark.
  *
  * Reading ends when the peer sends a line equal to "END" or closes the stream.
  * Created 2020-02-29.
  *
  * @param host host name of the socket server to connect to
  * @param port TCP port of the socket server
  * @author tianmin
  */
class MyReceiver(host:String,port:Int) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2 ){
  // Assigned on the receiving thread and closed from onStop(), hence volatile.
  @volatile var socket:Socket = _

  /**
    * Connects to host:port and stores every received line until "END" or end of stream.
    *
    * Runs on the thread started by onStart(). Non-fatal failures trigger a receiver
    * restart unless the receiver has already been stopped.
    */
  def reciver(): Unit ={
    try {
      // Keep a local reference so a concurrent onStop() nulling the field cannot cause an NPE here.
      val connection = new Socket(host,port)
      socket = connection
      // Buffered reader so the stream can be consumed one line at a time.
      val reader = new BufferedReader(new InputStreamReader(connection.getInputStream,StandardCharsets.UTF_8))
      try {
        // A Scala assignment evaluates to Unit, so `(line = reader.readLine()) != null`
        // is always true and never detects end of stream. Iterate over the readLine()
        // results instead and stop on null (EOF) or the "END" sentinel.
        Iterator.continually(reader.readLine())
          .takeWhile(line => line != null && line != "END")
          .foreach(line => store(line))
      } finally {
        // Always release the input stream, not only on the "END" path.
        reader.close()
      }
    } catch {
      case scala.util.control.NonFatal(e) =>
        // onStop() closes the socket, which makes a blocked readLine() throw;
        // that is an expected shutdown, not an error worth restarting for.
        if (!isStopped()) {
          restart(s"Error receiving data from $host:$port", e)
        }
    }
  }

  /** Starts the receiver by running the blocking read loop on its own thread. */
  override def onStart(): Unit = {
    // onStart() must not block, so the socket is read on a separate thread.
    new Thread(new Runnable {
      override def run(): Unit = {
        reciver()
      }
    }).start()
  }

  /** Stops the receiver by closing the socket, if one is open. */
  override def onStop(): Unit = {
    if(socket != null){
      socket.close()
      socket = null
    }
  }
}
2.应用
package com.atguigu.bigdata.stream
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
  * Word-count driver that consumes lines through a [[MyReceiver]] and prints
  * per-batch word counts.
  *
  * Created 2020-02-29.
  *
  * @author tianmin
  */
object SocketWordCount {
  /**
    * Builds the streaming job, starts it and blocks until it terminates.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    // Step 1: Spark configuration and a streaming context with 5-second batches.
    val sparkConf: SparkConf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("StreamWordCount")
    val ssc: StreamingContext = new StreamingContext(sparkConf, Seconds(5))

    // Step 2: input stream backed by the custom socket receiver.
    val receiver = new MyReceiver("hadoop101", 44444)
    val lines: ReceiverInputDStream[String] = ssc.receiverStream(receiver)

    // Split each line on single spaces, pair every word with 1, then sum per word.
    val words: DStream[String] = lines.flatMap(line => line.split(" "))
    val pairs: DStream[(String, Int)] = words.map(word => (word, 1))
    val counts: DStream[(String, Int)] = pairs.reduceByKey((left, right) => left + right)

    // Step 3: print the counts of each batch.
    counts.print()

    // Step 4: start the job and wait for it to finish.
    ssc.start()
    ssc.awaitTermination()
  }
}
3.测试
Linux 输入
[atguigu@hadoop101 ~]$ nc -lk 44444
a a a a
b b b b
4.结果
-------------------------------------------
Time: 1582941605000 ms
-------------------------------------------
(d,3)
(a,3)
(c,5)