需求
自定义数据源,实现监控指定主机的某个端口,并获取该端口发送的内容。
代码实现
- 继承Receiver,实现onStart()和onStop()方法。
/** Word-count driver that consumes text lines delivered by the custom receiver `MyReceiver`. */
object SparkStreaming02_CustomerReceiver {
  def main(args: Array[String]): Unit = {
    // Spark configuration: application name is this object's simple class name, master is local[*].
    val sparkConf: SparkConf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      .setMaster("local[*]")

    // Streaming entry point, created with Seconds(3) as its batch duration.
    val streamingContext = new StreamingContext(sparkConf, Seconds(3))

    // Input stream fed by the custom receiver connected to hadoop01:8888.
    val lines: ReceiverInputDStream[String] =
      streamingContext.receiverStream(MyReceiver("hadoop01", 8888))

    // Split each line on single spaces, pair every word with 1, then sum the counts per word.
    val words: DStream[String] = lines.flatMap(line => line.split(" "))
    val wordOnes: DStream[(String, Int)] = words.map(word => (word, 1))
    val wordCounts: DStream[(String, Int)] = wordOnes.reduceByKey((left, right) => left + right)
    wordCounts.print()

    // Start receiving and processing data.
    streamingContext.start()
    // Block the main thread until the streaming computation terminates.
    streamingContext.awaitTermination()
  }
}
/**
 * Custom data source, modelled on `socketTextStream`, that reads UTF-8 text lines from a TCP
 * socket and stores each line into Spark.
 *
 * The type argument of `Receiver` is the type of the records being read.
 *
 * @param host host name to connect to
 * @param port TCP port to connect to
 */
case class MyReceiver(host: String, port: Int) extends Receiver[String](StorageLevel.MEMORY_ONLY) {
  // Currently open connection, or null when there is none; closed and reset by onStop().
  private var socket: Socket = _

  /** Called when the receiver starts: launches a daemon thread that runs [[receive]]. */
  override def onStart(): Unit = {
    new Thread("Socket Receiver") {
      setDaemon(true)
      override def run(): Unit = receive()
    }.start()
  }

  /**
   * Connects to `host:port` and stores every line read until the stream ends or the receiver
   * is stopped.
   *
   * A restart is requested when the connection cannot be established, when reading fails while
   * the receiver is still running, or when the peer closes the stream. The socket is always
   * closed on exit via [[onStop]].
   */
  def receive(): Unit = {
    try {
      // Keep a local reference so a concurrent onStop() cannot null it out mid-setup.
      val connection = new Socket(host, port)
      socket = connection
      val reader = new BufferedReader(
        new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))

      // In Scala an assignment evaluates to Unit, so the Java idiom
      // `while ((line = reader.readLine()) != null)` is always true and never terminates.
      // Read explicitly before each test instead.
      var line: String = reader.readLine()
      while (!isStopped() && line != null) {
        store(line)
        line = reader.readLine()
      }

      // End of stream while still running: ask for a restart so the receiver reconnects
      // instead of silently going idle.
      if (!isStopped()) {
        restart("Socket data stream had no more data")
      }
    } catch {
      case e: ConnectException =>
        restart(s"Error connecting to $host:$port", e)
      case scala.util.control.NonFatal(e) =>
        // A read failure after the receiver was stopped is just onStop() closing the socket.
        if (!isStopped()) {
          restart("Error receiving data", e)
        }
    } finally {
      onStop()
    }
  }

  /** Closes the socket if one is open; synchronized so repeated or concurrent calls are safe. */
  override def onStop(): Unit = {
    synchronized {
      if (socket != null) {
        socket.close()
        socket = null
        println(s"Closed socket to $host:$port")
      }
    }
  }
}