通过继承Receiver,并实现onStart、onStop方法来自定义数据源采集。
需要自己开一个socket,然后输入内容。
nc -lk master 8888
package com.jiangnan.spark
import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver
class TestSparkStreamCustomReceiver(host:String,port:Int) extends Receiver[String](StorageLevel.DISK_ONLY){
//启动的时候调用
override def onStart(): Unit = {
println("启动了")
//创建一个socket
val socket = new Socket(host,port)
val reader = new BufferedReader(new InputStreamReader(socket.getInputStream))
//创建一个变量去读取socket的输入流的数据
var line = reader.readLine()
while(!isStopped() && line != null){
//如果接收到了数据,就是用父类中的st