import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.StreamingContext._ // not necessary since Spark 1.3
import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver
import java.lang.Thread.sleep
/**
 * Example of writing a custom data source (receiver) for Spark Streaming.
 */
object SparkStreaming_CustomDataSource {

  /**
   * Entry point: creates a streaming context with master "local[2]" and a
   * one-second batch interval, attaches a [[CustomReceiver]] as the input
   * stream, prints the received stream and then blocks until the streaming
   * computation terminates.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("NetworkWordCount")
    val streamingContext = new StreamingContext(sparkConf, Seconds(1))

    // Receive data from the custom data source and print it.
    streamingContext
      .receiverStream(new CustomReceiver)
      .print()

    // Start the computation, then wait for it to terminate.
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
/**
 * Receiver that feeds a fixed list of strings into Spark Streaming.
 *
 * `onStart` launches a background thread that stores each element of the
 * list once, pausing 2000 ms after every element. The receiver is created
 * with `StorageLevel.MEMORY_AND_DISK_2`.
 */
class CustomReceiver
  extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) with Logging {

  /**
   * Starts the receiving thread and returns without waiting for it, so the
   * caller is not blocked while data is being produced.
   */
  override def onStart(): Unit = {
    // NOTE(review): the thread name says "Socket Receiver" although no socket
    // is opened here; the name is kept unchanged so thread dumps stay the same.
    new Thread("Socket Receiver") {
      override def run(): Unit = receive()
    }.start()
  }

  /**
   * Stores the fixed elements one at a time, sleeping 2000 ms after each.
   *
   * The loop re-checks `isStopped()` before every element so that no further
   * data is stored once the receiver has been stopped; the thread then exits.
   */
  private def receive(): Unit = {
    val elements = List("b", "a", "c", "d", "e")
    val remaining = elements.iterator
    while (!isStopped() && remaining.hasNext) {
      store(remaining.next())
      sleep(2000)
    }
  }

  /**
   * Nothing to release here: the receiving thread ends by itself once it
   * observes `isStopped()` or has emitted every element.
   */
  override def onStop(): Unit = {}
}
// Official Spark documentation for custom receivers (custom data sources):
// https://spark.apache.org/docs/latest/streaming-custom-receivers.html