Overview:
spark-streaming-kafka-0-8_2.11 provides two ways to connect Spark Streaming to Kafka: the Receiver approach, which consumes through ZooKeeper using Kafka's high-level consumer, and the Direct approach, which reads from the Kafka brokers directly and lets Spark track the offsets itself.
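To compile either example, the spark-streaming-kafka-0-8 artifact has to be on the classpath. A minimal build.sbt sketch (the version numbers here are assumptions; align them with your Spark cluster):

// build.sbt (sketch; version numbers are assumptions)
scalaVersion := "2.11.12"
val sparkVersion = "2.2.0"
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"                % sparkVersion % "provided",
  "org.apache.spark" %% "spark-streaming"           % sparkVersion % "provided",
  "org.apache.spark" %% "spark-streaming-kafka-0-8" % sparkVersion
)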
Receiver implementation:
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

def main(args: Array[String]): Unit = {
  // Create the Spark configuration object
  val conf: SparkConf = new SparkConf().setAppName("SparkStreaming_Demo04").setMaster("local[*]")
  // Create the StreamingContext, the entry point of a Spark Streaming program (3-second batches)
  val ssc: StreamingContext = new StreamingContext(conf, Seconds(3))
  // Connect to Kafka through ZooKeeper and create the DStream
  val kafkaDS: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(
    ssc,
    "node1:2181,node2:2181,node3:2181", // ZooKeeper quorum
    "group1",                           // consumer group id
    Map("bigdata-0105" -> 2)            // topic -> number of consumer threads
  )
  // Each Kafka record is a (key, value) pair; we only need the value
  val lineDS: DStream[String] = kafkaDS.map(_._2)
  lineDS.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()
  ssc.start()
  ssc.awaitTermination()
}
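One caveat with the Receiver approach: records are buffered in executor memory before they are processed, so a failure can lose data unless the write-ahead log is enabled. A minimal sketch of how the configuration above could be hardened (an assumption, not part of the original example):

// Persist received Kafka data to a write-ahead log under the checkpoint directory
val conf: SparkConf = new SparkConf()
  .setAppName("SparkStreaming_Demo04")
  .setMaster("local[*]")
  .set("spark.streaming.receiver.writeAheadLog.enable", "true")
val ssc: StreamingContext = new StreamingContext(conf, Seconds(3))
ssc.checkpoint("checkpoint") // the WAL requires a checkpoint directory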
Produce test messages: kafka-console-producer.sh --topic bigdata-0105 --broker-list node1:9092
Run result:
Direct implementation:
import kafka.serializer.StringDecoder
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

def main(args: Array[String]): Unit = {
  // Create the Spark configuration object
  val conf: SparkConf = new SparkConf().setAppName("SparkStreaming_Demo05").setMaster("local[*]")
  // Create the StreamingContext, the entry point of a Spark Streaming program (3-second batches)
  val ssc: StreamingContext = new StreamingContext(conf, Seconds(3))
  // Set the checkpoint directory (lets the Direct stream persist its offsets for recovery)
  ssc.checkpoint("checkpoint")
  // Prepare the Kafka parameters
  val kafkaParams: Map[String, String] = Map[String, String](
    ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "node1:9092,node2:9092,node3:9092",
    ConsumerConfig.GROUP_ID_CONFIG -> "group1"
  )
  // Connect to the Kafka brokers directly and create the DStream
  val kafkaDS: InputDStream[(String, String)] = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
    ssc,
    kafkaParams,
    Set("bigdata-0105")
  )
  // Each Kafka record is a (key, value) pair; we only need the value
  val lineDS: DStream[String] = kafkaDS.map(_._2)
  lineDS.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()
  ssc.start()
  ssc.awaitTermination()
}
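The example calls ssc.checkpoint("checkpoint") but always builds a fresh context, so the saved offsets are never read back. To let the Direct stream resume from its last offsets after a restart, the context can be recreated with StreamingContext.getOrCreate; a minimal sketch reusing the imports and names from the Direct example above:

// Factory that builds the context and its DStream graph; only invoked when no checkpoint exists yet
def createContext(): StreamingContext = {
  val conf = new SparkConf().setAppName("SparkStreaming_Demo05").setMaster("local[*]")
  val ssc = new StreamingContext(conf, Seconds(3))
  ssc.checkpoint("checkpoint")
  val kafkaParams = Map(
    ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "node1:9092,node2:9092,node3:9092",
    ConsumerConfig.GROUP_ID_CONFIG -> "group1")
  KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, Set("bigdata-0105"))
    .map(_._2).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()
  ssc
}

def main(args: Array[String]): Unit = {
  // On restart, the DStream graph and the consumed offsets are restored from the checkpoint
  val ssc: StreamingContext = StreamingContext.getOrCreate("checkpoint", createContext _)
  ssc.start()
  ssc.awaitTermination()
}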
Produce test messages: kafka-console-producer.sh --topic bigdata-0105 --broker-list node1:9092
Run result: