package com.qf.sparkstreaming
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Integration of Spark Streaming with Kafka, used to consume messages from Kafka.
 * This uses the 0-10 integration package and its Direct (direct-connect) approach.
 * The older 0-8 package offered a receiver-based approach in addition to direct.
 */
object _05SparkStreamingKafkaDemo {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("test1").setMaster("local[2]")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    // Create the streaming context with a 10-second batch interval
    val ssc = new StreamingContext(conf, Seconds(10))
    // Consumer properties; the deserializers must match the
    // ConsumerRecord[String, String] type used below
    val params: Map[String, String] = Map[String, String](
      "bootstrap.servers" -> "qianfeng01:9092,qianfeng02:9092,qianfeng03:9092",
      "group.id" -> "test1",
      "auto.offset.reset" -> "latest",
      "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
      "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer"
    )
    // Use the utility class from the integration package to create a direct stream
    val dStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(
      ssc,
      // PreferConsistent distributes the topic's partitions evenly across the available executors
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set("pet"), params)
    )
    // Print the message values of each batch
    dStream.map(_.value()).print()
    // Start the streaming job and block until it terminates
    ssc.start()
    ssc.awaitTermination()
  }
}
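With the 0-10 direct approach, offsets are tracked by the stream itself rather than by a receiver, so in production you usually commit them back to Kafka once each batch has been processed. A minimal sketch of that pattern, assuming the dStream from above, that enable.auto.commit is set to false in the consumer properties, and that the block is placed before ssc.start():

import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges, OffsetRange}

dStream.foreachRDD { rdd =>
  // Read the offset ranges that this batch covers
  val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  // ... process the batch here ...
  // Commit asynchronously after processing; this gives at-least-once semantics,
  // so a batch may be reprocessed after a failure
  dStream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}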
Start a console producer on one of the Kafka broker nodes (not HDFS) and send some test data:
kafka-console-producer.sh --broker-list qianfeng01:9092,qianfeng02:9092,qianfeng03:9092 --topic pet
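If the pet topic does not exist yet, create it first on a broker node. A sketch for Kafka versions in the 0.10 era; the ZooKeeper address, partition count, and replication factor here are assumptions:
kafka-topics.sh --create --zookeeper qianfeng01:2181 --topic pet --partitions 3 --replication-factor 3
Once the producer is running, every line typed into it should appear in the streaming job's console within one 10-second batch.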