package Kafka010
import Kafka010.Utils.MyKafkaUtils
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Created by Shi shuai RollerQing on 2019/12/24 19:47
 *
 * Consumer test for the Kafka 0-10 API
 */
//TODO : Consumer test for the Kafka 0-10 API
object Kafka010Demo01 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName(s"${this.getClass.getCanonicalName}")
    val ssc = new StreamingContext(conf, Seconds(5))
    // ConsumerStrategies below expects the topics as a collection; there may be more than one
    val topic = List("topicA")
    val kafkaParams = MyKafkaUtils.getKafkaConsumerParams("SparkKafka010")
    val ds: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topic, kafkaParams)
    )
    ds.foreachRDD(rdd => {
      if (!rdd.isEmpty()) {
        println(rdd.count())
      }
    })
    ssc.start()
    ssc.awaitTermination()
  }
}
// KafkaUtils.createDirectStream requires the Kafka key/value types, and takes three parameters:
// the ssc, a location strategy, and a consumer strategy. The strategies:
// PreferConsistent: Use this in most cases, it will consistently distribute partitions across all executors.
// PreferBrokers: Use this only if your executors are on the same nodes as your Kafka brokers.
// PreferFixed: Use this to place particular TopicPartitions on particular hosts if your load is uneven.
//   Any TopicPartition not specified in the map will use a consistent location.
// Assign: consume only specific partitions (see the sketch below).
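For reference, a minimal sketch combining Assign with PreferFixed (not part of the original test; the partition numbers and the hadoop01 pinning are illustrative assumptions):

import org.apache.kafka.common.TopicPartition

// Consume only partitions 0 and 1 of topicA
val partitions = List(new TopicPartition("topicA", 0), new TopicPartition("topicA", 1))
// Pin partition 0 to hadoop01; partitions absent from the map get consistent placement
val hostMap = Map(new TopicPartition("topicA", 0) -> "hadoop01")
val assignedDS = KafkaUtils.createDirectStream[String, String](
  ssc,
  LocationStrategies.PreferFixed(hostMap),
  ConsumerStrategies.Assign[String, String](partitions, kafkaParams)
)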
Utility class
package Kafka010.Utils
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
/**
* Created by Shi shuai RollerQing on 2019/12/24 19:20
*/
object MyKafkaUtils {
  def getKafkaConsumerParams(groupId: String = "SparkStreaming010", autoCommit: String = "true"): Map[String, String] = {
    val kafkaParams = Map[String, String](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoop01:9092,hadoop02:9092,hadoop03:9092",
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> autoCommit,
      //ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest", // earliest, none, latest; see the Kafka docs for their exact meanings
      ConsumerConfig.GROUP_ID_CONFIG -> groupId,
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer].getName,
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer].getName
    )
    kafkaParams
  }
  /**
   * This is the style used in the official docs for writing the Kafka config. The first
   * method above is still recommended, since the ConsumerConfig constants save you from
   * typing the keys by hand and avoid typos.
   *
   * This one has not been tested; if you use it, adapt it to take parameters (e.g. a
   * groupId like the method above) or just use the defaults.
   * @return
   */
  def getKafkaConsumerParams2(): Map[String, Object] = {
    // Note: the entries must be comma-separated inside parentheses. The original used a
    // {...} block, which silently builds a one-entry Map from the last expression only.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop01:9092,hadoop02:9092,hadoop03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "auto.offset.reset" -> "latest",
      "group.id" -> "topicA",
      "enable.auto.commit" -> (true: java.lang.Boolean)
    )
    kafkaParams
  }
  def main(args: Array[String]): Unit = {
    println(classOf[StringDeserializer].getName)  // org.apache.kafka.common.serialization.StringDeserializer
    println(classOf[StringDeserializer].getClass) // class java.lang.Class
    println(classOf[StringDeserializer])          // class org.apache.kafka.common.serialization.StringDeserializer
  }
}
The result is correct: it prints exactly the number of records in each 5-second batch.
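A note on offsets: with autoCommit = "true" the consumed offsets are committed automatically. If you pass autoCommit = "false", you have to commit them yourself. A minimal sketch of the commitAsync pattern from the Spark/Kafka integration guide, layered onto this demo as an untested assumption:

import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

ds.foreachRDD { rdd =>
  // Grab the offset ranges before any transformation discards the KafkaRDD type
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  if (!rdd.isEmpty()) {
    println(rdd.count())
  }
  // Commit back to Kafka once the batch has been processed
  ds.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}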