kafka0.8和kafka1.0的区别
1.kafka1.0版本不支持receiver连接方式
2. kafka1.0版本自动更新保存偏移量到kafka中
注意
如果使用kafka0.10必须在pom文件中添加一下配置
<dependency>
<groupId>jaxen</groupId>
<artifactId>jaxen</artifactId>
<version>1.1.6</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.10.2.0</version>
</dependency>
代码
package xxx
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Spark Streaming job that reads a Kafka topic via the direct (receiver-less)
 * API, prints each record, and manually commits the processed offsets back
 * to Kafka after every micro-batch.
 */
object DirectStream {
  def main (args: Array[String]): Unit = {
    // Local Spark context with a 5-second micro-batch interval.
    val sparkConf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      .setMaster("local[*]")
    val streamingContext = new StreamingContext(sparkConf, Seconds(5))

    val groupId = "sd"
    val topicName = "study09a1"

    // Kafka consumer configuration. Auto-commit is disabled so that offsets
    // are committed explicitly only after a batch has been processed.
    val consumerConfig = Map[String, Object](
      "bootstrap.servers" -> "had01:9092,had02:9092,had03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "earliest", // alternative: "latest"
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    // Direct connection to Kafka; offsets are tracked in Kafka itself.
    val kafkaStream = KafkaUtils.createDirectStream[String, String](
      streamingContext,
      // Location strategy: prefer executors co-located with Kafka brokers.
      PreferConsistent,
      // Subscription strategy: a fixed topic list here; a regex pattern
      // subscription (e.g. "my-orders-.*") is also possible.
      Subscribe[String, String](Array(topicName), consumerConfig)
    )

    kafkaStream.foreachRDD { rdd =>
      // Capture this batch's offset ranges before any processing.
      val ranges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      // Process the records (this closure runs on the executors, so the
      // println output appears in executor logs, not the driver console).
      rdd.foreach { record =>
        println(record.key() + "*************" + record.value())
      }
      // Asynchronously commit the processed offsets back to Kafka.
      kafkaStream.asInstanceOf[CanCommitOffsets].commitAsync(ranges)
    }

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}