SparkStreaming整合Kafka和Redis,这里用的是默认偏移量
[root@dream1 ~]# kafka-console-producer.sh --broker-list dream1:9092,dream2:9092,dream3:9092 --topic test
>hello jeery spark tom hello
>hello spark
>hello
>hello tome^H
>heool^H^H
>tom jerry
>jerry
>spark
package com.ws.sparkstreaming
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import redis.clients.jedis.Jedis
/**
 * Streaming word count: reads lines from the Kafka topic `test`, counts the
 * words of every 5-second batch and accumulates the counts in the Redis hash
 * `wordcount` (field = word, value = running total).
 *
 * Offsets are handled by the Kafka consumer itself (`enable.auto.commit=true`),
 * so the Redis update and the offset commit are not atomic: a failure between
 * the two can lose or double-count a batch.
 */
object WordCountJoinKafkaRedis {

  /**
   * Builds the streaming job and blocks until it is terminated.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[*]")
    // One micro-batch every 5 seconds.
    val ssc = new StreamingContext(conf, Seconds(5))

    val kafkaParams: Map[String, Object] = Map[String, Object](
      "bootstrap.servers" -> "dream1:9092,dream2:9092,dream3:9092", // Kafka broker addresses
      "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer", // deserializers
      "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
      "group.id" -> "1", // consumer group
      "auto.offset.reset" -> "earliest", // where to start when no committed offset exists [latest, earliest]
      "enable.auto.commit" -> "true" // let the consumer commit offsets automatically
    )
    val topic: Iterable[String] = Array("test").toIterable

    val rows: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topic, kafkaParams)
    )

    // Only the record value carries the text; keys are ignored.
    val lines = rows.map(r => r.value())

    // Per-batch word counts. Consecutive spaces produce empty tokens from
    // split(" "); drop them so they are not counted as a word "".
    val reduce: DStream[(String, Int)] = lines
      .flatMap(_.split(" "))
      .filter(_.nonEmpty)
      .map((_, 1))
      .reduceByKey(_ + _)

    reduce.foreachRDD { rdd =>
      // Write from the executors, one connection per partition, instead of
      // collecting every batch to the driver through a single connection that
      // is never closed.
      rdd.foreachPartition { part =>
        if (part.nonEmpty) {
          val jedis = new Jedis("dream1")
          try {
            part.foreach { case (word, count) =>
              jedis.hincrBy("wordcount", word, count.toLong)
            }
          } finally {
            // Always release the connection, even if a Redis call fails.
            jedis.close()
          }
        }
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }
}