1. Add the Jedis dependency to the Maven pom.xml
<!-- redis -->
<dependency>
    <groupId>redis.clients</groupId>
    <artifactId>jedis</artifactId>
    <version>2.9.0</version>
</dependency>
2. Create a Jedis connection pool; call its getConnection method to obtain a connection
package day01.jedised3

import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}

object JedisConnectionPools {
  val conf = new JedisPoolConfig()
  // maximum number of connections in the pool
  conf.setMaxTotal(20)
  // maximum number of idle connections
  conf.setMaxIdle(10)
  // validate a connection before it is borrowed from the pool
  conf.setTestOnBorrow(true)
  // host, Redis port, connection timeout (ms)
  val pool = new JedisPool(conf, "192.168.136.128", 6379, 10000)

  def getConnection(): Jedis = {
    pool.getResource
  }

  /* A simple test
  def main(args: Array[String]): Unit = {
    val conn = JedisConnectionPools.getConnection()
    val r1: String = conn.get("mm")
    println(r1)
    conn.incrBy("mm", -50)
    val r2 = conn.get("mm")
    println(r2)
    conn.close()
  }
  */
}
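As a quick sanity check, the pool can also be exercised from a plain main method that reads back the accumulated word counts. The sketch below is not part of the original article; it only assumes the "wordCount" Redis hash that the streaming job in step 3 writes to, and uses Jedis's hgetAll to dump it.

import redis.clients.jedis.Jedis
import day01.jedised3.JedisConnectionPools
import scala.collection.JavaConverters._

// Minimal sketch (not from the original article): print the word counts
// stored in the "wordCount" hash by the streaming job below.
object ReadWordCounts {
  def main(args: Array[String]): Unit = {
    val conn: Jedis = JedisConnectionPools.getConnection()
    // hgetAll returns a java.util.Map[String, String] of word -> count
    val counts = conn.hgetAll("wordCount")
    counts.asScala.foreach { case (word, cnt) => println(s"$word -> $cnt") }
    conn.close()
  }
}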
3. The Spark Streaming job: read from Kafka with a direct stream, count words, accumulate the counts in a Redis hash, and store the offsets in ZooKeeper

import day01.jedised3.JedisConnectionPools
import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import kafka.utils.{ZKGroupTopicDirs, ZkUtils}
import org.I0Itec.zkclient.ZkClient
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, StreamingContext}
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.{HasOffsetRanges, KafkaUtils, OffsetRange}
import redis.clients.jedis.Jedis
object KafkaWordContJedis {
  def main(args: Array[String]): Unit = {
    val group = "groupPP" // consumer group name
    val conf = new SparkConf().setAppName("OrderCount").setMaster("local[2]")
    // create a StreamingContext with 5-second batches
    val ssc = new StreamingContext(conf, Duration(5000))
    val topic = "wc2" // topic name
    // broker list
    val brokerList = "hadoop01:9092,hadoop02:9092,hadoop03:9092"
    // ZooKeeper quorum
    val zkQuorum = "hadoop01:2181,hadoop02:2181,hadoop03:2181"
    // the set of topics to consume
    val topics: Set[String] = Set(topic)
    // ZKGroupTopicDirs records offsets under /consumers/<group>/offsets/<topic>
    val topicDir: ZKGroupTopicDirs = new ZKGroupTopicDirs(group, topic)
    // path under which the offsets are stored: /consumers/<group>/offsets/<topic>
    // this could also be written as: val zkTopicPath: String = topicDir.consumerOffsetDir
    val zkTopicPath: String = s"${topicDir.consumerOffsetDir}"
    // Kafka connection parameters
    val kafkaParams: Map[String, String] = Map[String, String](
      "metadata.broker.list" -> brokerList,
      "group.id" -> group,
      // when no offsets are stored, start from the earliest available offset
      "auto.offset.reset" -> kafka.api.OffsetRequest.SmallestTimeString
    )
    // ZkClient used to read and update the offsets
    val zkClient: ZkClient = new ZkClient(zkQuorum)
    // check whether this group has consumed the topic before:
    // returns the number of partitions with saved offsets, or 0 if none
    val children = zkClient.countChildren(zkTopicPath)
    // the InputDStream created below
    var kafkaStream: InputDStream[(String, String)] = null
    // holds the starting offset of each partition
    var fromOffsets: Map[TopicAndPartition, Long] = Map()
    if (children > 0) {
      // partition numbers start at 0
      for (i <- 0 until children) {
        // read the saved offset of each partition (stored as a String)
        // path: <zkTopicPath>/<partition>, i.e. /consumers/<group>/offsets/<topic>/<partition>
        val partitionOffset: String = zkClient.readData[String](zkTopicPath + "/" + i)
        val tp = TopicAndPartition(topic, i)
        fromOffsets += (tp -> partitionOffset.toLong)
      }
      // message handler that returns the Kafka key (null unless explicitly set) and the value
      val messageHandler = (mmd: MessageAndMetadata[String, String]) => {
        (mmd.key(), mmd.message())
      }
      kafkaStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, (String, String)](ssc, kafkaParams, fromOffsets, messageHandler)
    } else { // no saved offsets: start reading from the beginning
      kafkaStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)
    }
    // holds the offset ranges of the current batch
    var offsetRanges = Array[OffsetRange]()
    kafkaStream.foreachRDD(kafkaRdd => { // one RDD per batch
      if (!kafkaRdd.isEmpty()) {
        // record the offset range of this batch
        offsetRanges = kafkaRdd.asInstanceOf[HasOffsetRanges].offsetRanges
        // keep only the values (the keys are not used)
        val lines: RDD[String] = kafkaRdd.map(_._2)
        // word count
        val flatMaped: RDD[String] = lines.flatMap(_.split(" "))
        val maped = flatMaped.map((_, 1))
        val reduced: RDD[(String, Int)] = maped.reduceByKey(_ + _)
        // for debugging: print this batch's counts on the driver
        println(reduced.collect().toBuffer)
        reduced.foreachPartition(part => {
          // get a Jedis connection from the pool (runs on the executor)
          val conn: Jedis = JedisConnectionPools.getConnection()
          for (one <- part) {
            println(one)
            // add this batch's count for the key to the running total
            conn.hincrBy("wordCount", one._1, one._2)
          }
          conn.close()
        })
        // update the offsets in ZooKeeper on the driver side
        // (ZkClient is not serializable, so this must not run inside foreachPartition)
        for (o <- offsetRanges) {
          val zkPath = topicDir.consumerOffsetDir + "/" + o.partition
          ZkUtils.updatePersistentPath(zkClient, zkPath, o.untilOffset.toString)
        }
      }
    })
    ssc.start()
    ssc.awaitTermination()
  }
}
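To confirm that the offsets really end up in ZooKeeper, a small read-back sketch can help. This is not part of the original job; it only reuses the ZKGroupTopicDirs and ZkClient calls already shown above, and it assumes the same group name ("groupPP"), topic ("wc2"), and ZooKeeper quorum.

import kafka.utils.ZKGroupTopicDirs
import org.I0Itec.zkclient.ZkClient

// Minimal sketch (not from the original article): print the offsets the job
// stored in ZooKeeper under /consumers/<group>/offsets/<topic>/<partition>.
object PrintStoredOffsets {
  def main(args: Array[String]): Unit = {
    val topicDir = new ZKGroupTopicDirs("groupPP", "wc2")
    val zkClient = new ZkClient("hadoop01:2181,hadoop02:2181,hadoop03:2181")
    val path = topicDir.consumerOffsetDir
    // one child znode per partition, each holding the saved offset as a String
    val partitions = zkClient.countChildren(path)
    for (i <- 0 until partitions) {
      val offset = zkClient.readData[String](path + "/" + i)
      println(s"partition $i -> offset $offset")
    }
    zkClient.close()
  }
}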