package utils
import org.apache.commons.pool2.impl.GenericObjectPoolConfig
import redis.clients.jedis.{Jedis, JedisPool}
/**
 * Factory object that hands out pooled Jedis connections.
 */
object Jpools {

  // Pool sizing: allow up to 2000 live connections, but keep at most 5 idle.
  private val poolConfig: GenericObjectPoolConfig = {
    val cfg = new GenericObjectPoolConfig()
    cfg.setMaxIdle(5)
    cfg.setMaxTotal(2000)
    cfg
  }

  // The pool is an internal detail and is created lazily on first use.
  // Connects to host "hadoop01" on the Jedis default port.
  private lazy val jedisPool = new JedisPool(poolConfig, "hadoop01")

  /**
   * Borrow a connection from the pool, pre-selected to Redis database 2.
   *
   * Callers must `close()` the returned connection to hand it back to the pool.
   *
   * @return a ready-to-use [[Jedis]] connection
   */
  def getJedis: Jedis = {
    val conn = jedisPool.getResource
    conn.select(2)
    conn
  }
}
package kafka2
import java.lang

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.{CanCommitOffsets, ConsumerStrategies, HasOffsetRanges, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

import utils.Jpools
/**
 * Spark Streaming job that consumes the "wordcount" Kafka topic, aggregates
 * word counts per 2-second batch, and accumulates them into the Redis hash
 * "Word" via `hincrBy`. Offsets are committed manually after each batch.
 */
object DataConsumerToRedis {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("redis").setMaster("local[*]")
    // Throttle ingestion: at most 2 records per partition per second.
    conf.set("spark.streaming.kafka.maxRatePerPartition", "2")
    // Finish in-flight batches before shutting down.
    conf.set("spark.streaming.stopGracefullyOnShutdown", "true")
    val ssc = new StreamingContext(conf, Seconds(2))

    // Consumer group id.
    val groupid = "day_002"

    // Kafka consumer configuration.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop01:9092,hadoop02:9092,hadoop03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupid,
      "auto.offset.reset" -> "earliest",
      // Auto-commit is disabled; offsets are committed explicitly below,
      // after the batch has been written to Redis.
      "enable.auto.commit" -> (false: lang.Boolean)
    )

    // Create the direct Kafka stream.
    val stream = KafkaUtils.createDirectStream(
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array("wordcount"), kafkaParams)
    )

    stream.foreachRDD(rdd => {
      // Capture this batch's offset ranges before any transformation
      // (only the original KafkaRDD carries them).
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

      rdd.map(record => (record.value(), 1)).reduceByKey(_ + _).foreachPartition(partition => {
        val jedis = Jpools.getJedis
        try {
          partition.foreach { case (word, count) =>
            jedis.hincrBy("Word", word, count)
          }
        } finally {
          // BUG FIX: close in a finally block — previously a failure inside the
          // loop leaked the pooled connection.
          jedis.close()
        }
      })

      // BUG FIX: auto-commit is disabled but offsets were never committed, so a
      // restart replayed the topic from "earliest" and double-counted into Redis.
      // Commit asynchronously once the batch has been persisted.
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    })

    ssc.start()
    ssc.awaitTermination()
  }
}