Notes on my Spark Streaming job: it saves Kafka offsets to Redis and resumes consumption from the last saved offsets on restart.
// Initialize the StreamingContext with a 2-second batch interval.
val ssc: StreamingContext = new StreamingContext(sc, Seconds(2))
// Redis cluster connection used to persist and restore Kafka offsets.
val jedis: JedisCluster = JedisConnectionUtil.getJedisCluster
// Redis key holding the previously saved offsets.
// NOTE(review): "huhuhu" looks like a placeholder key — confirm the real key name.
val jsonOffset = jedis.get("huhuhu")
// If no offsets were saved yet, start from the default position for the topic;
// otherwise resume from the persisted offsets. Assigning the `if` expression
// directly avoids the null-initialized var the original used.
val kafkadata: InputDStream[(String, String)] =
  if (jsonOffset == null) {
    KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, sourceTopic)
  } else {
    // Each persisted entry is a JSON array [topic, partition, fromOffset, untilOffset]
    // (see the save path below in this file). We resume from index 3, the
    // untilOffset, i.e. the first offset NOT yet processed by the last run.
    val topicmap: Map[TopicAndPartition, Long] =
      JSON.parseArray(jsonOffset).toArray.map { elem =>
        val fields = elem.asInstanceOf[JSONArray].toArray()
        TopicAndPartition(fields(0).toString, fields(1).toString.toInt) -> fields(3).toString.toLong
      }.toMap
    // Keep only (key, message) from each Kafka record.
    val messageHandler = (mm: MessageAndMetadata[String, String]) => (mm.key(), mm.message())
    KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, (String, String)](ssc, kafkaParams, topicmap, messageHandler)
  }
//linesData = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, sourceTopic)
// Persist each batch's offset ranges to Redis so a restart can resume.
// Reading HasOffsetRanges directly inside foreachRDD (the first operation on
// the direct stream) replaces the original transform + driver-side mutable
// var, removing the ordering dependency between the two closures.
kafkadata.foreachRDD(rdd => {
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  // Serialize as an array of [topic, partition, fromOffset, untilOffset] rows;
  // Array[Any] is required because the fields are heterogeneous (String/Int/Long).
  val rows: Array[Array[Any]] =
    offsetRanges.map(o => Array[Any](o.topic, o.partition, o.fromOffset, o.untilOffset))
  val json = JSON.toJSONString(rows, true)
  // NOTE(review): offsets are saved without processing the records first,
  // which gives at-most-once delivery; persist after processing (ideally
  // atomically with the results) if at-least-once is required.
  // NOTE(review): "huhuhu" looks like a placeholder key — confirm the real key name.
  jedis.set("huhuhu", json)
})