Spark 读取 Oracle 写入 Kafka；Spark 读取 Kafka 写入 Redis。下文代码为 Kafka → Redis 的消费端：手动管理 Kafka offset，并将每个分区的 offset 保存到 Redis。

object PassengerFlowConsumerRedis {

  private val logger: Logger = Logger.getLogger(this.getClass)

  /**
   * One parsed log record.
   * Expected wire format (pipe-delimited):
   *   topic_1|43|2019-07-23 14:55:23.467|039702c7-0f8d-4ca1-9a01-e0fdd68c63de
   */
  final case class MyRecord(topic: String, id: String, timestramp: String, uuid: String) extends Serializable

  /**
   * Entry point: consumes the configured Kafka topics with a direct stream,
   * resuming from per-partition offsets stored in Redis, and writes the
   * processed offsets back to Redis through a pipelined MULTI/EXEC block.
   */
  def main(args: Array[String]): Unit = {

    val properties = PropertiesScalaUtils.loadProperties("kafka.properties")

    // Auto-commit is disabled on purpose: offsets are committed to Redis,
    // not to Kafka, so a restart resumes from the last Redis checkpoint.
    val kafkaParams = Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> properties.getProperty("kafka.bootstrap.servers"),
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.GROUP_ID_CONFIG -> "group-66",
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> properties.getProperty("kafka.auto.offset.reset"),
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
    )

    val conf = new SparkConf().setIfMissing("spark.master", "local[4]").setAppName("UserCountStat")
    val streamingContext = new StreamingContext(conf, Seconds(5))

    // Redis connection-pool settings.
    val redisHost = properties.getProperty("redis.host")
    val redisPort = properties.getProperty("redis.port")
    val redisTimeout = properties.getProperty("redis.timeout")
    val maxTotal = properties.getProperty("redis.maxTotal")
    val maxIdle = properties.getProperty("redis.maxIdle")
    val minIdle = properties.getProperty("redis.minIdle")
    JedisPoolUtils.makePool(redisHost, redisPort.toInt, redisTimeout.toInt, maxTotal.toInt, maxIdle.toInt, minIdle.toInt)

    // Topic name -> number of partitions per topic.
    val topicsPartition = Map("topic_1" -> 3, "topic_2" -> 4, "topic_3" -> 5)
    val topics = topicsPartition.keys.toList

    // Fix: the pooled driver-side connection was never returned to the pool;
    // release it as soon as the starting offsets have been read.
    val jedis: Jedis = JedisPoolUtils.getPool.getResource
    val fromOffsets: Map[TopicPartition, Long] =
      try readOffsets(jedis, topicsPartition)
      finally jedis.close()

    val kafkaStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(
      streamingContext,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams, fromOffsets)
    )

    // Process each micro-batch.
    kafkaStream.foreachRDD { rdd =>
      if (!rdd.isEmpty()) {
        // Offset ranges covered by this batch (one per Kafka partition).
        val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

        rdd.foreachPartition { partition =>
          // offsetRanges is indexed by the Spark partition id for a direct stream.
          val offset: OffsetRange = offsetRanges(TaskContext.get.partitionId)
          logger.info(s"${offset.topic} ${offset.partition} ${offset.fromOffset} ${offset.untilOffset}")

          val jedisClient = JedisPoolUtils.getPool.getResource
          // Fix: close the pooled connection in a finally block so a failure in
          // the partition body can no longer leak it.
          try {
            val pipeline: Pipeline = jedisClient.pipelined()
            // Open a MULTI transaction on the pipeline.
            pipeline.multi()

            partition.foreach { record =>
              // Data-processing logic (placeholder in the original).
              println(record)
              println(s"===============${record.topic()}_${record.partition()}====================")
            }

            // Persist the until-offset of every range under "<topic>_<partition>".
            // NOTE(review): each executor partition writes ALL ranges, so keys
            // are written redundantly across partitions — kept as in the original.
            offsetRanges.foreach { offsetRange =>
              logger.info(s"topic: ${offsetRange.topic} === partition: ${offsetRange.partition} === fromOffset: ${offsetRange.fromOffset} === untilOffset: ${offsetRange.untilOffset}")
              val topic_partition_key = offsetRange.topic + "_" + offsetRange.partition
              pipeline.set(topic_partition_key, offsetRange.untilOffset.toString)
            }

            // Commit the transaction and flush the pipeline.
            pipeline.exec()
            pipeline.sync()
          } finally {
            jedisClient.close()
          }
        }
      }
    }

    streamingContext.start()
    // Blocks until the streaming context is stopped externally; the unreachable
    // stop() call that followed in the original has been removed.
    streamingContext.awaitTermination()
  }

  /**
   * Collects the distinct, successfully parsed records of a batch on the driver.
   * The old `.filter(_ != null)` was redundant: flatMap over Option never yields null.
   */
  def processLogs(rdd: RDD[ConsumerRecord[String, String]]): Array[MyRecord] = {
    rdd.map(_.value()).flatMap(parseLog).distinct().collect()
  }

  /**
   * Parses one pipe-delimited line into a [[MyRecord]].
   * Returns None (and logs the error) on any malformed line instead of using
   * the non-local `return` the original relied on.
   */
  def parseLog(line: String): Option[MyRecord] = {
    import scala.util.control.NonFatal
    // Format: topic_1|43|2019-07-23 14:55:23.467|039702c7-0f8d-4ca1-9a01-e0fdd68c63de
    val ary: Array[String] = line.split("\\|", -1)
    try {
      Some(MyRecord(ary(0).trim, ary(1).trim, ary(2).trim, ary(3).trim))
    } catch {
      case NonFatal(e) =>
        logger.error("解析错误", e)
        println(e.getMessage)
        None
    }
  }

/**
 * Reads the starting offset for every partition of every topic from Redis.
 *
 * Keys are "<topic>_<partitionIndex>"; a missing key is initialised to "0" so
 * the first run starts from the beginning of the partition.
 *
 * @param jedis           open Redis connection (caller owns and closes it)
 * @param topicsPartition topic name -> number of partitions for that topic
 * @return TopicPartition -> last saved offset; exits the JVM on Redis failure
 *         (kept from the original: without offsets the job cannot start safely)
 */
def readOffsets(jedis: Jedis, topicsPartition: Map[String, Int]): Map[TopicPartition, Long] = {
  var fromOffsets: Map[TopicPartition, Long] = Map()
  try {
    // jedis.select(1)
    topicsPartition.foreach { case (topicName, partitionCount) =>
      // Reconstructed loop bound: the scraped source lost the range after "for (i";
      // iterating 0 until the configured partition count matches how the keys
      // "<topic>_<partition>" are written back in main().
      for (i <- 0 until partitionCount) {
        val topic_partition_key = topicName + "_" + i
        if (!jedis.exists(topic_partition_key)) {
          jedis.set(topic_partition_key, "0")
        }
        val lastSavedOffset = jedis.get(topic_partition_key)
        // Fix: the original interpolated the (name, count) tuple here.
        logger.info(s"topic ${topicName} partition ${i} get lastSavedOffset from redis: ${lastSavedOffset}")
        fromOffsets += (new TopicPartition(topicName, i) -> lastSavedOffset.toLong)
      }
    }
  } catch {
    case e: Exception =>
      logger.error("readOffsets error ", e)
      System.exit(1)
  }
  fromOffsets
}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值