import com.alibaba.fastjson.JSON
import com.typesafe.config.ConfigFactory
import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.{HasOffsetRanges, KafkaCluster, KafkaUtils}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import scalikejdbc._
import scalikejdbc.config.DBs

object RTMonitor {
// Suppress noisy logging from Spark/Kafka
Logger.getLogger("org.apache").setLevel(Level.WARN)
def main(args:Array[String]): Unit = {
val load = ConfigFactory.load()
// Kafka consumer parameters
val kafkaParams = Map(
"metadata.broker.list" -> load.getString("kafka.broker.list"),
"group.id" -> load.getString("kafka.group.id"),
"auto.offset.reset" -> "smallest"
)
val topics = load.getString("kafka.topics").split(",").toSet
val sparkConf = new SparkConf()
sparkConf.setMaster("local[*]")
sparkConf.setAppName("RTMonitor")
// The batch interval should be longer than the total time (Total Delay) it takes to process one batch
val ssc = new StreamingContext(sparkConf, Seconds(2))
// Pull data from Kafka: read the last committed offsets for this group from the database and resume from there
// Load the JDBC connection settings (scalikejdbc-config)
DBs.setup()
val fromOffsets: Map[TopicAndPartition, Long] = DB.readOnly { implicit session =>
sql"select * from streaming_offset_24 where groupid = ${load.getString("kafka.group.id")}".map(rs => {
(TopicAndPartition(rs.string("topic"), rs.int("partitions")), rs.long("offset"))
}).list().apply()
}.toMap
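// streaming_offset_24 holds one row per (topic, partition) for this consumer group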
// First start: no stored offsets, so let auto.offset.reset decide where to begin
val stream = if (fromOffsets.isEmpty) {
KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)
} else {
// Restarted: the stored offsets may point at data that Kafka's log retention has already deleted,
// so compare them with the earliest offsets still available on the cluster
var checkedOffset = Map[TopicAndPartition, Long]()
val kafkaCluster = new KafkaCluster(kafkaParams)
val earliestLeaderOffsets = kafkaCluster.getEarliestLeaderOffsets(fromOffsets.keySet)
if(earliestLeaderOffsets.isRight){
val topicAndPartitionToOffset = earliestLeaderOffsets.right.get
// Compare each stored offset with the earliest offset on the cluster
checkedOffset = fromOffsets.map(owner => {
val clusterEarliestOffset = topicAndPartitionToOffset.get(owner._1).get.offset
if(owner._2 >= clusterEarliestOffset){
owner
}else{
(owner._1, clusterEarliestOffset)
}
})
}
// Not the first start: resume from the validated offsets
val messageHandler = (mm: MessageAndMetadata[String, String]) => (mm.key(), mm.message())
KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, (String, String)](ssc, kafkaParams, checkedOffset, messageHandler)
}
/**
With window operations there is no need to cache explicitly: the data is already persisted
(backed by the WAL). Without window operations you may cache; a replica of the data is then
kept on another node for fault tolerance.
receiver mode: data is received on the Executors.
direct mode: the Driver computes the offset ranges and the Executors read from Kafka directly.
**/
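// Note: createDirectStream does not commit offsets anywhere by itself,
// which is why this job reads them from and writes them back to MySQL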
// Process the data according to the business requirements
stream.foreachRDD(rdd => {
rdd.foreach(println)
val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
// Real-time report
val baseData = rdd.map(t => JSON.parseObject(t._2))
.filter(_.getString("serviceName").equalsIgnoreCase("reChargeNotifyRec")).map(jsObj => {
val result = jsObj.getString("bussinessRst")
val fee:Double = if(result.equals("0000"))jsObj.getDouble("chargefee") else 0
val isSucc:Double = if(result.equals("0000")) 1 else 0
val receiveTime = jsObj.getString("receiveNotifyTime")
val startTime = jsObj.getString("RequestId")
val pCode = jsObj.getString("provinceCode") // province code
// Elapsed time of the recharge request
val costime = if(result.equals("0000")) Utils.caculateRqt(startTime, receiveTime) else 0
// Base data for the overall business metrics only
//("A-"+startTime.substring(0, 8), List[Double](1, isSucc, fee, costime.toDouble))
// Extended with the hour for the hourly distribution
//("A-"+startTime.substring(0, 8), startTime.substring(0, 10), List[Double](1, isSucc, fee, costime.toDouble))
// Extended with the province code for the per-province distribution
//("A-"+startTime.substring(0, 8), startTime.substring(0, 10), List[Double](1, isSucc, fee, costime.toDouble), pCode)
// Extended with the minute for the per-minute statistics -- this is the tuple the map actually returns
("A-"+startTime.substring(0, 8), startTime.substring(0, 10), List[Double](1, isSucc, fee, costime.toDouble), pCode, startTime.substring(0, 12))
})
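// Shape of baseData: (day key "A-yyyyMMdd", hour "yyyyMMddHH",
// List(order count, success flag, fee, elapsed time), province code, minute "yyyyMMddHHmm")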
// Real-time report -- overall business metrics
/**
Network-wide recharge order count, recharge amount, success rate and average recharge time
**/
baseData.map(t => (t._1, t._3)).reduceByKey((list1, list2) => {
(list1 zip list2) map(x => x._1 + x._2)
}).foreachPartition(itr => {
val client = JedisUtils.getJedisClient()
itr.foreach(tp => {
client.hincrBy(tp._1, "total", tp._2(0).toLong)
client.hincrBy(tp._1, "succ", tp._2(1).toLong)
client.hincrByFloat(tp._1, "money", tp._2(2))
client.hincrBy(tp._1, "timer", tp._2(3).toLong)
client.expire(tp._1, 60 * 60 * 24 * 2)
})
client.close()
})
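// Resulting Redis hash: key "A-yyyyMMdd" with fields total / succ / money / timer, expiring after 2 days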
// Hourly distribution statistics
baseData.map(t => ("B-"+t._2, t._3)).reduceByKey((list1, list2) => {
(list1 zip list2) map(x => x._1 + x._2)
}).foreachPartition(itr => {
val client = JedisUtils.getJedisClient()
itr.foreach(tp => {
// B-2017111816
client.hincrBy(tp._1, "total", tp._2(0).toLong)
client.hincrBy(tp._1, "succ", tp._2(1).toLong)
client.expire(tp._1, 60 * 60 * 24 * 2)
})
client.close()
})
// Per-province distribution of successful recharges
baseData.map(t => ((t._2, t._4), t._3)).reduceByKey((list1, list2) => {
(list1 zip list2) map(x => x._1 + x._2)
}).foreachPartition(itr => {
val client = JedisUtils.getJedisClient()
itr.foreach(tp => {
client.hincrBy("P-"+tp._1._1.substring(0, 8), tp._1._2, tp._2(1).toLong)
client.expire("P-"+tp._1._1.substring(0, 8), 60 * 60 * 24 * 2)
})
client.close()
})
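// Resulting Redis hash: key "P-yyyyMMdd", field = province code, value = number of successful recharges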
// Per-minute distribution statistics
baseData.map(t => ("C-"+t._5, t._3)).reduceByKey((list1, list2) => {
(list1 zip list2) map(x => x._1 + x._2)
}).foreachPartition(itr => {
val client = JedisUtils.getJedisClient()
itr.foreach(tp => {
client.hincrBy(tp._1, "succ", tp._2(1).toLong)
client.hincrByFloat(tp._1, "money", tp._2(2))
client.expire(tp._1, 60 * 60 * 24 * 2)
})
client.close()
})
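// Resulting Redis hash: key "C-yyyyMMddHHmm" with fields succ and money, expiring after 2 days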
// Persist the offsets (into MySQL)
offsetRanges.foreach(osr => {
// println(s"${osr.topic} ${osr.partition} ${osr.fromOffset} ${osr.untilOffset}")
DB.autoCommit { implicit session =>
sql"REPLACE INTO streaming_offset_24(topic, groupid, partitions, offset) VALUES (${osr.topic}, ${load.getString("kafka.group.id")}, ${osr.partition}, ${osr.untilOffset})".update().apply()
}
})
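// Offsets are committed only after the Redis updates for this batch have completed,
// so a failure in between re-processes the batch on restart (at-least-once semantics)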
})
ssc.start()
ssc.awaitTermination()
}
}