<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <!-- Kafka 0.8 direct API: provides KafkaUtils, KafkaCluster and HasOffsetRanges
         used below. For Spark 1.x the artifact is spark-streaming-kafka_2.11 instead. -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.26</version>
    </dependency>
    <dependency>
        <groupId>org.scalikejdbc</groupId>
        <artifactId>scalikejdbc-core_2.11</artifactId>
        <version>2.5.0</version>
    </dependency>
    <dependency>
        <groupId>org.scalikejdbc</groupId>
        <artifactId>scalikejdbc-config_2.11</artifactId>
        <version>2.5.0</version>
    </dependency>
</dependencies>
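
DBs.setup() loads the JDBC connection from application.conf on the classpath. A minimal sketch, assuming a local MySQL instance (the URL, database name, user, and password are placeholders to adapt):

db.default.driver="com.mysql.jdbc.Driver"
db.default.url="jdbc:mysql://localhost:3306/test?characterEncoding=utf-8"
db.default.user="root"
db.default.password="root"

The replace-into upsert in the job below also assumes an offsets table whose primary key covers (groupId, topic, partitions), for example:

CREATE TABLE offsets (
    groupId     VARCHAR(64)  NOT NULL,
    topic       VARCHAR(128) NOT NULL,
    partitions  INT          NOT NULL,
    untilOffset BIGINT,
    PRIMARY KEY (groupId, topic, partitions)
);
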
package com.sparkStreaming.Demo12_OffsetMysql
import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaCluster.Err
import org.apache.spark.streaming.kafka.{HasOffsetRanges, KafkaCluster, KafkaUtils}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import scalikejdbc.{DB, SQL}
import scalikejdbc.config.DBs
object SparkStreamingOffsetMysql {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ssom").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(3))
    val groupid = "gp0123"
    val brokerList = "192.168.14.128:9092,192.168.14.129:9092,192.168.14.130:9092"
    val topic = "tt"
    val topics = Set(topic)
    val kafkas = Map(
      "metadata.broker.list" -> brokerList,
      "group.id" -> groupid,
      // Only consulted when no stored offsets are passed in: start from the earliest message
      "auto.offset.reset" -> kafka.api.OffsetRequest.SmallestTimeString
    )
    // Load the JDBC connection settings from application.conf
    DBs.setup()
    // Recover the last committed offset of every partition for this consumer group
    val fromdbOffset: Map[TopicAndPartition, Long] =
      DB.readOnly { implicit session =>
        SQL(s"select * from offsets where groupId = '${groupid}'")
          .map(m => (TopicAndPartition(m.string("topic"), m.int("partitions")), m.long("untilOffset")))
          .toList()
          .apply()
      }.toMap
    val kafkaStream: InputDStream[(String, String)] =
      if (fromdbOffset.isEmpty) {
        // First run: nothing in MySQL yet, so auto.offset.reset decides where to start
        KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkas, topics)
      } else {
        // Kafka may have deleted old log segments since the offsets were saved, so
        // clamp every stored offset to the partition's current earliest offset to
        // avoid an OffsetOutOfRange error on restart.
        val kafkaCluster = new KafkaCluster(kafkas)
        val earliestOffsets: Either[Err, Map[TopicAndPartition, KafkaCluster.LeaderOffset]] =
          kafkaCluster.getEarliestLeaderOffsets(fromdbOffset.keySet)
        var checkedOffsets = Map[TopicAndPartition, Long]()
        if (earliestOffsets.isRight) {
          val topicAndPartitionOffset: Map[TopicAndPartition, KafkaCluster.LeaderOffset] =
            earliestOffsets.right.get
          checkedOffsets = fromdbOffset.map { owner =>
            val earliest = topicAndPartitionOffset(owner._1).offset
            if (owner._2 > earliest) owner else (owner._1, earliest)
          }
        }
        val messageHandler = (mmd: MessageAndMetadata[String, String]) => (mmd.key(), mmd.message())
        KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, (String, String)](
          ssc, kafkas, checkedOffsets, messageHandler)
      }
    kafkaStream.foreachRDD { kafkaRDD =>
      // Capture the offset ranges before any transformation discards the Kafka RDD type
      val offsetRanges = kafkaRDD.asInstanceOf[HasOffsetRanges].offsetRanges
      val lines = kafkaRDD.map(_._2)
      lines.foreach(println)
      // Commit the new end offsets in one transaction; replace into upserts on
      // the (groupId, topic, partitions) primary key
      DB.localTx { implicit session =>
        for (os <- offsetRanges) {
          SQL("replace into offsets(groupId, topic, partitions, untilOffset) values(?, ?, ?, ?)")
            .bind(groupid, os.topic, os.partition, os.untilOffset)
            .update()
            .apply()
        }
      }
    }
    ssc.start()
    ssc.awaitTermination()
  }
}
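
To exercise the job, send a few messages to the tt topic, for example with the console producer that ships with Kafka (broker address as configured above):

kafka-console-producer.sh --broker-list 192.168.14.128:9092 --topic tt

Each batch prints the message bodies and upserts the latest untilOffset per partition, so a restart resumes from the offsets stored in MySQL instead of falling back to auto.offset.reset.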