package SparkStreaming.SparkStreaming_IMOOC.Spark
import SparkStreaming.SparkStreaming_IMOOC.Dao.OffsetReadAndSave
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{HasOffsetRanges, OffsetRange}
import org.apache.spark.streaming.{Minutes, StreamingContext}
object SparkStreaming_Main {

  /**
   * Entry point: builds a 1-minute-batch local streaming context, creates a
   * Kafka direct stream that resumes from offsets stored in MySQL (via
   * [[OffsetReadAndSave.KafkaOffsetRead]]), and persists each batch's offset
   * ranges back to MySQL after the batch is received.
   *
   * NOTE(review): offsets are saved without processing any records first, so
   * semantics are at-most-once — confirm this is intentional before adding
   * real per-record work inside foreachRDD.
   */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("spark").setMaster("local[*]")
    val ssc: StreamingContext = new StreamingContext(conf, Minutes(1))
    val topics: Array[String] = Array("first")
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      "auto.offset.reset" -> "latest",
      // Auto-commit is off: offsets are tracked manually in MySQL instead.
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val inDStream: InputDStream[ConsumerRecord[String, String]] =
      OffsetReadAndSave.KafkaOffsetRead(ssc, kafkaParams, topics)
    inDStream.foreachRDD { rdd =>
      // Each batch's ranges are purely local; no driver-side var is needed
      // (the original kept a mutable Array[OffsetRange] field across batches).
      val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      OffsetReadAndSave.KafkaOffsetSave(offsetRanges)
    }
    ssc.start()
    ssc.awaitTermination()
  }
}
package SparkStreaming.SparkStreaming_IMOOC.Dao
import java.sql.PreparedStatement
import SparkStreaming.SparkStreaming_IMOOC.Utils.MysqlUtils
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies, OffsetRange}
object OffsetReadAndSave {

  /**
   * Builds a Kafka direct stream. If the `kafka_offset` MySQL table holds
   * stored per-partition offsets for the first topic, the stream resumes from
   * them; otherwise it subscribes normally and the consumer's
   * `auto.offset.reset` policy applies.
   *
   * NOTE(review): only `topics(0)` is looked up in the offset table even
   * though the stream subscribes to all of `topics` — confirm single-topic
   * usage, and that the table stores one row per partition with contiguous
   * partition ids 0..n-1 (the loop below assumes that).
   *
   * @param ssc         streaming context the stream is attached to
   * @param kafkaParams consumer configuration passed through to Kafka
   * @param topics      topics to subscribe to; topics(0) keys the offset lookup
   * @return the direct input stream of ConsumerRecord[String, String]
   */
  def KafkaOffsetRead(ssc: StreamingContext, kafkaParams: Map[String, Object], topics: Array[String]): InputDStream[ConsumerRecord[String, String]] = {
    val connOffset = MysqlUtils.getConnection
    try {
      // COUNT(*) yields 0 when no rows match; the original SUM(1) yields SQL
      // NULL in that case and only worked because getInt maps NULL to 0.
      val psOffsetCnt: PreparedStatement =
        connOffset.prepareStatement("SELECT COUNT(*) FROM `kafka_offset` WHERE `topic`=?")
      psOffsetCnt.setString(1, topics(0))
      val rs = psOffsetCnt.executeQuery()
      var parCount = 0
      while (rs.next()) {
        parCount = rs.getInt(1)
        println(parCount.toString)
      }
      rs.close()
      MysqlUtils.closeCon(psOffsetCnt, null)

      if (parCount > 0) {
        var untilOffset: Map[TopicPartition, Long] = Map()
        val psOffsetRead: PreparedStatement = connOffset.prepareStatement(
          "SELECT `untilOffset` FROM `kafka_offset` WHERE `topic`=? AND `partition`=?")
        for (i <- 0 until parCount) {
          psOffsetRead.setString(1, topics(0))
          psOffsetRead.setInt(2, i)
          val rs1 = psOffsetRead.executeQuery()
          while (rs1.next()) {
            // Kafka offsets are 64-bit: getLong avoids the silent truncation
            // the original getInt caused once an offset passed Int.MaxValue.
            untilOffset += (new TopicPartition(topics(0), i) -> rs1.getLong(1))
          }
          rs1.close()
        }
        MysqlUtils.closeCon(psOffsetRead, null)
        KafkaUtils.createDirectStream(
          ssc,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Subscribe[String, String](topics, kafkaParams, untilOffset)
        )
      } else {
        // No stored offsets yet: plain subscription.
        KafkaUtils.createDirectStream(
          ssc,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
        )
      }
    } finally {
      // Close the connection exactly once, even on failure (the original
      // closed it twice on success and leaked it on any exception).
      MysqlUtils.closeCon(null, connOffset)
    }
  }

  /**
   * Upserts a batch's offset ranges into `kafka_offset` in one transaction.
   * REPLACE keyed on (topic, partition) overwrites the previous row so the
   * table always holds the latest range per partition.
   *
   * @param offsetRanges the batch's ranges as reported by HasOffsetRanges
   */
  def KafkaOffsetSave(offsetRanges: Array[OffsetRange]): Unit = {
    val connOffset = MysqlUtils.getConnection
    try {
      connOffset.setAutoCommit(false)
      val psOffset: PreparedStatement = connOffset.prepareStatement(
        "REPLACE INTO `kafka_offset` (`topic`, `partition`, `fromOffsets`,`untilOffset`) VALUES (?,?,?,?)")
      for (o <- offsetRanges) {
        println(s"${o.topic} ${o.partition} ${o.fromOffset} ${o.untilOffset}")
        // OffsetRange fields are already String/Int/Long; the original's
        // .toString/.toInt/.toLong conversions were no-ops.
        psOffset.setString(1, o.topic)
        psOffset.setInt(2, o.partition)
        psOffset.setLong(3, o.fromOffset)
        psOffset.setLong(4, o.untilOffset)
        psOffset.addBatch()
      }
      psOffset.executeBatch()
      connOffset.commit()
      MysqlUtils.closeCon(psOffset, null)
    } finally {
      // Guarantees the connection is released even if the batch fails.
      MysqlUtils.closeCon(null, connOffset)
    }
  }
}
package SparkStreaming.SparkStreaming_IMOOC.Utils
import java.sql.{Connection, DriverManager, PreparedStatement}
object MysqlUtils {

  /** Opens a fresh JDBC connection to the `test` database on host `hadoop`
    * using the hardcoded `hive`/`hive` credentials. Caller owns the
    * connection and must release it via [[closeCon]]. */
  def getConnection(): Connection =
    DriverManager.getConnection("jdbc:mysql://hadoop:3306/test", "hive", "hive")

  /** Best-effort cleanup: closes the statement, then the connection.
    * Null arguments are skipped; a failed close is logged (stack trace)
    * rather than propagated. */
  def closeCon(psOffsetCnt: PreparedStatement, connOffset: Connection): Unit = {
    if (psOffsetCnt != null) {
      try psOffsetCnt.close()
      catch { case e: Exception => e.printStackTrace() }
    }
    if (connOffset != null) {
      try connOffset.close()
      catch { case e: Exception => e.printStackTrace() }
    }
  }
}