// Spark Streaming job: consume data from Kafka and write the stream-processing results to MySQL.
import java.sql.{Connection, DriverManager, PreparedStatement}
import java.util.concurrent.atomic.AtomicInteger

import scala.util.control.NonFatal

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Minutes, Seconds, StreamingContext}
import org.apache.spark.streaming.StreamingContext._
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
/**
 * Windowed word count over a Kafka topic, with results persisted to MySQL.
 *
 * Reads comma-separated values from the "zklog" topic, counts occurrences
 * over a sliding 1-minute window (recomputed every 10 seconds), prints the
 * counts, and inserts each (word, count) pair into the `zklog` MySQL table.
 */
object KafkaWordCount {
  def main(args: Array[String]): Unit = {
    // Quiet the framework loggers so application output is readable.
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Kafka consumer configuration. Auto-commit is disabled: offset progress
    // is tracked via Spark's checkpoint, not the Kafka consumer.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean))

    val sc = new SparkConf().setAppName("ZkWordCount")
    val ssc = new StreamingContext(sc, Seconds(10))
    // Checkpointing is mandatory for reduceByKeyAndWindow with an inverse function.
    ssc.checkpoint("hdfs://nameservice1/test/checkpoint")

    val topics = Array("zklog")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    // Each record's value is a comma-separated list of "words" to count.
    val lines = stream.map(record => record.value)
    val words = lines.flatMap(_.split(","))
    val pair = words.map(x => (x, 1))
    // Incremental windowed count (add entering batches, subtract leaving ones).
    // The filterFunc drops keys whose windowed count has fallen to zero;
    // without it, dead keys accumulate in the window state forever.
    val wordCounts = pair.reduceByKeyAndWindow(
      _ + _, _ - _, Minutes(1), Seconds(10), 3,
      (kv: (String, Int)) => kv._2 > 0)
    wordCounts.print()

    wordCounts.foreachRDD(rdd => {
      // Executed on the executors: writes one partition's counts to MySQL.
      // One connection and one prepared statement per partition.
      def func(records: Iterator[(String, Int)]): Unit = {
        var conn: Connection = null
        var stmt: PreparedStatement = null
        try {
          val url = "jdbc:mysql://172.xx.xx.xx:3306/bigdata"
          val user = "admin"
          val password = "admin"
          conn = DriverManager.getConnection(url, user, password)
          // Prepare ONCE and reuse for every record. The original prepared a
          // new statement per record and only closed the last one, leaking
          // the rest. Parameterized SQL also guards against injection.
          val sql = "insert into zklog(information,count) values (?,?)"
          stmt = conn.prepareStatement(sql)
          records.foreach { case (word, count) =>
            stmt.setString(1, word.trim)
            stmt.setInt(2, count)
            stmt.executeUpdate()
          }
        } catch {
          // NonFatal: let OutOfMemoryError, InterruptedException, etc. propagate.
          case NonFatal(e) => e.printStackTrace()
        } finally {
          if (stmt != null) {
            stmt.close()
          }
          if (conn != null) {
            conn.close()
          }
        }
      }
      // Spread the JDBC writes over 3 concurrent partitions/connections.
      val repartitionedRDD = rdd.repartition(3)
      repartitionedRDD.foreachPartition(func)
    })

    ssc.start()
    ssc.awaitTermination()
  }
}