// Note: see why rdd.foreachPartition() is used at the end (one JDBC connection per partition).
package day5.KafkaSource
import java.sql.{Connection, PreparedStatement}
import kafka.serializer.StringDecoder
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Durations, Seconds, StreamingContext}
import utils.C3p0Utils
import scala.collection.mutable
/**
* @author wade
* @create 2019-03-12 18:42
*/
object HighKafkaSource {

  /**
   * Entry point. Recovers an active/checkpointed StreamingContext from "./A1"
   * if one exists, otherwise builds a fresh one via [[getStreamContex]].
   * This guarantees the same context (and its Kafka offsets) survive a restart.
   */
  def main(args: Array[String]): Unit = {
    val ssc: StreamingContext =
      StreamingContext.getActiveOrCreate("./A1", getStreamContex)
    ssc.start()
    ssc.awaitTermination()
  }

  /**
   * Builds the StreamingContext: a 3-second-batch local stream that reads the
   * "highkafka" topic via the direct (receiverless) Kafka API and batch-inserts
   * each record's value into the MySQL `staff` table.
   *
   * All work on the context must happen inside this factory so that the
   * checkpoint-recovery path in [[main]] reconstructs an identical pipeline.
   *
   * @return the fully wired (but not yet started) StreamingContext
   */
  def getStreamContex(): StreamingContext = {
    val conf: SparkConf = new SparkConf().setAppName("high1").setMaster("local[*]")
    val ssc = new StreamingContext(conf, Seconds(3))
    ssc.checkpoint("./A1")

    // Kafka consumer configuration.
    // NOTE: do NOT set key.deserializer / value.deserializer here — the 0.8
    // direct-stream API takes decoders as type parameters below, and setting
    // those properties only produces "Property ... is not valid" warnings.
    val params = Map[String, String](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoop103:9092,hadoop104:9092,hadoop105:9092",
      ConsumerConfig.GROUP_ID_CONFIG -> "big1015"
    )

    val inputDs: InputDStream[(String, String)] =
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
        ssc,
        params,
        Set[String]("highkafka"))

    inputDs.foreachRDD(rdd => {
      // foreachPartition: the JDBC connection is created ON the executor, once
      // per partition, so the non-serializable Connection never has to be
      // shipped from the driver, and we avoid one-connection-per-record cost.
      rdd.foreachPartition(ts => {
        val conn: Connection = C3p0Utils.getConnection
        try {
          val sql = "INSERT INTO staff VALUES (NULL,?,'male') "
          val ps: PreparedStatement = conn.prepareStatement(sql)
          try {
            // Second tuple element is the Kafka message value.
            ts.foreach(t => {
              val value: String = t._2
              println(value)
              ps.setString(1, value)
              ps.addBatch()
            })
            ps.executeBatch()
          } finally {
            ps.close()
          }
        } finally {
          // Always return the pooled connection, even if the batch fails,
          // otherwise the c3p0 pool leaks one connection per failed partition.
          conn.close()
        }
      })
    })
    ssc
  }
}
// Issue 2: setting key.deserializer/value.deserializer in the params map
// triggers these warnings with the 0.8 direct-stream API:
// WARN [Executor task launch worker for task 1] - Property key.deserializer is not valid
// WARN [Executor task launch worker for task 1] - Property value.deserializer is not valid