Dependencies
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
    <version>2.2.0</version>
</dependency>
<!-- spark-sql provides the SparkSession/DataFrame API used for the JDBC write below -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.2.0</version>
</dependency>
<!-- MySQL JDBC driver, needed at runtime for the database write -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.47</version>
</dependency>
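If you build with sbt rather than Maven, an equivalent build.sbt sketch (assuming scalaVersion := "2.11.8") would be:

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"                 % "2.2.0",
  "org.apache.spark" %% "spark-sql"                  % "2.2.0",
  "org.apache.spark" %% "spark-streaming"            % "2.2.0",
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.2.0",
  "mysql"            %  "mysql-connector-java"       % "5.1.47"
)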
Code
Compared with a plain consumer, this example adds a step that writes the results to a database.
createDirectStream returns a DStream, i.e. a sequence of RDDs, one per batch interval; foreachRDD is then used to process each of those RDDs in turn.
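To see that pattern in isolation first, here is a minimal self-contained sketch that replaces Kafka with a queueStream of pre-built RDDs (the queue contents are invented for illustration); foreachRDD fires once per batch interval:

import scala.collection.mutable
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}

object ForeachRDDDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ForeachRDDDemo").setMaster("local[*]")
    val scc = new StreamingContext(conf, Seconds(2))

    // A queue of pre-built RDDs stands in for the Kafka stream.
    val queue = mutable.Queue(scc.sparkContext.makeRDD(Seq("a,1", "b,2")))

    scc.queueStream(queue).foreachRDD { rdd =>
      // Runs on the driver once per 2-second batch.
      println(s"this batch contains ${rdd.count()} records")
    }

    scc.start()
    scc.awaitTermination()
  }
}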
package day07.tongji

import java.util.Properties

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
object NewsFromKafka1 {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("KafkaToSpark").setMaster("local[*]")
    val scc = new StreamingContext(conf, Seconds(2)) // 2-second batch interval
    val session = SparkSession.builder().master("local[*]").appName(this.getClass.getName).getOrCreate()
    import session.implicits._

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop101:9092,hadoop102:9092,hadoop103:9092", // brokers used to bootstrap the connection to the cluster
      "key.deserializer" -> classOf[StringDeserializer],   // key deserializer
      "value.deserializer" -> classOf[StringDeserializer], // value deserializer
      "group.id" -> "group1",                              // consumer group this consumer belongs to
      "auto.offset.reset" -> "latest",                     // with no committed offset, start from the latest one
      "enable.auto.commit" -> (false: java.lang.Boolean)   // if true, offsets are committed automatically in the background
    )

    val topics = Array("first")
    val directStream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream(
        scc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))
    directStream.foreachRDD { rdd =>
      // Each message value is a comma-separated line; fields 2 to 11 are ten Int counters.
      // A constant key of 1 makes reduceByKey sum the counters across every record in the batch.
      val key: RDD[(Int, List[Int])] = rdd
        .map(record => record.value().split(","))
        .map(fields => (1, fields.slice(2, 12).map(_.toInt).toList))
        .reduceByKey((a, b) => a.zip(b).map { case (l, r) => l + r }) // element-wise sum
      val props = new Properties()
      props.setProperty("driver", "com.mysql.jdbc.Driver")
      props.setProperty("user", "root")
      props.setProperty("password", "000000")

      key.map(t => (t._2(0), t._2(1), t._2(2), t._2(3), t._2(4),
                    t._2(5), t._2(6), t._2(7), t._2(8), t._2(9))) // tenth counter feeds the JRU column
        .toDF("allData", "atpError", "main", "wifi", "balise", "TCR", "speed", "DMI", "TIU", "JRU")
        .write.mode(SaveMode.Overwrite) // Overwrite replaces the table each batch; use SaveMode.Append to accumulate rows
        .jdbc("jdbc:mysql://hadoop101:3306/company?characterEncoding=UTF-8", "alldata", props)
    }

    scc.start()
    scc.awaitTermination()
  }
}
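For a quick test, each Kafka message must be a comma-separated line with at least 12 fields, since the job parses indices 2 through 11 as Ints. A minimal producer sketch (the sample line and its values are invented; kafka-clients comes in transitively with spark-streaming-kafka-0-10):

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object TestProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "hadoop101:9092,hadoop102:9092,hadoop103:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    // Fields 0 and 1 are ignored by the job; fields 2-11 must parse as Int.
    producer.send(new ProducerRecord[String, String]("first", "id-1,line-7,1,0,1,0,0,1,0,0,1,0"))
    producer.close()
  }
}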