Today, data from Kafka mysteriously refused to be written into MySQL. The code itself had no problems, and after hunting for the error for a long time, simply swapping the order of the Maven dependencies made it work. I'm writing this post to remind myself: declare the important dependencies first, then the auxiliary ones. My best guess is that this comes down to Maven's dependency mediation: when two versions of the same artifact sit at the same depth in the dependency tree, the one declared first wins, so declaration order can change which kafka-clients actually lands on the classpath.
Here is the dependency order that worked:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>homework1014</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.0</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.38</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>4.0.1</version>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
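
Next time, before blaming the code, it's worth checking what Maven actually resolved. The dependency tree shows which version of each artifact (kafka-clients in particular) ends up on the classpath, and with the verbose flag it also lists the conflicting versions that were dropped:

mvn dependency:tree -Dverbose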
The code:
package work

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import java.sql.{Connection, DriverManager, PreparedStatement}

object Test1_work2 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("Test1_work2")
    // second argument: the batch interval, one batch of data every 5 seconds
    val ssc = new StreamingContext(conf, Seconds(5))
    ssc.sparkContext.setLogLevel("WARN")

    val kafkaParams: Map[String, Object] = Map(
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.GROUP_ID_CONFIG -> "g1",
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoop10:9092"
    )

    val dstream1: DStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](List("topica"), kafkaParams))

    dstream1.map(v => {
      // fields are tab-separated; keep the date part (before the space) of the third field
      val arr: Array[String] = v.value().split("\t")
      (arr(2).split(" ")(0), 1)
    }).reduceByKey(_ + _)
      .foreachRDD(rdd => {
        // open one JDBC connection per partition instead of one per record
        rdd.foreachPartition(iter => {
          val conn: Connection = DriverManager.getConnection(
            "jdbc:mysql://localhost:3306/test1?useUnicode=true&characterEncoding=utf8&useSSL=false",
            "root", "root")
          val ps: PreparedStatement = conn.prepareStatement("insert into count_comment values(?,?)")
          iter.foreach(v => {
            println(v)
            ps.setString(1, v._1)
            ps.setInt(2, v._2)
            ps.executeUpdate()
          })
          ps.close()
          conn.close()
        })
      })

    ssc.start()            // start the streaming computation
    ssc.awaitTermination() // keep running until terminated
  }
}
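
One thing I would still tighten in the write path: if any record in a partition throws, the connection above is never closed, and executing one INSERT per record is slow. Here is a rough sketch of the same rdd.foreachPartition block with try/finally and JDBC batching; it is meant to slot into the foreachRDD above and reuses the java.sql imports, table, URL, and credentials from the code, nothing new beyond the restructuring:

rdd.foreachPartition(iter => {
  // one connection per partition, released in finally even if an insert fails
  val conn: Connection = DriverManager.getConnection(
    "jdbc:mysql://localhost:3306/test1?useUnicode=true&characterEncoding=utf8&useSSL=false",
    "root", "root")
  try {
    val ps: PreparedStatement = conn.prepareStatement("insert into count_comment values(?,?)")
    try {
      iter.foreach { case (day, cnt) =>
        ps.setString(1, day)
        ps.setInt(2, cnt)
        ps.addBatch()      // queue the row instead of executing it immediately
      }
      ps.executeBatch()    // one round trip per partition instead of one per record
    } finally {
      ps.close()
    }
  } finally {
    conn.close()
  }
})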