pom 依赖（pom.xml 中的 Maven 配置）
<!-- Extra repositories: aliyun mirror (fast in China) and cloudera.
     Both must be served over HTTPS: Maven 3.8.1+ blocks plain-http repositories. -->
<repositories>
<repository>
<id>aliyun</id>
<url>https://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<encoding>UTF-8</encoding>
<scala.version>2.11.8</scala.version>
<scala.compat.version>2.11</scala.compat.version>
<spark.version>2.2.0</spark.version>
</properties>
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<!-- NOTE: Scala binary version hard-coded; keep in sync with ${scala.compat.version}. -->
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.38</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
</dependencies>
<build>
<sourceDirectory>src/main/scala</sourceDirectory>
<plugins>
<!-- Plugin that compiles the Scala sources. -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.2</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
<configuration>
<args>
<arg>-dependencyfile</arg>
<arg>${project.build.directory}/.scala_dependencies</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.18.1</version>
<configuration>
<useFile>false</useFile>
<disableXmlReport>true</disableXmlReport>
<includes>
<include>**/*Test.*</include>
<include>**/*Suite.*</include>
</includes>
</configuration>
</plugin>
<!-- Build a fat jar; strip signature files so the merged jar is not rejected
     with "Invalid signature file digest". -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<!-- Was empty; without a Main-Class the fat jar cannot be run with `java -jar`.
     Points at the object defined in the code sample below. -->
<mainClass>JDBCDataSource</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
代码示例（Scala）
import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet}
import org.apache.spark.rdd.{JdbcRDD, RDD}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Demo: write an RDD of (name, age) pairs to MySQL with foreachPartition,
 * then read the rows back with Spark's built-in JdbcRDD.
 *
 * Fixes over the original: `Unit`/`SparkConf`/`SparkContext` capitalization
 * (the lowercase forms do not compile), `foreachPartition`/`collect` typos,
 * and the JdbcRDD constructor — it requires a () => Connection factory
 * (a raw Connection is neither the right type nor serializable), a SQL
 * string containing exactly two '?' bound placeholders, and three numeric
 * arguments (lowerBound, upperBound, numPartitions).
 */
object JDBCDataSource {

  def main(args: Array[String]): Unit = {
    // local[*] -- run locally using all available cores; for testing only.
    val conf: SparkConf = new SparkConf().setAppName("wc").setMaster("local[*]")
    val sc = new SparkContext(conf)
    sc.setLogLevel("WARN")

    // Sample data to insert.
    val dataRDD: RDD[(String, Int)] = sc.parallelize(List(("jack", 18), ("tom", 19), ("rose", 20)))

    // Send each partition's rows to MySQL (one connection per partition).
    dataRDD.foreachPartition(data2MySQL)

    // Read the data back with JdbcRDD. The SQL must contain exactly two '?'
    // placeholders; JdbcRDD binds them to each partition's id range.
    val sql: String = "select id,name,age from tableName where id >= ? and id <= ?"
    val mapRow = (res: ResultSet) => {
      val id: Int = res.getInt("id")
      val name: String = res.getString("name")
      val age: Int = res.getInt("age")
      (id, name, age)
    }
    val resRDD = new JdbcRDD[(Int, String, Int)](
      sc,
      // Connection *factory*: evaluated on each executor, so it must be a
      // function, not an already-opened Connection (which isn't serializable).
      () => DriverManager.getConnection("jdbc:mysql://localhost:3306/database?characterEncoding=UTF-8", "userName", "passWord"),
      sql,
      1,   // lowerBound  -> bound to the first '?'
      100, // upperBound  -> bound to the second '?'; adjust to the real id range
      2,   // numPartitions
      mapRow
    )
    resRDD.collect().foreach(println)

    // Release Spark resources (original leaked the context and an unused Connection).
    sc.stop()
  }

  /**
   * Insert one partition of (name, age) pairs into MySQL.
   *
   * Opens a single connection/statement for the whole partition and closes
   * them in reverse acquisition order (statement first, then connection) in
   * a finally block — the original closed the connection before the
   * statement and leaked both on failure.
   */
  def data2MySQL(p: Iterator[(String, Int)]): Unit = {
    val conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/database?characterEncoding=UTF-8", "userName", "passWord")
    val sql: String = "insert into tableName (`id`,`name`,`age`) values (NULL,?,?);"
    val ps: PreparedStatement = conn.prepareStatement(sql)
    try {
      p.foreach { case (name, age) =>
        ps.setString(1, name)
        ps.setInt(2, age)
        ps.executeUpdate()
      }
    } finally {
      ps.close()
      conn.close()
    }
  }
}