支持通过Java JDBC访问关系型数据库。需要通过JdbcRDD进行,示例如下:
(1)添加依赖
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.27</version>
</dependency>
**(2)从Mysql读取数据**
package spark.mysql
import java.sql.DriverManager
import org.apache.spark.rdd.JdbcRDD
import org.apache.spark.{SparkConf, SparkContext}
object MySqlRdd {
  def main(args: Array[String]): Unit = {
    // Spark configuration: run locally on all available cores.
    val sparkConf = new SparkConf().setAppName("MySqlRdd").setMaster("local[*]")
    val sparkContext: SparkContext = new SparkContext(sparkConf)

    // JDBC connection parameters for the source MySQL database.
    val jdbcDriver   = "com.mysql.jdbc.Driver"
    val jdbcUrl      = "jdbc:mysql://hadoop103:3306/rdd"
    val jdbcUser     = "root"
    val jdbcPassword = "123"

    // JdbcRDD binds the two `?` placeholders to the lower/upper id bounds
    // and splits that range across the given number of partitions.
    val mysqlRdd = new JdbcRDD[(Int, String)](
      sparkContext,
      () => {
        Class.forName(jdbcDriver) // register the JDBC driver
        DriverManager.getConnection(jdbcUrl, jdbcUser, jdbcPassword)
      },
      "select * from rddtale where id >=? and id<=?",
      1,  // lower bound (first ?)
      10, // upper bound (second ?)
      1,  // number of partitions
      resultSet => (resultSet.getInt(1), resultSet.getString(2))
    )

    // Print the row count and every (id, name) tuple.
    println(mysqlRdd.count())
    mysqlRdd.foreach(println)
    sparkContext.stop()
  }
}
**(3)往Mysql写入数据**
package spark.mysql
import java.sql.Connection
import org.apache.spark.{SparkConf, SparkContext}
/**
* 向mysql 中写入数据
*/
object writeToMySql {
  /**
   * Writes a small RDD of strings into the MySQL table `rddtale`,
   * one JDBC connection and one reused PreparedStatement per partition.
   */
  def main(args: Array[String]): Unit = {
    // Initialize Spark to run locally on all available cores.
    val conf = new SparkConf().setAppName("writeToMySql").setMaster("local[*]")
    val sc: SparkContext = new SparkContext(conf)
    // Sample data to insert.
    val data = sc.parallelize(List("aaa", "bbb", "ccc"))
    // Insert the data partition-by-partition so each executor opens
    // exactly one connection instead of one per record.
    data.foreachPartition(insertData)
    // Release Spark resources (was missing; matches the MySqlRdd example).
    sc.stop()

    /** Inserts every record of one partition, closing JDBC resources on exit. */
    def insertData(records: Iterator[String]): Unit = {
      // Class.forName alone registers the driver; newInstance() was redundant.
      Class.forName("com.mysql.jdbc.Driver")
      val connection: Connection = java.sql.DriverManager.getConnection(
        "jdbc:mysql://hadoop103:3306/rdd", "root", "123")
      try {
        // Reuse a single PreparedStatement for all rows in this partition
        // (the original leaked one statement per record).
        val ps = connection.prepareStatement("insert into rddtale(name) values(?)")
        try {
          records.foreach { name =>
            ps.setString(1, name)
            ps.executeUpdate()
          }
        } finally {
          ps.close()
        }
      } finally {
        // The original never closed the connection — a leak per partition.
        connection.close()
      }
    }
  }
}