Spark 支持通过 Java JDBC 访问关系型数据库；使用 RDD API 时，可以通过 JdbcRDD 进行访问，示例如下：
添加依赖
// Add the MySQL JDBC driver (Connector/J) to build.sbt
libraryDependencies += "mysql" % "mysql-connector-java" % "5.1.47"
MySQL读取
import java.sql.DriverManager
import org.apache.spark.rdd.JdbcRDD
import org.apache.spark.{SparkConf, SparkContext}
object MySQLDemo {
/**
 * Entry point: reads `name` and `age` from `tbl_person_info` through a
 * JdbcRDD and prints every fetched row on the driver.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val conf = new SparkConf().setAppName("MySQLDemo").setMaster("local[4]")
  val sc = new SparkContext(conf)
  try {
    val driver = "com.mysql.jdbc.Driver"
    val url = "jdbc:mysql://172.16.0.31:3306/db_canal_test"
    val username = "root"
    val password = "123456"
    // JdbcRDD expects exactly two `?` placeholders; every partition binds
    // them to its own sub-range of [lowerBound, upperBound].
    val sql = "select name, age from tbl_person_info where id >= ? and id <= ?"
    val jdbcRdd = new JdbcRDD(
      sc,
      () => {
        // Load the driver class explicitly in the JVM that opens the
        // connection instead of relying on JDBC driver auto-discovery.
        Class.forName(driver)
        DriverManager.getConnection(url, username, password)
      },
      sql,
      1, // lowerBound: smallest id to read
      2, // upperBound: largest id to read (inclusive)
      2, // numPartitions
      // Turn each row into a (name, age) pair so the RDD carries real data
      // rather than Unit values produced by a side-effecting println.
      res => (res.getString(1), res.getInt(2))
    )
    // Print on the driver, after the rows have been collected.
    jdbcRdd.collect().foreach { case (name, age) => println(s"$name, $age") }
  } finally {
    // Release the SparkContext even when the job fails.
    sc.stop()
  }
}