package com.ws.jdbc
import java.sql.DriverManager
import org.apache.spark.rdd.JdbcRDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Reads data from MySQL via Spark's JdbcRDD — no Sqoop required
 * (Hadoop relies on Sqoop to import/export data to/from MySQL).
 */
object JdbcRDDTest {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("JdbcRDDTest").setMaster("local[4]")
    val sc = new SparkContext(conf)
    // JdbcRDD arguments:
    //   1) SparkContext; 2) connection factory; 3) SQL containing exactly two
    //   '?' placeholders for the partition bounds; 4,5) lower/upper bound
    //   values; 6) number of partitions; 7) mapper from a ResultSet row.
    val result: JdbcRDD[(Int, String, Int)] = new JdbcRDD(sc, getConnection,
      "select * from ipcount where id >=? and id <= ?", 1, 5, 2,
      rs => {
        val id = rs.getInt(1)
        val province = rs.getString(2)
        val count = rs.getInt(3)
        (id, province, count)
      })
    // Collect once and reuse: the original ran collect() and count() as two
    // separate Spark actions, which queried the database twice.
    val rows = result.collect()
    println(rows.toBuffer)
    println(rows.length)
    sc.stop()
  }

  // Connection factory handed to JdbcRDD; invoked lazily on each partition.
  // Fixed URL parameter: "charatorEncoding" was a typo and was silently
  // ignored by the driver — the correct MySQL property is "characterEncoding".
  val getConnection = () => {
    DriverManager.getConnection("jdbc:mysql://192.168.127.13/ip?characterEncoding=utf-8",
      "root", "root")
  }
}
// Spark reading from MySQL (Redis/MongoDB/HBase work the same way — just
// swap in the corresponding RDD type).
// Originally published 2023-12-13 16:37:21.