Spark 提供了 JdbcRDD，用于通过 JDBC 连接 MySQL 数据库并行读取数据，下面是一个完整示例。
import java.sql.DriverManager
import org.apache.spark.rdd.JdbcRDD
import org.apache.spark.{SparkConf, SparkContext}
object JdbcRDDDemo {

  /**
   * Demo: read rows from the MySQL table `url_data` in parallel with Spark's
   * [[org.apache.spark.rdd.JdbcRDD]] and print them on the driver.
   *
   * Assumes a local MySQL instance with database `urlcount` reachable at
   * localhost:3306 (user/password: root/root).
   */
  def main(args: Array[String]): Unit = {
    // 1. Spark entry point: local mode with 2 worker threads.
    val conf: SparkConf = new SparkConf().setAppName("Urlcount").setMaster("local[2]")
    val sc: SparkContext = new SparkContext(conf)

    // Connection factory. JdbcRDD invokes this on each executor, once per
    // partition, so it must be a serializable zero-arg function rather than
    // a pre-built (non-serializable) Connection.
    val connection = () => {
      // Class.forName alone registers the driver; the deprecated
      // .newInstance() call was unnecessary and has been removed.
      Class.forName("com.mysql.jdbc.Driver")
      // FIX: the URL parameter is "characterEncoding" — the original typo
      // "charatorEncoding" was silently ignored, so UTF-8 was never applied.
      DriverManager.getConnection(
        "jdbc:mysql://localhost:3306/urlcount?characterEncoding=utf-8",
        "root",
        "root")
    }

    // Query a uid range; JdbcRDD substitutes each partition's bounds for the
    // two '?' placeholders.
    val jdbcRdd: JdbcRDD[(Int, String, String)] = new JdbcRDD(
      sc,
      connection,
      "SELECT * FROM url_data where uid >= ? AND uid <= ?",
      // lowerBound = 1, upperBound = 4, numPartitions = 2.
      // Keep the SQL bounds inclusive (>= / <=): with strict > / < the rows
      // on the boundary between partitions would be silently dropped.
      1, 4, 2,
      // Row mapper, executed per ResultSet row on the executor.
      r => {
        val uid = r.getInt(1)
        val xueyuan = r.getString(2)       // column 2: academy/school name
        val number_one = r.getString(3)
        (uid, xueyuan, number_one)
      }
    )

    // Collect to the driver and print — acceptable for a small demo table.
    val jrdd = jdbcRdd.collect()
    println(jrdd.toBuffer)
    sc.stop()
  }
}