// Example code
package com.spark
import org.apache.spark.sql.SparkSession
/**
 * Cross-source query: combines a Hive table with MySQL data
 * through the Spark external data source (JDBC) API.
 */
object HiveMySQLAPP {

  def main(args: Array[String]): Unit = {
    // Local session with two worker threads.
    val spark = SparkSession
      .builder()
      .appName("HiveMySQLAPP")
      .master("local[2]")
      .getOrCreate()

    // Hive-side input: the "emp" table resolved through the session catalog.
    val empFromHive = spark.table("emp")

    // MySQL-side input, read through the JDBC data source.
    val jdbcOptions = Map(
      "url"      -> "jdbc:mysql://localhost:3306",
      "dbtable"  -> "hive.TBLS",
      "user"     -> "root",
      "password" -> "hadoop",
      "driver"   -> "com.mysql.jdbc.Driver"
    )
    val tblsFromMySQL = spark.read.format("jdbc").options(jdbcOptions).load()

    // Join the two sources on their shared "deptno" column.
    // NOTE(review): hive.TBLS is normally the metastore's table registry and
    // has no deptno column in a stock schema — confirm the intended join key.
    val joined = empFromHive.join(
      tblsFromMySQL,
      empFromHive.col("deptno") === tblsFromMySQL.col("deptno")
    )

    joined.show()
    spark.stop()
  }
}