package com.ibeifeng.sql
import org.apache.spark.sql
import org.apache.spark.rdd.RDD
import java.util.Properties
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
/**
 * Demo: move data between Hive and MySQL with Spark SQL.
 *
 * Steps:
 *  1. Export the Hive table `default.emp` into MySQL.
 *  2. Join the Hive `emp` data with the MySQL `dept` table.
 *  3. Write the join result to HDFS as parquet and into a partitioned Hive table.
 *
 * NOTE: the original source used non-ASCII "smart quotes" (“ ”) in string
 * literals, which do not compile in Scala; all literals below use plain quotes.
 */
object HiveJoinMySQLDemo {

  def main(args: Array[String]): Unit = {
    // 1. Build a SparkSession with Hive support.
    val warehouseLocation = "/user/hive/warehouse"
    val spark = SparkSession
      .builder()
      .appName("DataFrame 2 RDD")
      .master("local")
      // "spark.sql.warehouse.dir" is the key Spark actually reads; the
      // original set a custom key "warehouselocation", which Spark ignores.
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .enableHiveSupport()
      .getOrCreate()

    import spark.implicits._

    // JDBC connection settings for the MySQL side.
    val url    = "jdbc:mysql://bd23-server6.ibeifeng.com:3306/test"
    val table  = "tb_dept"
    val table2 = "dept"
    val props  = new Properties()
    props.put("user", "root")
    props.put("password", "123456")

    // 1. Export the Hive table default.emp into MySQL (overwriting tb_dept).
    spark.read.table("default.emp")
      .write.mode(SaveMode.Overwrite)
      .jdbc(url, table, props)

    // 2. Join Hive data with MySQL data.
    // 2.1 Read the MySQL `dept` table and register it as a temp view.
    //     (createOrReplaceTempView returns Unit, so no val is kept — the
    //     original bound the Unit result to an unused `val df`.)
    spark.read.jdbc(url, table2, props).createOrReplaceTempView("tmp_tb_dept")

    // 2.2 Join emp (Hive) with dept (MySQL).
    // NOTE(review): the original joined on `a.empno = b.epno`; `epno` looks
    // like a typo — the classic emp/dept schema joins on deptno, which also
    // matches partitionBy("deptno") below. Confirm against the real schema.
    spark.sql(
      """
        |select b.*, a.sal, a.empno
        |from default.emp a
        |join tmp_tb_dept b
        |on a.deptno = b.deptno
      """.stripMargin)
      .createOrReplaceTempView("tmp_emp_join_dept_result")

    // Cache the join result once, since it is read three more times below.
    // (The original called .cache() on a freshly-created DataFrame and
    // discarded it, which does not cache the temp view.)
    spark.catalog.cacheTable("tmp_emp_join_dept_result")

    spark.sql("select * from tmp_emp_join_dept_result").show()

    // 3. Write the result as parquet under a timestamped HDFS directory.
    spark.read.table("tmp_emp_join_dept_result")
      .write.format("parquet")
      .save(s"/spark/o2o23/result_pvuv/${System.currentTimeMillis()}/")

    // Save to a Hive table in parquet format, partitioned by deptno.
    spark.read
      .table("tmp_emp_join_dept_result")
      .write
      .format("parquet")
      .partitionBy("deptno")
      .mode(SaveMode.Overwrite)
      .saveAsTable("hive_emp_dept")

    spark.sql("show tables").show()

    // Release the cached join result.
    spark.catalog.uncacheTable("tmp_emp_join_dept_result")
  }
}