Spark: connecting to Hive, MySQL, and Parquet, and moving data in and out of them

Connecting to the data sources and loading data

POM dependencies:

<dependency>
  <groupId>mysql</groupId>
  <artifactId>mysql-connector-java</artifactId>
  <version>8.0.29</version>
</dependency>
<dependency>
  <groupId>org.apache.parquet</groupId>
  <artifactId>parquet-common</artifactId>
  <version>1.8.2</version>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-hive_2.11</artifactId>
  <version>2.3.4</version>
</dependency>
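
One thing to note before the code: for the Hive helpers below to reach an actual metastore, the SparkSession must be built with enableHiveSupport(). A minimal sketch; the app name, master, and warehouse path are placeholder assumptions:

import org.apache.spark.sql.SparkSession

object HiveSessionDemo {
  def main(args: Array[String]): Unit = {
    // minimal sketch: a session able to read and write Hive tables;
    // master and warehouse dir are placeholders, adjust for your cluster
    val spark = SparkSession.builder()
      .appName("HiveDemo")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "/user/hive/warehouse")  // assumed path
      .enableHiveSupport()  // required for saveAsTable / spark.sql on Hive tables
      .getOrCreate()
    spark.close()
  }
}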

Scala code:

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

import java.util.Properties

object JDBCUtile {
  val url = "jdbc:mysql://192.168.52.146:3306/etclog"
  val driver = "com.mysql.cj.jdbc.Driver"
  val user = "root"
  val password = "ok"
  val frame = "frame"  // default target table name
  private val prop = new Properties()
  prop.setProperty("user", user)
  prop.setProperty("password", password)
  prop.setProperty("driver", driver)

  // save a DataFrame to MySQL; op = 0 appends, op = 1 (the default) overwrites
  def dataFrameToMysql(df: DataFrame, table: String, op: Int = 1): Unit = {
    if (op == 0) {
      df.write.mode(SaveMode.Append).jdbc(url, table, prop)
    } else if (op == 1) {
      df.write.mode(SaveMode.Overwrite).jdbc(url, table, prop)
    }
  }

  // save a DataFrame as Parquet files under the given output path
  def dataFrameToParquet(df: DataFrame, output: String, op: Int = 1): Unit = {
    if (op == 0) {
      df.write.mode(SaveMode.Append).parquet(output)
    } else if (op == 1) {
      df.write.mode(SaveMode.Overwrite).parquet(output)
    }
  }

  // save a DataFrame to a Hive table
  def dataFrameHive(df: DataFrame, table: String, op: Int = 1): Unit = {
    if (op == 0) {
      df.write.mode(SaveMode.Append).saveAsTable(table)    // save-mode selection
    } else if (op == 1) {
      df.write.mode(SaveMode.Overwrite).saveAsTable(table)
    }
  }

  // read a MySQL table into a Spark DataFrame
  def getDataFrameFromMysql(spark: SparkSession, table: String): DataFrame = {
    spark.read.jdbc(url, table, prop)
  }

  // read Parquet files into a Spark DataFrame
  def getDataFrameFromParquet(spark: SparkSession, path: String): DataFrame = {
    spark.read.parquet(path)
  }

  // read a Hive table into a Spark DataFrame
  def getDataFrameFromHive(spark: SparkSession, table: String): DataFrame = {
    spark.sql("select * from " + table)
  }
}
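
For big MySQL tables there is also an overload of spark.read.jdbc that splits the scan into parallel partitions over a numeric column. A sketch in the same style as JDBCUtile; the partition column "id" and the bounds are illustrative assumptions, not columns from the tables above:

import java.util.Properties

import org.apache.spark.sql.{DataFrame, SparkSession}

object JDBCPartitionedDemo {
  // sketch: parallel JDBC read, splitting the table on a numeric column;
  // "id" and the 0..100000 bounds are made-up example values
  def getDataFrameFromMysqlPartitioned(spark: SparkSession, table: String): DataFrame = {
    val prop = new Properties()
    prop.setProperty("user", JDBCUtile.user)
    prop.setProperty("password", JDBCUtile.password)
    prop.setProperty("driver", JDBCUtile.driver)
    spark.read.jdbc(JDBCUtile.url, table,
      columnName = "id",  // numeric column to split on (assumed to exist)
      lowerBound = 0L, upperBound = 100000L, numPartitions = 8,
      connectionProperties = prop)
  }
}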

Demo of processing data imported from MySQL (the other sources work much the same): data can be read out of the database, transformed, and written back in.

import org.apache.spark.sql.SparkSession

object MysqlDemo {
  def main(args: Array[String]): Unit = {
    // one SparkSession is enough: a second builder().getOrCreate() call
    // would simply return this same session
    val spark = SparkSession.builder().appName("Student")
      .master("local[*]").getOrCreate()
    val Student = JDBCUtile.getDataFrameFromMysql(spark, "Student")
    val SC = JDBCUtile.getDataFrameFromMysql(spark, "SC")
    // students whose score in course 01 is higher than in course 02
    val c01 = SC.filter(x => x.getAs[String]("CId") == "01")
    val c02 = SC.filter(x => x.getAs[String]("CId") == "02")
    val higher = c01.join(c02, Seq("SId"), "inner")
      .toDF("SId", "CId1", "score1", "CId2", "score2")
      .filter(x => x.getAs[Any]("score1").toString.toDouble >
                   x.getAs[Any]("score2").toString.toDouble)
      .select("SId")
    higher.join(Student, Seq("SId"), "inner").show()
    spark.close()
  }
}
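
The same query can also be expressed in Spark SQL over temp views, which some readers may find easier to follow; a sketch assuming the Student and SC DataFrames loaded above:

// sketch: the "course 01 score > course 02 score" query via Spark SQL;
// assumes Student and SC were read from MySQL as in the demo above
Student.createOrReplaceTempView("student")
SC.createOrReplaceTempView("sc")
spark.sql(
  """select s.*
    |from student s
    |join sc a on a.SId = s.SId and a.CId = '01'
    |join sc b on b.SId = s.SId and b.CId = '02'
    |where cast(a.score as double) > cast(b.score as double)
    |""".stripMargin).show()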

Writing a DataFrame out to MySQL, Hive, or Parquet is just as simple: call the method that matches the target.

//JDBCUtile.dataFrameToMysql(frame, JDBCUtile.frame)         // write the DataFrame to MySQL
//JDBCUtile.dataFrameToParquet(frame, "out/fullog", op = 1)  // write the DataFrame to Parquet
//JDBCUtile.dataFrameHive(frame, "sss", 1)                   // write the DataFrame to Hive
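
Put together, a round trip through all three stores might look like the sketch below; the "Student" table, the "out/student" path, and the "student_hive" table name are illustrative, and the session needs Hive support enabled:

import org.apache.spark.sql.SparkSession

object RoundTripDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("RoundTrip")
      .master("local[*]").enableHiveSupport().getOrCreate()
    val df = JDBCUtile.getDataFrameFromMysql(spark, "Student")         // MySQL -> DataFrame
    JDBCUtile.dataFrameToParquet(df, "out/student", op = 1)            // DataFrame -> Parquet
    val back = JDBCUtile.getDataFrameFromParquet(spark, "out/student") // Parquet -> DataFrame
    JDBCUtile.dataFrameHive(back, "student_hive", 1)                   // DataFrame -> Hive
    spark.close()
  }
}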