环境准备:
请确保已安装 MySQL 的 JDBC 驱动和 Spark SQL 的环境依赖,并配置好 Hive 环境及相关依赖文件(如将 hive-site.xml 放入 Spark 的 conf 目录)。
<!-- Spark core engine -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${spark.scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Spark SQL: DataFrame/Dataset API and the JDBC writer used below -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${spark.scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Spark-Hive integration: required for enableHiveSupport() -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${spark.scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Hadoop common: filesystem/configuration classes Hive and Spark rely on -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- MySQL Connector/J (new coordinates; replaces mysql:mysql-connector-java) -->
<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<version>${mysql.version}</version>
</dependency>
<!-- Hive execution engine -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<exclusions>
<!-- Excluded to avoid a duplicate SLF4J binding clashing with Spark's logging -->
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- HBase client driver -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<!-- Hive JDBC driver -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
</dependency>
Scala代码实现
import java.util.Properties
import org.apache.spark.sql.{SaveMode, SparkSession}
/**
 * Exports every table of one Hive database into a MySQL database of the
 * same name: lists the tables with `SHOW TABLES`, then writes each one
 * through Spark's JDBC writer in Overwrite mode.
 *
 * Fill in the placeholder strings (database name, host, user, password)
 * before running.
 */
object HiveToMySQL {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("hive_to_mysql")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()
    try {
      // Hive database to export; also used as the target MySQL schema name.
      val dbName: String = "写你的数据库名"
      val conf = new Properties()
      conf.setProperty("driver", "com.mysql.cj.jdbc.Driver")
      // createDatabaseIfNotExist=true lets Connector/J create the target
      // schema on first connect, so no manual CREATE DATABASE is needed.
      conf.setProperty("url", s"jdbc:mysql://主机名:3306/$dbName?createDatabaseIfNotExist=true")
      conf.setProperty("user", "mysql用户名")
      conf.setProperty("password", "mysql密码")
      spark.sql(s"show tables from $dbName")
        .collect()
        .foreach { row =>
          // NOTE(review): column 1 of SHOW TABLES is assumed to be the table
          // name (0 = database, 2 = isTemporary) — confirm for your Spark version.
          val tableName: String = row.getString(1)
          spark.table(s"$dbName.$tableName")
            .repartition(1) // single partition -> one JDBC connection per table
            .write
            .mode(SaveMode.Overwrite)
            .jdbc(conf.getProperty("url"), tableName, conf)
        }
    } finally {
      // Release the local Spark context even if a table write fails.
      spark.stop()
    }
  }
}
测试
-- Fixture: a throwaway Hive database with three small tables,
-- used to verify that HiveToMySQL copies every table to MySQL.
create database if not exists test_hive_to_mysql;
use test_hive_to_mysql;
-- Three identical two-column tables (name, id).
create table test1(
name string,
id int
);
create table test2(
name string,
id int
);
create table test3(
name string,
id int
);
-- Seed a few rows so the export has visible data to check.
insert into test1 values ('jerry',1);
insert into test1 values ('tom',2);
insert into test2 values ('james',3);
insert into test3 values ('murry',5);
insert into test3 values ('jack',6);
最后运行代码,然后到 MySQL 中查询验证:
可以看到 MySQL 中出现了与 Hive 数据库同名的数据库 test_hive_to_mysql,
查询其中的表,数据与 Hive 中的一致。