package com.mingzhi.universal
import com.mingzhi.interf.IDate
import com.mingzhi.utils.{SparkUtils, TableUtils}
import org.apache.spark.sql.{SaveMode, SparkSession}
/**
 * Full (non-incremental) import of a MySQL table into a Hive table.
 * Intended for small configuration tables such as tbworkorder_config_detail.
 *
 * Arguments (in order):
 *   host from_db from_table to_db to_table user_name password dt dt1
 *
 * Example (password redacted — never commit real credentials):
 *   192.168.0.201 paascloud_wfs tbworkorder_config_detail paascloud tbworkorder_config_detail root '<password>' 2020-06-01 2020-06-02 false
 */
object load_all_from_mysql {

  /** True when running on a Windows machine — used as the "local development" switch.
    * Null-safe: `sys.props.getOrElse` tolerates a missing "os.name" property,
    * unlike the raw `getProperty(...).contains(...)` chain.
    */
  private def isWindows: Boolean =
    sys.props.getOrElse("os.name", "").contains("Windows")

  def main(args: Array[String]): Unit = {
    if (isWindows) {
      // Local dev mode: load() substitutes hard-coded test parameters, so the
      // nine connection arguments are intentionally passed as null.
      load(null, null, null, null, null,
        null, null, null, null)
      return
    }
    // Fail fast with a readable message instead of ArrayIndexOutOfBoundsException.
    require(args.length >= 9,
      s"expected 9 arguments: host from_db from_table to_db to_table user_name password dt dt1 (got ${args.length})")
    load(args(0), args(1), args(2), args(3), args(4), args(5), args(6), args(7), args(8))
  }

  /**
    * Reads `from_db.from_table` from MySQL over JDBC and rewrites it into the
    * Hive table `to_db.to_table`, one dt-partition per day in [dt, dt1]
    * (day iteration is delegated to [[IDate]]).
    *
    * @param host       MySQL host (port 3306 is assumed)
    * @param from_db    source MySQL database
    * @param from_table source MySQL table
    * @param to_db      target Hive database
    * @param to_table   target Hive table
    * @param user_name  MySQL user
    * @param password   MySQL password
    * @param dt         start date (inclusive), e.g. 2020-06-01
    * @param dt1        end date (inclusive), e.g. 2020-06-02
    */
  def load(host: String, from_db: String, from_table: String, to_db: String,
           to_table: String, user_name: String, password: String,
           dt: String, dt1: String): Unit = {
    val conf = SparkUtils.getDefConf
    val builder = SparkSession.builder()
    builder
      .config("spark.sql.parquet.writeLegacyFormat", "true")
      .enableHiveSupport()
    if (isWindows) {
      builder.master("local[*]")
      // FIXME(security): hard-coded development credentials — move to an
      // environment variable or local config file; do not keep secrets in code.
      conf.from_db = "paascloud_wfs"
      conf.from_table = "tbworkorder_config_detail"
      conf.to_db = "paascloud"
      conf.to_table = conf.from_table
      conf.user_name = "root"
      conf.password = "/r,G)7r8kVTo"
      // NOTE(review): conf.host, conf.dt and conf.dt1 are NOT assigned in this
      // branch — they keep whatever defaults SparkUtils.getDefConf provides.
      // Confirm those defaults are valid for local runs.
    } else {
      conf.host = host
      conf.from_db = from_db
      conf.from_table = from_table
      conf.to_db = to_db
      conf.to_table = to_table
      conf.user_name = user_name
      conf.password = password
      conf.dt = dt
      conf.dt1 = dt1
    }
    builder.appName(conf.from_table)
    val spark = builder.getOrCreate()
    // Full-table read over JDBC (no partitioning options: acceptable only for
    // small configuration tables, which is this job's stated scope).
    val jdbcDF = spark.read
      .format("jdbc")
      .option("url", s"jdbc:mysql://${conf.host}:3306/${conf.from_db}")
      .option("user", conf.user_name)
      .option("password", conf.password)
      .option("dbtable", conf.from_table)
      .load()
    jdbcDF.createOrReplaceTempView("t_temp")
    // IDate iterates every day in [conf.dt, conf.dt1] and calls onDate once per day,
    // so the same snapshot is written into each daily partition.
    new IDate {
      override def onDate(dt: String): Unit = {
        process(spark, "t_temp", conf.to_db, conf.to_table, dt)
      }
    }.invoke(conf.dt, conf.dt1)
    spark.stop()
  }

  /**
    * Writes the temp view `from_table` into partition dt of `to_db.to_table`:
    * drops the existing partition (if the table exists), then appends the
    * snapshot as Parquet partitioned by dt — i.e. an idempotent daily overwrite.
    *
    * NOTE(review): dt and the table names are interpolated directly into SQL.
    * They originate from operator-supplied job arguments (trusted), but this
    * would be injectable if ever fed untrusted input.
    */
  private def process(spark: SparkSession, from_table: String, to_db: String, to_table: String, dt: String): Unit = {
    val frame = spark.sql(
      s"""
         |select '$dt' as dt,* from $from_table
         |""".stripMargin)
    frame.show(false)
    if (TableUtils.tableExists(spark, to_db, to_table)) {
      // Drop the target partition first so re-runs do not duplicate rows.
      spark.sql(
        s"""
           |alter table $to_db.$to_table drop if exists partition(dt='$dt')
           |""".stripMargin)
      println(s"table $to_table exists, dropped partition dt=$dt")
    } else {
      println(s"table $to_table does not exist, do nothing")
    }
    frame
      .coalesce(1) // small config table: write a single file per partition
      .write
      .format("parquet")
      .mode(SaveMode.Append)
      .partitionBy("dt")
      .saveAsTable(s"${to_db}.$to_table")
  }
}