package com.job

import org.apache.commons.cli.{CommandLine, DefaultParser, Options}
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{StringType, StructType}

case class Transfer2HiveConfig(queryDay: String)
object Transfer2HiveJob {
val QUERY_DAY = "query-day"
  def parseAsConfig(commandLine: CommandLine): Option[Transfer2HiveConfig] = {
    // getOptionValue returns null when the option is absent, so wrap it in
    // Option instead of returning Some(null)
    Option(commandLine.getOptionValue(QUERY_DAY)).map(Transfer2HiveConfig(_))
  }
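
  // A format check before building the config would keep malformed dates out of
  // the SQL below. A minimal sketch; the yyyy-MM-dd pattern is an assumption
  // about how p_day is encoded, and isValidDay is a hypothetical helper:
  //   def isValidDay(day: String): Boolean = day.matches("""\d{4}-\d{2}-\d{2}""")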
def main(args: Array[String]): Unit = {
    val parser = new DefaultParser()
val options = new Options()
options.addOption("qd", QUERY_DAY, true, "query day")
val commandLine = parser.parse(options, args)
parseAsConfig(commandLine) match {
case Some(config) =>
val spark = SparkSession.builder()
.appName(s"app-name")
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
.enableHiveSupport()
.getOrCreate()
try {
new Transfer2HiveJob(spark, config).run()
} catch {
          case ex: Throwable =>
            println(s"error running ${classOf[Transfer2HiveJob].getSimpleName}: $ex")
            ex.printStackTrace()
            sys.exit(1)
}
      case None =>
        println(s"missing required option --$QUERY_DAY")
        sys.exit(1)
}
}
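
  // Example submission; the jar name, master, and resource settings below are
  // placeholders, not part of the original job:
  //   spark-submit --class com.job.Transfer2HiveJob --master yarn \
  //     transfer2hive.jar --query-day 2024-01-01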
}

class Transfer2HiveJob(spark: SparkSession, config: Transfer2HiveConfig) {
  def run(): Unit = {
    // The input path could also be passed in on the command line (see the
    // sketch below); it is left as a placeholder here
    transfer2hive("path")
}
def transfer2hive(path: String): Unit = {
println(s"-----------------path=$path")
    // Build the schema for the two tab-separated string columns
val dataSchema: StructType = new StructType()
.add("column1", StringType)
.add("column2", StringType)
    // Switch to the target database ("xxx" is a placeholder)
spark.sql("use xxx")
    // Read the raw text file from HDFS, split each line on tabs, and drop any
    // malformed line that does not have exactly two columns
val data = spark.sparkContext.textFile(path)
.map(item => item.split("\t"))
.filter(item => item.length == 2)
.map(item => Row(item(0), item(1)))
    // Register a temporary view over the parsed rows using the schema above;
    // createOrReplaceTempView avoids an AnalysisException if the view already exists
    spark.createDataFrame(data, dataSchema)
      .createOrReplaceTempView("table_temp")
    // Drop the target partition first so re-runs for the same day stay idempotent
    val dropPartitionSQL = s"alter table table_name drop if exists partition(p_day='${config.queryDay}')"
spark.sql(dropPartitionSQL)
    // Write the rows into the matching partition of the Hive table
    val insert2hiveSQL = s"insert into table_name " +
      s"partition(p_day='${config.queryDay}') " +
      s"select column1, column2 from table_temp"
spark.sql(insert2hiveSQL)
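
    // A single INSERT OVERWRITE could replace the drop + insert pair above and
    // rewrite the partition in one statement (a sketch, same placeholder names):
    //   spark.sql(s"insert overwrite table table_name " +
    //     s"partition(p_day='${config.queryDay}') " +
    //     s"select column1, column2 from table_temp")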
}
}