Approach 1: sparkSession.read.jdbc with an array of partition predicates
val modelDataSql = "select pay_time, ... "

// Partition the JDBC read by time: Spark issues one query per predicate,
// so each calendar month of pay_time becomes its own partition (12 total).
// Generating the predicates avoids twelve hand-written, typo-prone lines.
val partitionClause: Array[String] =
  (1 to 12).map(m => s"EXTRACT(month FROM pay_time) = $m ").toArray

// Keep a handle on the resulting DataFrame instead of discarding it.
// NOTE(review): "jdbc.xxxx.url" is a placeholder — substitute the real JDBC URL.
val modelDataDf = sparkSession
  .read
  .jdbc(
    "jdbc.xxxx.url"
    // Wrap the query as a derived table; JDBC requires a table-valued expression.
    , s" ( $modelDataSql ) T"
    // One partition per predicate string.
    , partitionClause
    , jdbcProperties
  )
Approach 2: configure column-range partitioning via JDBC reader options
// Read over JDBC, letting Spark split the load by a numeric column range.
// partitionColumn / lowerBound / upperBound / numPartitions must all be set
// together; Spark derives numPartitions stride predicates from the bounds.
// jdbcArgs(key) replaces jdbcArgs.get(key).get — same fail-fast behavior on a
// missing key, without the Option#get anti-pattern. `val`: never reassigned.
// NOTE(review): the "xxxxx" values are placeholders to be filled in by the caller.
val dataDf: DataFrame = spark.read.format("jdbc")
  .option("url", jdbcArgs("url"))
  .option("driver", jdbcArgs("driver"))
  .option("user", jdbcArgs("user"))
  .option("password", jdbcArgs("password"))
  .option("fetchSize", jdbcArgs("fetchSize"))
  // Wrap the query as a derived table so it can be read like a table.
  .option("dbtable", "(" + jdbcArgs("sql") + ") temp")
  .option("partitionColumn", "xxxxx")
  .option("lowerBound", "xxxxx")
  .option("upperBound", "xxxxx")
  .option("numPartitions", "xxxxx")
  .load()