Spark 实现对 MySQL 的 insert or update if exists:有则更新,没有则插入
废话不说,直接上代码:
(因为是从已经上线的代码里造了个 demo 出来,线上代码已经在跑了,但这个 demo 没有测试,如果遇到错误还请跟我说一下,我好改一下,但是整体思路是这样的。)
import java.sql.{Connection, DriverManager, PreparedStatement, Statement}
import java.util.Properties

import scala.util.control.NonFatal

import org.apache.spark.sql.{DataFrame, Row, SparkSession}
object CsdnTest {

  /** Program entry point: builds a small demo DataFrame and upserts it into MySQL. */
  def main(args: Array[String]): Unit = {
    val ss = SparkSession.builder
      .appName("Test")
      .getOrCreate()
    try {
      import ss.implicits._
      val df = Seq(
        ("project_1", 12345),
        ("project_2", 67890)
      ).toDF("project_code", "cost")
      // Insert when absent, update when present (MySQL upsert).
      insertOrUpdateDetail(df)
    } finally {
      // Release the Spark resources even if the write fails.
      ss.stop()
    }
  }

  // MySQL connection settings.
  val SQL_IP_PORT = "你的IP:你的port"
  val SQL_DB_NAME = "数据库名称"
  // Enable MySQL driver-side batch rewriting so executeBatch() sends one multi-row statement.
  val SQL_BATCH_PARAM = "rewriteBatchedStatements=true"
  val SQL_DB_MARKET_URL: String = s"jdbc:mysql://${SQL_IP_PORT}/${SQL_DB_NAME}?${SQL_BATCH_PARAM}"
  val SQL_DB_USERNAME = "username"
  val SQL_DB_PASSWORD = "password"
  val SQL_MBA_PROJECT_DETAIL_TABLE = "table"
  val UTF8 = "UTF-8"

  /** Builds the JDBC connection properties (credentials + encoding). */
  def getDBProps(): Properties = {
    val props = new Properties()
    props.put("user", SQL_DB_USERNAME)
    props.put("password", SQL_DB_PASSWORD)
    props.setProperty("useSSL", "false")
    props.setProperty("useUnicode", "true")
    props.setProperty("characterEncoding", UTF8)
    props
  }

  /**
   * Upserts every row of `df` into MySQL via `INSERT ... ON DUPLICATE KEY UPDATE`.
   *
   * One connection is opened per partition (instead of per row), which keeps the
   * total connection count low. The connection MUST be created inside the
   * partition closure: JDBC connections are not serializable, so a driver-side
   * connection cannot be captured and shipped to executors.
   */
  def insertOrUpdateDetail(df: DataFrame): Unit = {
    df.foreachPartition { iter: Iterator[Row] =>
      var connection: Connection = null
      var statement: PreparedStatement = null
      try {
        connection = DriverManager.getConnection(SQL_DB_MARKET_URL, getDBProps())
        // Parameterized SQL: prevents SQL injection and uses the table-name
        // constant instead of a hard-coded literal. VALUES(cost) re-uses the
        // inserted value on the update path; the key column needs no update.
        val sql = s"INSERT INTO $SQL_MBA_PROJECT_DETAIL_TABLE (project_code, cost) " +
          "VALUES(?, ?) ON DUPLICATE KEY UPDATE cost = VALUES(cost)"
        statement = connection.prepareStatement(sql)
        while (iter.hasNext) {
          val row = iter.next()
          statement.setString(1, row.getAs[String]("project_code"))
          // `cost` is an Int column in the demo DataFrame; reading it as String
          // would throw a ClassCastException.
          statement.setInt(2, row.getAs[Int]("cost"))
          // Accumulate into a batch so the whole partition is sent in one round trip.
          statement.addBatch()
        }
        statement.executeBatch()
      } catch {
        // NonFatal: let OutOfMemoryError / InterruptedException etc. propagate.
        case NonFatal(e) => e.printStackTrace()
      } finally {
        // Close in reverse order of creation; guard against a failed getConnection.
        if (statement != null) statement.close()
        if (connection != null) connection.close()
      }
    }
  }
}