一、Spark 读取 MySQL 数据
网上关于 Spark 读取 MySQL 数据的资料还是挺多的,这里贴一下我的代码:
新建 1 个 Scala 的 “Object”(以下 import 语句放在文件顶部):
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
 * Example Spark job that reads rows from a MySQL table through Spark's JDBC data source.
 *
 * The connection settings below are placeholders and must be replaced before running.
 */
object CsdnTest {

  /**
   * Entry point: obtains a SparkSession, loads the cost data and prints a preview.
   *
   * The session is stopped in a `finally` block so that it is released even when
   * the JDBC read throws.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val ss = SparkSession.builder
      .appName("Test")
      .getOrCreate()
    try {
      getCostTable(ss)
    } finally {
      // Release the session on both the success and the failure path.
      ss.stop()
    }
  }

  // MySQL connection settings (placeholder values).
  val SQL_IP_PORT = "你的IP:你的port"
  val SQL_DB_NAME = "数据库名称"
  // URL parameter that turns on batch-operation support.
  val SQL_BATCH_PARAM = "rewriteBatchedStatements=true"
  val SQL_DB_MARKET_URL: String = s"jdbc:mysql://${SQL_IP_PORT}/${SQL_DB_NAME}?${SQL_BATCH_PARAM}"
  val SQL_DB_USERNAME = "username"
  val SQL_DB_PASSWORD = "password"
  val SQL_MBA_PROJECT_DETAIL_TABLE = "table"
  val UTF8 = "UTF-8"

  /**
   * Loads the cost data: `project_code` and `cost` for every row whose `cost` is not null.
   *
   * Besides returning the DataFrame, this prints a preview of it with `show()`.
   *
   * @param ss SparkSession used to perform the read
   * @return DataFrame with the columns `project_code` and `cost`
   */
  def getCostTable(ss: SparkSession): DataFrame = {
    // Initial filter, expressed as an aliased subquery: keep only rows with a non-null cost.
    val subTableQuery = " (select project_code, cost as cost " +
      s"from ${SQL_MBA_PROJECT_DETAIL_TABLE} " +
      "where cost is not null) " +
      "as tmp "
    val result = getMysqlTableDf(ss, subTableQuery)
    result.show()
    result
  }

  /**
   * Reads MySQL data into a DataFrame via the JDBC data source.
   *
   * @param ss            SparkSession used to perform the read
   * @param subTableQuery value passed as the JDBC `dbtable` option; the caller in this
   *                      file passes a parenthesised subquery with an alias
   * @return DataFrame produced by loading `subTableQuery` from the configured database
   */
  def getMysqlTableDf(ss: SparkSession, subTableQuery: String): DataFrame = {
    ss.read.format("jdbc")
      .option("driver", "com.mysql.jdbc.Driver")
      .option("url", SQL_DB_MARKET_URL)
      .option("user", SQL_DB_USERNAME)
      .option("password", SQL_DB_PASSWORD)
      .option("useUnicode", "true")
      .option("useSSL", "false")
      .option("characterEncoding", UTF8)
      .option("dbtable", subTableQuery)
      .load()
  }
}