记录一次实际工作中SparkSQL向Mysql写入数据,效率很低很慢的解决过程。
原始代码如下:
// NOTE(review): this chunk is collapsed onto a single line and is truncated
// mid-expression (it ends at `sparkSession.read.format(`), so the code cannot
// be safely reformatted or restyled from this view — comments only.
//
// What the visible code shows:
//  - main(): builds a local[6] SparkSession named "kudu2Mysql"; defines
//    kuduMaster as "192.168.100.100:7051" repeated three times — presumably
//    meant to be three DISTINCT master hosts; verify against the cluster config.
//  - main() then reads Kudu table "kudu_test" via sparkSqlReadKudu with the SQL
//    `select * from tmp_kuduTable where day = '2020-03-16'`, writes the
//    resulting DataFrame to MySQL table "kudu_test" via saveToMysql (defined
//    outside this view), and stops the session.
//  - sparkSqlReadKudu(...): builds an options Map with keys "kudu.master" and
//    "kudu.table", then begins `sparkSession.read.format(` — the format string,
//    load call, and the rest of the method are NOT visible in this chunk.
//  - NOTE(review): per the post's topic (slow MySQL writes), the fix likely
//    involves JDBC batch options (e.g. rewriteBatchedStatements) or partition
//    count on the writer — cannot confirm from the visible code; the
//    saveToMysql implementation is outside this chunk.
object Kudu2Mysql {def main(args: Array[String]): Unit = {val sparkSession: SparkSession = SparkSession.builder().master("local[6]").appName("kudu2Mysql").getOrCreate()val kuduMaster = "192.168.100.100:7051,192.168.100.100:7051,192.168.100.100:7051"val kuduTable = "kudu_test"val kuduSql ="""select * from tmp_kuduTable where day = '2020-03-16'"""val kuduDatas = sparkSqlReadKudu(sparkSession, kuduMaster, kuduTable, kuduSql)val mysqlTable = "kudu_test"saveToMysql(kuduDatas, mysqlTable)sparkSession.stop()}private def sparkSqlReadKudu(sparkSession: SparkSession, kuduMaster: String, kuduTable: String, sql: String) = {val options = Map("kudu.master" -> kuduMaster,"kudu.table" -> kuduTable)val kuduDatas: DataFrame = sparkSession.read.format(