After applying a series of transformations to a DataFrame in Spark, we often need to write the result to MySQL. The implementation is described below.
1. MySQL connection information
The MySQL connection details are kept in an external configuration file, which makes it easy to add or change settings later; a sketch of loading this file follows the example.
// Example configuration file:
[hdfs@iptve2e03 tmp_lillcol]$ cat job.properties
# MySQL database configuration
mysql.driver=com.mysql.jdbc.Driver
mysql.url=jdbc:mysql://127.0.0.1:3306/database1?useSSL=false&autoReconnect=true&failOverReadOnly=false&rewriteBatchedStatements=true
mysql.username=user
mysql.password=123456
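The listing in step 3 reads this file through a helper named getProPerties, whose definition does not appear below. Here is a minimal sketch of what such a helper might look like, assuming it simply loads the file from a local path with java.util.Properties:

import java.io.FileInputStream
import java.util.Properties

// Sketch: load the external job.properties file into a java.util.Properties object.
// The real helper might instead read the file from HDFS or the classpath.
def getProPerties(proPath: String): Properties = {
  val properties = new Properties()
  properties.load(new FileInputStream(proPath))
  properties
}

With this in place, properties.getProperty("mysql.url") returns the JDBC URL shown above.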
2. Required jar dependencies (sbt format; adjust accordingly for Maven)
libraryDependencies += "org.apache.spark" % "spark-core_2.10" % "1.6.0-cdh5.7.2"
libraryDependencies += "org.apache.spark" % "spark-sql_2.10" % "1.6.0-cdh5.7.2"
libraryDependencies += "org.apache.spark" % "spark-hive_2.10" % "1.6.0-cdh5.7.2"
libraryDependencies += "org.apache.hbase" % "hbase-client" % "1.2.0-cdh5.7.2"
libraryDependencies += "org.apache.hbase" % "hbase-server" % "1.2.0-cdh5.7.2"
libraryDependencies += "org.apache.hbase" % "hbase-common" % "1.2.0-cdh5.7.2"
libraryDependencies += "org.apache.hbase" % "hbase-protocol" % "1.2.0-cdh5.7.2"
libraryDependencies += "mysql" % "mysql-connector-java" % "5.1.38"
libraryDependencies += "org.apache.spark" % "spark-streaming_2.10" % "1.6.0-cdh5.7.2"
libraryDependencies += "com.yammer.metrics" % "metrics-core" % "2.2.0"
3. Complete implementation code
import java.io.FileInputStream
import java.sql.{Connection, DriverManager}
import java.util.Properties

import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
import org.apache.spark.{SparkConf, SparkContext}

/**
  * @author Administrator
  * 2018/10/16-10:15
  */
object SaveDataFrameASMysql {
  var hdfsPath: String = ""
  var proPath: String = ""
  var DATE: String = ""

  val sparkConf: SparkConf = new SparkConf().setAppName(getClass.getSimpleName)
  val sc: SparkContext = new SparkContext(sparkConf)
  val sqlContext: SQLContext = new HiveContext(sc)

  def main(args: Array[String]): Unit = {
    hdfsPath = args(0)
    proPath = args(1)
    // Read the source table without filtering
    val dim_sys_city_dict: DataFrame = readMysqlTable(sqlContext, "TestMysqlTble1", proPath)
    dim_sys_city_dict.show(10)

    // Save to MySQL
    saveASMysqlTable(dim_sys_city_dict, "TestMysqlTble2", SaveMode.Append, proPath)
  }

  /**
    * Save a DataFrame as a MySQL table
    *
    * @param dataFrame the DataFrame to save
    * @param tableName the target MySQL table name
    * @param saveMode  save mode: Append, Overwrite, ErrorIfExists, Ignore
    * @param proPath   path of the configuration file
    */
  def saveASMysqlTable(dataFrame: DataFrame, tableName: String, saveMode: SaveMode, proPath: String) = {
    var table = tableName
    val properties: Properties = getProPerties(proPath)
    // The keys in the configuration file differ from the keys Spark expects,
    // so build a new Properties object using Spark's JDBC key names
    val prop = new Properties
    prop.setProperty("user", properties.getProperty("mysql.username"))
    prop.setProperty("password", properties.getProperty("mysql.password"))
    prop.setProperty("driver", properties.getProperty("mysql.driver"))
    prop.setProperty("url", properties.getProperty("mysql.url"))
    if (saveMode == SaveMode.Overwrite) {
      var conn: Connection = null
      try {
        conn = DriverManager.getConnection(
          prop.getProperty("url"),
          prop.getProperty("user"),
          prop.getProperty("password")
        )
        val stmt = conn.createStatement
        table = table.toUpperCase
        // Truncate first (rather than drop) so the table structure is kept, then Append
        stmt.execute(s"truncate table $table")
        conn.close()
      }
      catch {
        case e: Exception =>
          println("MySQL Error:")