import java.util.Properties
import cn.doit.sparksql.day01.utils.SparkUtils
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
/**
* @description:DataFrame保存到mysql
**/
/**
 * Saves a DataFrame to a MySQL table ("people") via JDBC.
 */
object DFSaveMysql {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkUtils.getSparkSession()

    // Read a CSV file into a DataFrame, inferring column types from the data.
    val frame: DataFrame = spark.read
      .options(Map("header" -> "true", "inferSchema" -> "true"))
      .csv("doc/stu2.csv")
    frame.printSchema()
    frame.show()

    // JDBC connection credentials.
    // NOTE(review): credentials are hard-coded for this demo; move them to
    // configuration / environment variables for any real deployment.
    val pro = new Properties()
    pro.setProperty("user", "root")
    pro.setProperty("password", "123456")

    /**
     * def mode(saveMode: SaveMode)
     *
     * Without an explicit mode the default SaveMode.ErrorIfExists throws:
     *   org.apache.spark.sql.AnalysisException:
     *   Table or view 'people' already exists. SaveMode: ErrorIfExists.
     * SaveMode.Append is used so repeated runs add rows instead of failing.
     *
     * MySQL 5.5.45+ / 5.6.26+ / 5.7.6+ log a warning when connecting without
     * SSL unless the useSSL option is set explicitly. Appending useSSL=false
     * to the URL silences that warning for local development; in production
     * use useSSL=true with a truststore for server certificate verification.
     */
    frame.write
      .mode(SaveMode.Append)
      .jdbc("jdbc:mysql://localhost:3306/bigdata?useSSL=false", "people", pro)

    // Release Spark resources once the write completes.
    spark.stop()
  }
}
// ---- Code below: saving a DataFrame as .csv / .json / parquet files ----
import cn.doit.sparksql.day01.utils.SparkUtils
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
* DataFrame保存成.csv .json parquet文件
**/
/**
 * Demonstrates saving a DataFrame as .csv, .json, and parquet files.
 */
object DFSaveFiles {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkUtils.getSparkSession()

    // Read a CSV file into a DataFrame, inferring column types from the data.
    val frame: DataFrame = spark.read
      .options(Map("header" -> "true", "inferSchema" -> "true"))
      .csv("doc/stu2.csv")
    frame.printSchema()
    frame.show()

    /**
     * Output format examples (uncomment one at a time to run):
     *  - save as JSON
     *  - save as CSV without a header row
     *  - save as CSV with a header row
     *  - save as parquet
     *  - save as text: the text data source supports only a single column;
     *    calling frame.write.text on this 6-column frame fails with:
     *    "Text data source supports only a single column, and you have 6 columns."
     *    so the columns must first be concatenated into one (see concat_ws below).
     */
    // frame.write.json("doc/output/json")
    // frame.write.csv("doc/output/csv1")
    // frame.write.option("header", true).csv("doc/output/csv2")
    // frame.write.parquet("doc/output/parquet")

    import org.apache.spark.sql.functions._ // concat_ws for the single-column text example
    // def concat_ws(sep: String, exprs: Column*)
    // frame.write.text("doc/output/text")
    // frame.selectExpr("concat_ws('\t' , id , name , age ,sex ,city,score )").write.text("doc/output/text")

    // Release Spark resources once the examples complete.
    spark.stop()
  }
}