关于spark-sql的读写文件的方法,数据源分为这几类:
1.通过 JDBC 连接 MySQL 进行数据的读写
// Read a table from MySQL via JDBC: requires the JDBC URL, the table name,
// and connection properties (user, password, driver class).
val url = "jdbc:mysql://localhost:3306/bbs?characterEncoding=utf-8"
val tname = "t_acc_Ip"
val props = new Properties()
props.setProperty("user", "root")
props.setProperty("password", "123456")
// Spark's documented JDBC option key is lowercase "driver"
// (the original "Driver" only worked because Spark treats option keys
// case-insensitively).
// NOTE(review): for MySQL Connector/J 8+ the class is
// "com.mysql.cj.jdbc.Driver" — confirm which connector version is on the classpath.
props.setProperty("driver", "com.mysql.jdbc.Driver")
val jdbc: DataFrame = session.read.jdbc(url, tname, props)
// Write the DataFrame back to the MySQL table "emp", replacing any existing
// data (SaveMode.Overwrite drops/recreates or truncates the target table).
// The redundant `.select("*")` projection was removed — it was a no-op.
jdbc.write.mode(SaveMode.Overwrite).jdbc(url, "emp", props)
2.parquet文件格式的读写
// Load a parquet file into a DataFrame, keep only the first 10 rows,
// and print the resulting schema to stdout.
val parquetDf: DataFrame = session.read.parquet("test.parquet")
val sample: Dataset[Row] = parquetDf.limit(10)
sample.printSchema()