package com.wedoctor.sparksql

import java.io.InputStream
import java.util.Properties

import com.typesafe.config.{Config, ConfigFactory}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{DataFrame, Dataset, SaveMode, SparkSession}

object JoinDemo {

  Logger.getLogger("org").setLevel(Level.ERROR)

  def main(args: Array[String]): Unit = {
    // Option 1: read application.conf from the classpath with java.util.Properties
    /*val prop = new Properties()
    val stream: InputStream = this.getClass.getClassLoader.getResourceAsStream("application.conf")
    prop.load(stream)
    val dbUrl = prop.getProperty("db.url")
    println(dbUrl)*/

    // Option 2: ConfigFactory.load() reads application.conf (also .json/.properties)
    // from the classpath by default
    /*val config: Config = ConfigFactory.load()
    println(config.getString("db.url"))*/
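
    // A matching application.conf might contain (hypothetical value, not shipped here):
    //   db.url = "jdbc:mysql://hdp-01:3306/test"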

    val spark: SparkSession = SparkSession.builder()
      .master("local[*]")
      .appName(this.getClass.getSimpleName)
      .getOrCreate()

    // Import implicit conversions (encoders, $-column syntax, etc.)
    import spark.implicits._

    val ds1: Dataset[String] = spark.createDataset(List(
      "001 small 20 leegg",
      "002 yz 23 xx",
      "003 sl 25 zhishu",
      "004 xxoo 26 weizhi"))
    val ds2: Dataset[String] = spark.createDataset(List("001,5000", "002,3000", "003,4000"))

    // Parse "num name age emp" lines into typed tuples
    val actors: Dataset[(String, String, Int, String)] = ds1.map(str => {
      val arr: Array[String] = str.split(" ")
      (arr(0), arr(1), arr(2).toInt, arr(3))
    })

    // Keep num as a String so the join key has the same type on both sides;
    // parsing it to Int would turn "001" into 1 and force an implicit cast in the join
    val sal: Dataset[(String, Int)] = ds2.map(str => {
      val arr: Array[String] = str.split(",")
      (arr(0), arr(1).toInt)
    })
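
    // Alternative (a sketch): a top-level case class, e.g.
    //   case class Actor(num: String, name: String, age: Int, emp: String)
    // would let ds1 map straight to a Dataset[Actor] with named columns,
    // making the toDF renaming below unnecessary.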

    val df3: DataFrame = actors.toDF("num", "name", "age", "emp")
    val df4: DataFrame = sal.toDF("num", "sal")

    // SQL-style left join over temporary views
    df3.createTempView("v_actor")
    df4.createTempView("v_sal")
    val result: DataFrame = spark.sql(
      "select a.num, a.name, a.age, a.emp, b.sal from v_actor a left join v_sal b on a.num = b.num")
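
    // The same left join via the DataFrame API (a sketch; should yield the same
    // rows as the SQL above, with a single shared "num" column):
    // val result2: DataFrame = df3.join(df4, Seq("num"), "left")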

    val url = "jdbc:mysql://hdp-01:3306/test?characterEncoding=utf-8"
    val conn: Properties = new Properties()
    conn.setProperty("user", "root")
    conn.setProperty("password", "123456")
    // Optional when running locally with the driver jar on the classpath;
    // required when running on a cluster. (com.mysql.jdbc.Driver is the
    // Connector/J 5.x class name; 8.x renamed it to com.mysql.cj.jdbc.Driver.)
    conn.setProperty("driver", "com.mysql.jdbc.Driver")
    // Overwrite drops and recreates the v_test table on each run
    result.write.mode(SaveMode.Overwrite).jdbc(url, "v_test", conn)
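
    // Sanity check (a sketch, assuming the same MySQL instance is reachable):
    // spark.read.jdbc(url, "v_test", conn).show()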

    spark.close()
  }
}