/**
 * Five sources, each registered as a temp table:
 *   t1  json     -> (id, name)
 *   t2  parquet  -> (id, age)
 *   t3  mysql    -> (id, age),  aliased to addr in the join
 *   t4  hive     -> (id, name), aliased to hobby in the join
 *   t5  rdd      -> (id, sex)
 *
 * Spark SQL joins the five temp tables on id into a single result:
 *
 *   id name age addr hobby sex
 *
 * The result is written locally as parquet and json, to Hive, to JDBC (MySQL),
 * and finally converted back to an RDD.
 */
import java.util.Properties

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

object FiveTable {
def main(args: Array[String]): Unit = {
System.setProperty("hadoop.home.dir", "E:\\software\\bigdate\\hadoop-2.6.0-cdh5.15.0\\hadoop-2.6.0-cdh5.15.0")
val conf = new SparkConf()
conf.setMaster("local")
conf.setAppName("SparkSql")
val sc = new SparkContext(conf)
//val ssc = new SQLContext(sc)
val hiveContext = new HiveContext(sc)
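// HiveContext extends SQLContext with HiveQL support and access to the Hive metastore,
// which the hive.human read and the saveAsTable call below rely on.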
// 1. JSON file -> t1
hiveContext
.read
.format("json")
.load("D:\\BDTC08\\sparkData\\person.json")
.registerTempTable("t1")
// 2. Parquet file from the local filesystem -> t2 (parquet is Spark's default data source)
hiveContext
.read
.format("parquet")
.load("D:\\BDTC08\\sparkData\\test.parquet")
.registerTempTable("t2")
// 3. MySQL table via JDBC -> t3
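// Note: the JDBC read needs the MySQL connector JAR (mysql-connector-java) on the classpath.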
hiveContext
.read
.format("jdbc")
.options(
  Map(
    "url" -> "jdbc:mysql://candle:3306/sqoop",
    "user" -> "hive",     // account
    "password" -> "hive", // password
    "dbtable" -> "human"  // source table
  )
)
.load()
.registerTempTable("t3")
// 4. Hive table -> t4
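// Assumes a reachable Hive metastore (hive-site.xml on the classpath) that already contains hive.human.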
val hql = "select * from hive.human"
hiveContext
.sql(hql)
.registerTempTable("t4")
// 5. RDD -> t5
val sourceRDD = sc.parallelize(List((1, "f"), (2, "m"), (3, "f"), (4, "m")))
// val mapRDD = sourceRDD
// .map(
// x => {
// Row(x._1, x._2)
// }
// )
// val sf1 = StructField("id", IntegerType, true)
// val sf2 = StructField("sex", StringType, true)
// //def apply(fields : scala.Seq[org.apache.spark.sql.types.StructField]) :
// val st = StructType(List(sf1, sf2))
// hiveContext
// .createDataFrame(mapRDD, st)
// .registerTempTable("t5")
import hiveContext.implicits._
sourceRDD
.toDF("id", "sex")
.registerTempTable("t5")
// hiveContext.sql("select * from t1").show()
// hiveContext.sql("select * from t2").show()
// hiveContext.sql("select * from t3").show()
// hiveContext.sql("select * from t4").show()
// hiveContext.sql("select * from t5").show()
// At this point five temp tables (t1..t5) are registered with hiveContext.
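// Five-way inner join on id: a row appears in the result only if the same id exists in all five sources.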
val sql = "select t1.id, t1.name, t2.age, t3.age as addr, t4.name as hobby , t5.sex " +
"from t1 , t2, t3, t4, t5 " +
"where t1.id=t2.id and t2.id=t3.id and t3.id=t4.id and t4.id=t5.id "
val returnDF = hiveContext.sql(sql)
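// Write the joined result to each of the required sinks: json, parquet, Hive, JDBC, and finally an RDD.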
returnDF
.write
.format("json")
.save("D:\\BDTC08\\sparkData\\json")
returnDF
.write
.format("parquet")
.save("D:\\BDTC08\\sparkData\\parquet")
returnDF
.write
.saveAsTable("hive.FiveTable")
val prop = new Properties
prop.setProperty("user", "hive")
prop.setProperty("password", "hive")
returnDF
.write
.jdbc("jdbc:mysql://candle:3306/sqoop", "fivetable",prop)
returnDF
.rdd
.map(
line => { // each line is a Row
  val id = line.getAs[Long](0) // numeric columns read from json/parquet default to Long
  val name = line.getAs[String](1)
  val age = line.getAs[Long](2)
  val addr = line.getAs[Int](3) // addr comes from the MySQL table, read as Int
  val hobby = line.getAs[String](4)
  val sex = line.getAs[String](5)
  s"$id:$name:$age:$addr:$hobby:$sex"
}
)
.foreach(println)
}
}