package com.qh
import java.util.Properties
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.sql.types._
import scala.collection.mutable.ListBuffer
/**
* Created by hadoop on 8/19/16.
*/
// case class reference: http://www.yiibai.com/scala/scala_pattern_matching.html
// Row type for the in-memory demo dataset; fields become DataFrame columns (id, name, age).
// NOTE(review): Scala convention would name this `People` (UpperCamelCase); kept as-is
// because the object below constructs it by this name.
case class people(id: Int, name: String, age: Int)
object SparkSQL {
  // NOTE(review): connection settings and credentials are hard-coded for the demo;
  // in production load them from configuration / a secrets store instead.
  private val url = "jdbc:sqlserver://172.16.177.45:1433;DataBase=CE_DQ"
  private val user = "sa"
  private val password = "123456"
  private val tableName = "SparkSQLTest"

  /**
   * Demo entry point: reads and writes a SQL Server table over JDBC, queries a
   * JSON-lines file from HDFS, and runs SQL against a DataFrame built from
   * local case-class instances.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("ScalaTest")
      .setMaster("spark://master:7077")
    val sc = new SparkContext(conf)
    try {
      // Since Spark 2.0.0, SQLContext and HiveContext are deprecated (kept only
      // for backward compatibility); SparkSession is the unified entry point:
      //   val sqlContext  = new SQLContext(sc)
      //   val hiveContext = new HiveContext(sc)
      //   val session     = SparkSession.builder().enableHiveSupport().getOrCreate()
      val sqlContext = SparkSession.builder().getOrCreate().sqlContext

      // ------------------------------------------------------------------
      // Read from SQL Server.
      // No explicit JDBC driver class is needed: Spark selects it from the
      // URL, provided the matching JDBC jar was copied into $SPARK_HOME/jars.
      // ------------------------------------------------------------------
      val connProps = new Properties()
      connProps.setProperty("user", user) // setProperty is String/String type-safe, unlike raw put
      connProps.setProperty("password", password)
      val sourceDF = sqlContext.read.jdbc(url, tableName, connProps)
      sourceDF.show()

      // ------------------------------------------------------------------
      // Write to SQL Server.
      // ------------------------------------------------------------------
      val rows = sc.parallelize(Seq(Row(1, null), Row(2, "b"), Row(3, "c")))
      // StructField arguments: column name, column data type, whether NULLs are allowed.
      val schema = StructType(
        List(
          StructField("ID", DataTypes.IntegerType, nullable = false),
          StructField("Name", DataTypes.StringType, nullable = true)
        )
      )
      // was: reassigned a shared `var jdbcDF`; a dedicated val per DataFrame is clearer
      val outputDF = sqlContext.createDataFrame(rows, schema)
      // SaveMode.Append appends to the existing table; without an explicit
      // mode, Spark's default (ErrorIfExists) fails when the table exists.
      outputDF.write.mode(SaveMode.Append).jdbc(url, tableName, connProps)

      // ------------------------------------------------------------------
      // Query a JSON-lines file, one JSON object per line, e.g.:
      //   {"name":"Michael"}
      //   {"name":"Andy", "age":30}
      //   {"name":"Justin", "age":19}
      // ------------------------------------------------------------------
      val jsonDF = sqlContext.read.json("hdfs://master:9000/Spark/sqljson")
      jsonDF.show()        // age column is null where the field is absent
      jsonDF.printSchema() // age: long (nullable), name: string (nullable)
      jsonDF.select("name").show()
      jsonDF.select(jsonDF("name"), jsonDF("age") + 10).show() // null age stays null after + 10
      jsonDF.filter(jsonDF("age") > 10).show()                 // rows with null age are filtered out

      // ------------------------------------------------------------------
      // Build a DataFrame from local case-class instances and query it with SQL.
      // ------------------------------------------------------------------
      // was: mutable ListBuffer with repeated append; an immutable Seq literal suffices
      val peopleSeq = Seq(
        people(0, "Michael", 18),
        people(1, "Tom", 32),
        people(2, "Andy", 20),
        people(3, "Justin", 40),
        people(4, "Miss", 17)
      )
      peopleSeq.foreach(println)

      // was: createDataFrame was invoked twice; build once and derive the renamed frame
      val rawDF = sqlContext.createDataFrame(peopleSeq)
      rawDF.printSchema() // id: integer (not null), name: string, age: integer (not null)
      val df = rawDF.withColumnRenamed("id", "ID")
      df.printSchema()
      df.show()

      // Register the DataFrame as a temporary view:
      //   - SQLContext (pre-2.0): registerTempTable
      //   - SparkSession (2.0+):  createOrReplaceTempView
      df.createOrReplaceTempView("peopleTable")
      val sqlDF = sqlContext.sql("select * from peopleTable order by name desc")
      sqlDF.show()
    } finally {
      sc.stop() // always release cluster resources, even when a stage fails
    }
  }
}
// TODO: further improvements to follow.