在 IDEA 中,Spark SQL 程序的打包和运行方式与 Spark Core 类似,只需在 Maven 的 pom.xml 中额外添加如下 spark-sql 依赖:
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.1.1</version>
</dependency>
一、指定Schema格式
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.IntegerType
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.Row
/**
 * Demo 1: build a DataFrame from a text file using an explicitly declared schema.
 *
 * Each line of the input file is split on tabs and converted to a `Row` of
 * (id, name, age). The rows are combined with a `StructType` schema, registered
 * as the temporary view `t_person`, and queried with SQL.
 */
object Demo1 {
  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Create (or reuse) a SparkSession running with the "local" master
    val spark = SparkSession.builder().master("local").appName("UnderstandSparkSession").getOrCreate()
    try {
      // Read the file and split every line into its tab-separated fields
      val personRDD = spark.sparkContext.textFile("D:\\tmp_files\\student.txt").map(_.split("\t"))

      // Declare the schema explicitly with StructType: column names and types
      val schema = StructType(
        List(
          StructField("id", IntegerType),
          StructField("name", StringType),
          StructField("age", IntegerType)))

      // Map every field array to a Row whose value types match the schema.
      // Fields 0 and 2 must be valid integers, otherwise the job fails when it runs.
      val rowRDD = personRDD.map(p => Row(p(0).toInt, p(1), p(2).toInt))
      val personDF = spark.createDataFrame(rowRDD, schema)

      // Register the DataFrame as a temporary view so it can be queried with SQL
      personDF.createOrReplaceTempView("t_person")

      // Run the SQL query: the four rows with the highest age
      val df = spark.sql("select * from t_person order by age desc limit 4")
      df.show()
    } finally {
      // Always release the session, even if reading, parsing or the query failed
      spark.stop()
    }
  }
}
二、使用case class
import org.apache.spark.sql.SparkSession
//使用case class
/**
 * Demo 2: build a DataFrame from a text file by mapping each line to a case class.
 *
 * Each line of the input file is split on tabs and converted to a `Student`.
 * The RDD of `Student` is turned into a DataFrame with `toDF`, registered as
 * the temporary view `student`, and queried with SQL.
 */
object Demo2 {
  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Create (or reuse) a SparkSession running with the "local" master
    val spark = SparkSession.builder().master("local").appName("CaseClassDemo").getOrCreate()
    try {
      // Read the file and split every line into its tab-separated fields
      val lineRDD = spark.sparkContext.textFile("D:\\tmp_files\\student.txt").map(_.split("\t"))

      // Bind every field array to the Student case class.
      // Fields 0 and 2 must be valid integers, otherwise the job fails when it runs.
      val studentRDD = lineRDD.map(x => Student(x(0).toInt, x(1), x(2).toInt))

      // Bring the implicit conversions into scope; they provide `toDF` on the RDD.
      // Imported straight from the session instead of the legacy SQLContext wrapper.
      import spark.implicits._
      val studentDF = studentRDD.toDF

      // Register the DataFrame as a temporary view so it can be queried with SQL
      studentDF.createOrReplaceTempView("student")

      // Run the SQL query and print the result
      spark.sql("select * from student").show()
    } finally {
      // Always release the session, even if reading, parsing or the query failed
      spark.stop()
    }
  }
}
/**
 * One student record, built by `Demo2` from a tab-separated input line.
 *
 * The case class must be declared at the top level, outside `Demo2.main` —
 * presumably so that the implicit conversion to a DataFrame can resolve its
 * type information (TODO confirm).
 *
 * @param stuID   student id
 * @param stuName student name
 * @param stuAge  student age
 */
case class Student(
  stuID: Int,
  stuName: String,
  stuAge: Int
)
三、把数据保存到数据库
import org.apache.spark.sql.types.IntegerType
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.Row
import java.util.Properties
/**
 * Demo 3: load a text file into a DataFrame and save the query result to MySQL.
 *
 * Each line of the input file is split on tabs and converted to a `Row` of
 * (id, name, age). The rows are combined with a `StructType` schema, registered
 * as the temporary view `person`, queried with SQL, and the result is written
 * to the `student` table over JDBC.
 */
object Demo3 {
  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Create (or reuse) a SparkSession running with the "local" master
    val spark = SparkSession.builder().master("local").appName("UnderstandSparkSession").getOrCreate()
    try {
      // Read the file and split every line into its tab-separated fields
      val personRDD = spark.sparkContext.textFile("D:\\tmp_files\\student.txt").map(_.split("\t"))

      // Declare the schema explicitly with StructType: column names and types
      val schema = StructType(
        List(
          StructField("id", IntegerType),
          StructField("name", StringType),
          StructField("age", IntegerType)))

      // Map every field array to a Row whose value types match the schema.
      // Fields 0 and 2 must be valid integers, otherwise the job fails when it runs.
      val rowRDD = personRDD.map(p => Row(p(0).toInt, p(1), p(2).toInt))
      val personDF = spark.createDataFrame(rowRDD, schema)

      // Register the DataFrame as a temporary view so it can be queried with SQL
      personDF.createOrReplaceTempView("person")

      // Run the SQL query
      val df = spark.sql("select * from person ")
      // Uncomment to inspect the query result
      //df.show()

      // JDBC connection properties for the MySQL target.
      // NOTE(review): credentials are hard-coded for the demo; load them from
      // configuration or the environment in real code.
      val props = new Properties()
      props.setProperty("user", "root")
      props.setProperty("password", "123456")
      // NOTE(review): the `serverTimezone` URL parameter suggests MySQL Connector/J 8.x,
      // where the driver class is `com.mysql.cj.jdbc.Driver` — confirm against the
      // connector version on the classpath.
      props.setProperty("driver", "com.mysql.jdbc.Driver")

      // Save the result to the `student` table; "overwrite" replaces existing contents
      df.write.mode("overwrite").jdbc("jdbc:mysql://localhost:3306/company?serverTimezone=UTC&characterEncoding=utf-8", "student", props)
    } finally {
      // Always release the session, even if reading, parsing or the JDBC write failed.
      // `stop()` is used for consistency with the other demos.
      spark.stop()
    }
  }
}