package com.ws.sparksql
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
* spark sql版本 1.1x
* spark sql 第一个demo
*/
object SparkSqlDemo {
  /**
   * Entry point: reads a CSV-like student file, converts it to a DataFrame
   * via the Spark SQL 1.x `SQLContext`, and prints the rows ordered by
   * score (desc) then age (asc).
   *
   * @param args optional; args(0) overrides the input path
   *             (defaults to the original HDFS location)
   */
  def main(args: Array[String]): Unit = {
    // Generalized: input path can be supplied on the command line; the
    // original hard-coded HDFS path remains the default for compatibility.
    val inputPath = if (args.nonEmpty) args(0) else "hdfs://hadoop-01:9000/student"
    val conf = new SparkConf().setAppName("SparkSqlDemo").setMaster("local[2]")
    val sc = new SparkContext(conf)
    try {
      // SQLContext is the Spark SQL entry point in the 1.x API line.
      val sqlContext = new SQLContext(sc)
      val dataRdd: RDD[String] = sc.textFile(inputPath)
      // Parse each line "id,name,age,score" into a Student.
      // Malformed lines (too few fields, non-numeric values) are skipped
      // instead of failing the whole job on a single bad record.
      val studentRdd: RDD[Student] = dataRdd.flatMap { line =>
        val fields = line.split(",")
        if (fields.length >= 4) {
          try {
            Some(Student(fields(0).trim.toLong, fields(1), fields(2).trim.toInt, fields(3).trim.toInt))
          } catch {
            case _: NumberFormatException => None // skip rows with unparseable numbers
          }
        } else {
          None
        }
      }
      import sqlContext.implicits._
      // Convert the RDD of case classes to a DataFrame; the schema is
      // inferred from Student's fields, much like a table's columns map
      // onto a POJO.
      val sdf: DataFrame = studentRdd.toDF
      // Register the DataFrame as a temp table so it can be queried with SQL
      // (registerTempTable is the Spark 1.x API).
      sdf.registerTempTable("t_student")
      // The SQL query is a transformation — nothing executes yet.
      val result: DataFrame = sqlContext.sql("select * from t_student order by score desc , age asc")
      // show() is the action that triggers execution and prints the result.
      result.show()
    } finally {
      // Always release the SparkContext, even if the job throws.
      sc.stop()
    }
  }
}
case class Student(id: Long, name: String, age: Int, score: Int)
// Sample output:
// +---+----+---+-----+
// | id|name|age|score|
// +---+----+---+-----+
// |  1| 张三| 18|  150|
// |  2| 李四| 19|  150|
// |  3| 王五| 20|   98|
// |  4| 赵六| 17|   88|
// +---+----+---+-----+