package com.ws.sparksql
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark SQL demo #3, written against the legacy `SQLContext` entry point
 * (the Spark SQL 1.x-style API).
 *
 * Reads comma-separated student records (`id,name,age,score`) from a text
 * file, attaches an explicit schema to build a DataFrame, and prints the
 * `name`, `age` and `score` columns ordered by score (descending) and then
 * by age (ascending).
 */
object SparkSqlDemo3 {

  /** Input location used when no path is supplied on the command line. */
  private val DefaultInputPath = "hdfs://hadoop-01:9000/student"

  /**
   * Converts one `id,name,age,score` text line into a [[Row]].
   *
   * @param line a single comma-separated record
   * @return a row holding (Long id, String name, Int age, Int score)
   * @throws NumberFormatException if id, age or score is not numeric
   * @throws ArrayIndexOutOfBoundsException if the line has fewer than four fields
   */
  private def toRow(line: String): Row = {
    val fields: Array[String] = line.split(",")
    Row(fields(0).toLong, fields(1), fields(2).toInt, fields(3).toInt)
  }

  /**
   * Runs the demo job.
   *
   * @param args optional; `args(0)` overrides the input path
   *             (defaults to `hdfs://hadoop-01:9000/student`)
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    // setIfMissing keeps local[2] as the default while letting a master
    // supplied by the launcher (e.g. spark-submit --master) take effect.
    val conf = new SparkConf()
      .setAppName("SparkSqlDemo3")
      .setIfMissing("spark.master", "local[2]")
    val sc = new SparkContext(conf)

    // Stop the context even when the job fails, so it is never leaked.
    try {
      // Create the Spark SQL entry point.
      val sQLContext = new SQLContext(sc)
      import sQLContext.implicits._

      val dataRdd: RDD[String] = sc.textFile(inputPath)
      val rowRdd: RDD[Row] = dataRdd.map(toRow)

      val schema = StructType(List(
        // Arguments: column name, column type, whether null is allowed.
        StructField("id", LongType, nullable = false),
        StructField("name", StringType, nullable = false),
        StructField("age", IntegerType, nullable = false),
        StructField("score", IntegerType, nullable = false)
      ))
      val sdf: DataFrame = sQLContext.createDataFrame(rowRdd, schema)

      // Query through the DataFrame API rather than a SQL string.
      val column: DataFrame = sdf.select("name", "age", "score")

      // Sort by score descending, then age ascending. Method-call syntax is
      // used instead of postfix operators, which need scala.language.postfixOps.
      val result: Dataset[Row] = column.sort($"score".desc, $"age".asc)

      // show() is the action that triggers execution and prints the result.
      result.show()
    } finally {
      sc.stop()
    }
  }
}
/*
Result (output of result.show()):
+----+---+-----+
|name|age|score|
+----+---+-----+
| 张三| 18| 150|
| 李四| 19| 150|
| 王五| 20| 98|
| 赵六| 17| 88|
+----+---+-----+
*/