package SparkSQLDemo
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Created by tg on 10/27/16.
 * Converts an RDD to a DataFrame programmatically (by building the schema at runtime).
 */
object RDD2DataFrameProgram {

  /**
   * Reads "id,name,age" CSV lines from a local file, converts the resulting
   * RDD[Row] to a DataFrame using a programmatically constructed schema,
   * then queries it with Spark SQL.
   *
   * Fix: the original code referenced StructType/StructField/IntegerType/
   * StringType without importing org.apache.spark.sql.types, so it did not
   * compile; the import is now added at the top of the file.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("RDD2DataFrameProgram").setMaster("local")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    try {
      // Step 1: build a plain RDD whose elements are Rows.
      // Each input line is expected to be "id,name,age"; a malformed line
      // will fail at action time with NumberFormatException /
      // ArrayIndexOutOfBoundsException.
      val stusRDD = sc.textFile("file:///home/tg/datas/stus.txt")
        .map { line =>
          val stu = line.split(",")
          Row(stu(0).toInt, stu(1), stu(2).toInt)
        }

      // Step 2: construct the schema (metadata) programmatically.
      // Field order and types must match the Row built above.
      val structType = StructType(Array(
        StructField("id", IntegerType, true),
        StructField("name", StringType, true),
        StructField("age", IntegerType, true)
      ))

      // Step 3: combine the RDD[Row] with the schema to get a DataFrame.
      val stuDF = sqlContext.createDataFrame(stusRDD, structType)

      // Register as a temp table so it can be queried with SQL.
      // (registerTempTable is the Spark 1.x API matching SQLContext here.)
      stuDF.registerTempTable("stus")
      val ageResult = sqlContext.sql("select * from stus where age<=18")
      ageResult.show()

      // A DataFrame can also be converted back to an RDD for RDD-style ops.
      ageResult.rdd.collect().foreach(row => println(row))
    } finally {
      // Always release the SparkContext, even if the job above fails.
      sc.stop()
    }
  }
}