RDD and DataFrame Interoperation

1. Specifying the schema directly with StructType in Spark

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{DoubleType, IntegerType, StructField, StructType}

object StructTypeBySchema {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("StructTypeBySchema").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    val sqlContext = new SQLContext(sc)
    // Read the comma-separated source file and split each line into fields
    val rdd1 = sc.textFile("hdfs://master:9000/wt/sc.txt").map(x => x.split(","))
    // Build the schema explicitly: column name, type, and nullability per field
    val schema = StructType(List(
      StructField("cid", IntegerType, true),
      StructField("sid", IntegerType, true),
      StructField("grade", DoubleType, true)))
    // Convert each String array into a Row whose values match the schema
    val rowRdd = rdd1.map(p => Row(p(0).toInt, p(1).toInt, p(2).toDouble))
    val df = sqlContext.createDataFrame(rowRdd, schema)
    df.show() // show() prints the table itself and returns Unit, so wrapping it in println would only add "()"
    sc.stop()
  }
}
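SQLContext is the Spark 1.x entry point. On Spark 2.x and later the same StructType technique goes through SparkSession instead; the following is a minimal sketch under that assumption, reusing the same HDFS path and the same cid,sid,grade layout (a line such as "1,101,85.5" is only an illustration):

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{DoubleType, IntegerType, StructField, StructType}

object StructTypeBySchema2x {
  def main(args: Array[String]): Unit = {
    // SparkSession replaces SQLContext as the single entry point in Spark 2.x+
    val spark = SparkSession.builder()
      .appName("StructTypeBySchema2x")
      .master("local[2]")
      .getOrCreate()

    val schema = StructType(List(
      StructField("cid", IntegerType, true),
      StructField("sid", IntegerType, true),
      StructField("grade", DoubleType, true)))

    // Same source file as above; assumed to hold lines like "1,101,85.5"
    val rowRdd = spark.sparkContext
      .textFile("hdfs://master:9000/wt/sc.txt")
      .map(_.split(","))
      .map(p => Row(p(0).toInt, p(1).toInt, p(2).toDouble))

    val df = spark.createDataFrame(rowRdd, schema)
    df.show()
    spark.stop()
  }
}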
2. Specifying the schema via reflection in Spark
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object ReflectionBySchema {
  // Define the case class outside main so Spark can infer the schema
  // (column names and types) from its fields via reflection
  case class Student(sno: Int, cno: Int, grade: Int)

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("ReflectionBySchema").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    val rdd1 = sc.textFile("hdfs://master:9000/wt/sc.txt").map(x => x.split(","))
    // Map each split line onto the case class; its field names become the column names
    val rdd2 = rdd1.map(x => Student(x(0).toInt, x(1).toInt, x(2).toInt))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._ // enables .toDF on RDDs of case classes
    val studentDf = rdd2.toDF()
    studentDf.show()
    sc.stop()
  }
}
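On Spark 2.x the same reflection technique also yields a typed Dataset, not just a DataFrame. A minimal sketch, again assuming the same hypothetical file of sno,cno,grade lines:

import org.apache.spark.sql.SparkSession

object ReflectionBySchema2x {
  case class Student(sno: Int, cno: Int, grade: Int)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("ReflectionBySchema2x")
      .master("local[2]")
      .getOrCreate()
    import spark.implicits._ // Encoders for case classes, plus toDF/toDS on RDDs

    val students = spark.sparkContext
      .textFile("hdfs://master:9000/wt/sc.txt")
      .map(_.split(","))
      .map(x => Student(x(0).toInt, x(1).toInt, x(2).toInt))

    val studentDf = students.toDF() // untyped DataFrame
    val studentDs = students.toDS() // typed Dataset[Student]
    studentDf.show()
    studentDs.filter(_.grade >= 60).show() // typed, compile-checked filter
    spark.stop()
  }
}

The Dataset route keeps the Student type through transformations, so field access like _.grade is checked at compile time rather than failing at runtime on a column-name typo.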