什么是DataFrame
DataFrame的特点
DataFrame的操作
创建DataFrame
// Builds a DataFrame from a delimited text file by attaching a programmatically
// constructed schema to an RDD[Row], registers it as the temp table "people",
// and counts the rows with gender 'M' and height above 170.

// Comma-separated column names used to build the schema.
val schamaString = "id,gender,height"
val conf = new SparkConf().setAppName("SQLTEST").setMaster("local")
val sc = new SparkContext(conf)
val peopleDataRDD = sc.textFile("/home/xuetao/Downloads/data.txt")
val sqlCtx = new SQLContext(sc)

// Every column is declared as a nullable string; the numeric comparison in the
// query below therefore relies on Spark SQL's implicit cast of `height`.
val schemaArray = schamaString.split(",")
val schema = StructType(schemaArray.map(fieldName => StructField(fieldName, StringType, true)))

// NOTE(review): the field delimiter is assumed to be a single space -- confirm
// against the data file. The previous split("") broke each line into single
// characters, so the three Row values were characters rather than fields.
val rowRDD: RDD[Row] = peopleDataRDD
  .map(_.split(" "))
  .map(eachRow => Row(eachRow(0), eachRow(1), eachRow(2)))

val peopleDF = sqlCtx.createDataFrame(rowRDD, schema)
peopleDF.registerTempTable("people")

// "from people" needs the separating space, otherwise the statement does not parse.
val hightMale170 = sqlCtx.sql("select id, gender, height from people where height>170 and gender='M'")
println("Men whose height are more than 170:" + hightMale170.count())

sc.stop()
新的DataFrame