1.功能:
实现Dataframe和RDD相互转换
2.DataFrame转换为RDD
val jsonRdd=jsonDataFrame.rdd
3.RDD转换为DataFrame
(1)通过类的反射机制
import sqlContext.implicits._
val df=rdd.toDF()
(2)明确给定字段名称和schema信息
val schema=StructType(Array(
StructField("rdd1Name",StringType),
StructField("rdd1Age",IntegerType)
))
//传入两个参数Rdd[Row],schema
//sqlContext.
val df1=sqlContext.createDataFrame(rdd1,schema)
df1.show
4.实例
4.1 SparkUtil
package SparkUtil
import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by ibf on 2018/7/18.
*/
object SparkUtil {
  /**
   * Creates (or reuses) a SparkContext for the given application.
   *
   * @param isLocal when true, pins the master to "local[2]" (two local threads);
   *                otherwise the master is expected to be supplied externally
   *                (e.g. by spark-submit)
   * @param appName the Spark application name shown in the UI
   * @return the active SparkContext; getOrCreate returns an existing context
   *         if one is already running, avoiding the one-context-per-JVM error
   */
  def createSparkContext(isLocal: Boolean, appName: String): SparkContext = {
    val conf = new SparkConf().setAppName(appName)
    // Only local runs hard-code a master; cluster runs get it from spark-submit.
    if (isLocal) {
      conf.setMaster("local[2]")
    }
    // NOTE: the original also built an unused duplicate context val (`ssc`);
    // it was dead code and has been removed.
    SparkContext.getOrCreate(conf)
  }
}
4.2 RDDDataFrame
package _0728sql
import SparkUtil.SparkUtil
import org.apache.spark.sql.{DataFrame, Row, SQLContext, SaveMode}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
/**
* Created by Administrator on 2018/7/29.
*/
object RDDDataFrame extends App {
  // Build the contexts used throughout this demo.
  val sc = SparkUtil.createSparkContext(true, "RDDDataFrame")
  val sqlContext = new SQLContext(sc)

  /*
   * Part 1: DataFrame -> RDD.
   * A DataFrame exposes its rows directly as RDD[Row] through `.rdd`.
   * Sample data: people.json shipped with the Spark distribution
   * (examples/src/main/resources).
   */
  val jsonPath = "file:///E:\\people.json"
  val jsonDataFrame: DataFrame = sqlContext.read.json(jsonPath)
  // Each element is an org.apache.spark.sql.Row.
  val jsonRdd = jsonDataFrame.rdd

  // Extract typed values from each Row. Access columns BY NAME, not by
  // position: Spark's JSON reader orders the inferred schema alphabetically
  // ("age" before "name"), so the original positional getAs[String](0) /
  // getAs[Int](1) read the wrong columns and would throw ClassCastException.
  // JSON integers are inferred as LongType, hence getAs[Long].
  val resultRdd = jsonRdd.map { row =>
    (row.getAs[String]("name"), row.getAs[Long]("age"))
  }
  resultRdd.foreach(println)

  /*
   * Part 2, method 1: RDD -> DataFrame via class reflection.
   * The RDD element type must be a case class; importing
   * sqlContext.implicits._ provides the toDF()/toDS() conversions.
   */
  import sqlContext.implicits._
  val rdd = sc.parallelize(Array(
    Person("xiaoming", 20),
    Person("lisi", 40)
  ))
  val df = rdd.toDF()
  df.show()

  /*
   * Part 2, method 2: RDD -> DataFrame with an explicit schema.
   * Build an RDD[Row] plus a StructType describing the columns, then pass
   * both to sqlContext.createDataFrame(rowRdd, schema).
   */
  val rdd1 = sc.parallelize(Array(
    ("xiaoming", 20),
    ("lisi", 20),
    ("wangwu", 20)
  )).map {
    case (name, age) => Row(name, age)
  }
  // Column types must match what the Rows actually hold: String + Int here.
  val schema = StructType(Array(
    StructField("rdd1Name", StringType),
    StructField("rdd1Age", IntegerType)
  ))
  val df1 = sqlContext.createDataFrame(rdd1, schema)
  df1.show

  // Persist as parquet; coalesce(1) merges partitions so a single output
  // file is written instead of one file per partition.
  val df2 = df1.coalesce(1)
  df2.write.format("parquet")
    .mode(SaveMode.Append)
    .save("file:///E:\\out")
}
case class Person(name:String,age:Int)