创建rdd方法1//use case class Person
case class Person(name:String,age:Int)
def rddToDFCase(sparkSession : SparkSession):DataFrame = {
//导入隐饰操作,否则RDD无法调用toDF方法
import sparkSession.implicits._
val peopleRDD = sparkSession.sparkContext
.textFile("file:/E:/scala_workspace/z_spark_study/people.txt",2)
.map( x => x.split(",")).map( x => Person(x(0),x(1).trim().toInt)).toDF()
peopleRDD
}
创建rdd方法2:
def rddToDF(sparkSession : SparkSession):DataFrame = {
//设置schema结构
val schema = StructType(
Seq(
StructField("name",StringType,true)
,StructField("age",IntegerType,true)
)
)
val rowRDD = sparkSession.sparkContext
.textFile("file:/E:/scala_workspace/z_spark_study/people.txt",2)
.map( x => x.split(",")).map( x => Row(x(0),x(1).trim().toInt))
sparkSession.createDataFrame(rowRDD,schema)
}