/**
 * Interoperating between DataFrames and RDDs.
 *
 * 1) Reflection: use a case class — requires knowing the fields and their types in advance.
 * 2) Programmatic: use Row — for when the first approach can't be used (columns not known up front).
 * 3) Choice: prefer the first; the second is more verbose and requires attaching a schema to the RDD.
 */
object DataFrameRDDApp {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("DataFrameRDDApp")
      .master("local[2]")
      .getOrCreate()

    //inferReflection(spark)
    program(spark)

    spark.stop()
  }

  /** Programmatic approach: build a DataFrame from an RDD[Row] plus an explicit schema. */
  def program(spark: SparkSession): Unit = {
    // RDD ==> DataFrame
    val rdd = spark.sparkContext.textFile("file:///Users/rocky/data/infos.txt")

    // Parse each CSV line into Row(id, name, age); schema below must match positionally.
    val infoRDD = rdd.map(_.split(",")).map(line => Row(line(0).toInt, line(1), line(2).toInt))

    val structType = StructType(Array(
      StructField("id", IntegerType, true),
      StructField("name", StringType, true),
      StructField("age", IntegerType, true)))

    val infoDF = spark.createDataFrame(infoRDD, structType)
    infoDF.printSchema()
    infoDF.show()

    // Query via the DataFrame API.
    infoDF.filter(infoDF.col("age") > 30).show()

    // Query via SQL on a temporary view.
    infoDF.createOrReplaceTempView("infos")
    spark.sql("select * from infos where age > 30").show()
  }

  /** Reflection approach: map lines to a case class and convert with the implicit toDF(). */
  def inferReflection(spark: SparkSession): Unit = {
    // RDD ==> DataFrame
    // FIX: the original line ended with a stray ".var" token, which does not compile.
    val rdd = spark.sparkContext.textFile("file:///Users/rocky/data/infos.txt")

    // Note: the implicit conversions (toDF) require this import.
    import spark.implicits._
    val infoDF = rdd.map(_.split(",")).map(line => Info(line(0).toInt, line(1), line(2).toInt)).toDF()
    infoDF.show()

    infoDF.filter(infoDF.col("age") > 30).show()

    infoDF.createOrReplaceTempView("infos")
    spark.sql("select * from infos where age > 30").show()
  }

  /** Schema carrier for the reflection-based conversion (fields must be known in advance). */
  case class Info(id: Int, name: String, age: Int)
}
// Source article: "Interoperating between DataFrames and RDDs" (latest recommended article published 2024-07-25 20:02:36)