// Spark 1.x demo script: build a DataFrame from an RDD of space-separated
// "name age height" records, register it as a temp table, cache it, run two
// SQL queries against it, then uncache and shut the context down.
val conf = new SparkConf().setMaster("local").setAppName("ScalaDataFrameOps")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)

// Sample input: one record per string, fields separated by single spaces.
val listRDD = sc.parallelize(List("zhangsan 13 168.5", "lisi 14 175.3", "wangwu 15 176.3"))

import sqlContext.implicits._

// Parse each record into a (name: String, age: Int, height: Double) tuple.
val rowRDD = listRDD.map { line =>
  val fields = line.split(" ")
  (fields(0).trim, fields(1).trim.toInt, fields(2).trim.toDouble)
}

val df = rowRDD.toDF("name", "age", "height")
df.registerTempTable("person")

// Cache the temporary table so both queries below hit the in-memory copy.
sqlContext.cacheTable("person")

val sqlDF = sqlContext.sql("select name, age, height from person where age > 14")
sqlDF.show()

val orderDF = sqlContext.sql("select name, age, height from person order by height desc")
orderDF.show()

// Uncache the temporary table --> evict its cached data from memory.
sqlContext.uncacheTable("person")

sc.stop()
SparkSQL之缓存表
最新推荐文章于 2024-04-26 15:25:18 发布