spark:在spark-shell上运行一些sparkSQL简单语句--12

sparkSQL--简单语句

scala> val sqlContext = new org.apache.spark.sql.SQLContext(sc)
sqlContext: org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLContext@7a0f926c

scala> import sqlContext._
import sqlContext._

scala> case class Person(name:String,age:Int)
defined class Person

scala> val people = sc.textFile("/datatnt/people.txt").map(_.split(",")).map(p => Person(p(0),p(1).trim.toInt))
15/03/18 21:07:51 INFO storage.MemoryStore: ensureFreeSpace(163705) called with curMem=0, maxMem=280248975
15/03/18 21:07:51 INFO storage.MemoryStore: Block broadcast_0 stored as values in memory (estimated size 159.9 KB, free 267.1 MB)
15/03/18 21:07:55 INFO storage.MemoryStore: ensureFreeSpace(22923) called with curMem=163705, maxMem=280248975
15/03/18 21:07:55 INFO storage.MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 22.4 KB, free 267.1 MB)
15/03/18 21:07:55 INFO storage.BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:60363 (size: 22.4 KB, free: 267.2 MB)
15/03/18 21:07:55 INFO storage.BlockManagerMaster: Updated info of block broadcast_0_piece0
15/03/18 21:07:55 INFO spark.SparkContext: Created broadcast 0 from textFile at <console>:19
people: org.apache.spark.rdd.RDD[Person] = MappedRDD[3] at map at <console>:19

scala> people.registerAsTable("people")
warning: there were 1 deprecation warning(s); re-run with -deprecation for details

(注:registerAsTable 在较新的 Spark 版本中已被废弃,上面的 deprecation 警告即由此而来,新版本请改用 registerTempTable。)

scala> people.toDebugString
15/03/18 21:15:07 INFO mapred.FileInputFormat: Total input paths to process : 1
res2: String = 
(1) MappedRDD[3] at map at <console>:19 []
 |  MappedRDD[2] at map at <console>:19 []
 |  /datatnt/people.txt MappedRDD[1] at textFile at <console>:19 []
 |  /datatnt/people.txt HadoopRDD[0] at textFile at <console>:19 []



scala> val teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 and age <= 19")
teenagers: org.apache.spark.sql.SchemaRDD = 
SchemaRDD[8] at RDD at SchemaRDD.scala:108
== Query Plan ==
== Physical Plan ==
Project [name#2]
 Filter ((age#3 >= 13) && (age#3 <= 19))
  PhysicalRDD [name#2,age#3], MapPartitionsRDD[6] at mapPartitions at ExistingRDD.scala:36
                                                  ^
scala> teenagers.map(t => "Name: " + t(0)).collect().foreach(println)
15/03/18 21:23:35 INFO spark.SparkContext: Starting job: collect at <console>:20
15/03/18 21:23:35 INFO scheduler.DAGScheduler: Got job 0 (collect at <console>:20) with 1 output partitions (allowLocal=false)
15/03/18 21:23:35 INFO scheduler.DAGScheduler: Final stage: Stage 0(collect at <console>:20)
15/03/18 21:23:35 INFO scheduler.DAGScheduler: Parents of final stage: List()
15/03/18 21:23:36 INFO scheduler.DAGScheduler: Missing parents: List()
15/03/18 21:23:36 INFO scheduler.DAGScheduler: Submitting Stage 0 (MappedRDD[9] at map at <console>:20), which has no missing parents
15/03/18 21:23:36 INFO storage.MemoryStore: ensureFreeSpace(6424) ca…(此行日志输出在此处被截断)
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值