一、场景:操作文本数据
1、本地文件路径:E:\temp\demo.txt(在 Scala 字符串字面量中反斜杠需要转义,写作 "E:\\temp\\demo.txt")
2、文件内容(每行一条记录,格式为"姓名 年龄",以空格分隔)
machao 29
shizhongyu 32
baiwanjian 28
zhaolinger 30
二、Spark SQL 代码实现
package cn.com.git.scala.spark.sparkSql
import org.apache.spark.sql.SQLContext
import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.log4j.Level
import org.apache.log4j.Logger
/**
 * Spark SQL demo: loads a text file of "name age" records, registers it as the
 * temporary table `person`, then prints every row followed by the row count.
 *
 * The input path may be passed as the first command-line argument; when it is
 * omitted the original hard-coded location is used.
 */
object PersonDao {

  /** Input file read when no path is supplied on the command line. */
  private val DefaultInputPath = "E:\\temp\\demo.txt"

  /**
   * One record of the input file.
   *
   * @param name first whitespace-separated field of a line
   * @param age  second whitespace-separated field of a line, parsed as an integer
   */
  case class Person(name: String, age: Int)

  /**
   * Program entry point.
   *
   * @param args optional; `args(0)` overrides the default input file path
   */
  def main(args: Array[String]): Unit = {
    // Keep the console readable: only Spark errors, no Jetty server output.
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    // Single-JVM local master.
    val conf = new SparkConf().setMaster("local").setAppName("Sparksql")
    val sc = new SparkContext(conf)

    try {
      val sqlContext = new SQLContext(sc)
      // Brings the RDD -> DataFrame conversion (`toDF`) into scope.
      import sqlContext.implicits._

      // Load the text data and convert it to a DataFrame.
      // Lines are trimmed and blank ones dropped so that an empty line or stray
      // padding does not abort the job; fields are split on runs of whitespace
      // so repeated spaces or tabs do not yield empty fields.
      // A non-blank line without a numeric second field still fails loudly.
      val data = sc.textFile(inputPath)
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(_.split("\\s+"))
        .map(fields => Person(fields(0), fields(1).toInt))
        .toDF()

      // Expose the DataFrame to SQL under the table name "person".
      data.registerTempTable("person")

      // Print all rows of the table.
      val q = sqlContext.sql("select * from person")
      q.show()

      println("----------------->")

      // Print the number of rows.
      val count = sqlContext.sql("select count(*) as count from person")
      count.show()
    } finally {
      // Always release the context, even when loading or a query fails.
      sc.stop()
    }
  }
}
三、执行结果
+----------+---+
|      name|age|
+----------+---+
|    machao| 29|
|shizhongyu| 32|
|baiwanjian| 28|
|zhaolinger| 30|
+----------+---+
----------------->
+-----+
|count|
+-----+
|    4|
+-----+