Using SQLContext
The entry point for Spark SQL in Spark 1.x is SQLContext:
val sc: SparkContext // An existing SparkContext.
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
// this is used to implicitly convert an RDD to a DataFrame.
import sqlContext.implicits._
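With the implicits imported, an RDD of case classes can be converted to a DataFrame via toDF(). A minimal sketch (the Person case class and the sample rows are hypothetical, not part of the original example):

// Define the case class at the top level so Spark can derive an encoder for it
case class Person(name: String, age: Int)

// toDF() becomes available on the RDD thanks to sqlContext.implicits._
val df = sc.parallelize(Seq(Person("zhangsan", 30), Person("Justin", 19))).toDF()
df.show()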
Create a Scala Maven project
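One way to scaffold such a project (a sketch, assuming the commonly used scala-archetype-simple archetype; the groupId/artifactId values below are placeholders):

mvn archetype:generate \
  -DarchetypeGroupId=net.alchim31.maven \
  -DarchetypeArtifactId=scala-archetype-simple \
  -DgroupId=com.imooc.spark \
  -DartifactId=sql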
Testing SQLContext:
Add the required POM dependencies (spark-sql pulls in spark-core transitively, so no separate spark-core dependency is needed):
<properties>
  <scala.version>2.11.8</scala.version>
  <spark.version>2.1.0</spark.version>
</properties>

<dependencies>
  <!-- Scala -->
  <dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>${scala.version}</version>
  </dependency>

  <!-- Spark SQL -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>${spark.version}</version>
  </dependency>
</dependencies>
Test data (one complete JSON object per line, which is the format Spark's JSON data source expects):
{"name": "zhangsan", "age": 30}
{"name": "Michael"}
{"name": "Andy", "age": 30}
{"name": "Justin", "age": 19}
package com.imooc.spark
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.SparkConf
/**
 * Using SQLContext.
 * Note: IDEA runs locally while the test data lives on the server.
 * Can we still develop and test locally?
 */
object SQLContextApp {

  def main(args: Array[String]): Unit = {

    val path = args(0)

    //1) Create the contexts
    val sparkConf = new SparkConf()

    // On a test or production cluster, AppName and Master are set via the spark-submit script
    //sparkConf.setAppName("SQLContextApp").setMaster("local[2]")

    val sc = new SparkContext(sparkConf)
    val sqlContext = new SQLContext(sc)

    //2) Process the JSON file: read it into a DataFrame and inspect it
    val people = sqlContext.read.format("json").load(path)
    people.printSchema()
    people.show()

    //3) Release resources
    sc.stop()
  }
}
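Because AppName and Master are left to the submit script, the packaged jar is run through spark-submit. A sketch of the invocation (the jar name and file paths are placeholders):

spark-submit \
  --class com.imooc.spark.SQLContextApp \
  --master local[2] \
  /path/to/sql-1.0.jar \
  /path/to/people.json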