sparkSql记录
SparkSession的三种创建方式
入口 :SQLContext与SparkSession
2.0版本以前,SparkSQL所有的功能入口是SQLContext
(注:SQLContext 依赖 SparkContext,必须先有 SparkContext 才能创建 SQLContext)
2.0版本以后,SparkSQL所有的功能入口是SparkSession
在旧版本中,Spark SQL 提供两个 SQL 查询起始点:SQLContext 用于 Spark 自身提供的 SQL 查询,HiveContext 用于连接 Hive 的查询。SparkSession 是 Spark 最新的 SQL 查询起始点,实质上是 SQLContext 和 HiveContext 的组合,因此在 SQLContext 和 HiveContext 上可用的 API 在 SparkSession 上同样可以使用。SparkSession 内部封装了 SparkContext,计算实际上是由 SparkContext 完成的。
// SparkSession creation, approach 1: configure the builder directly.
//   builder()     - obtains the builder used to construct a SparkSession
//   master        - sets the run mode (does not need to be set in cluster mode)
//   appName       - sets the application name
//   getOrCreate() - produces the session
val sparks1: SparkSession =
  SparkSession.builder().master("local").appName("SparkSession1").getOrCreate()
// SparkSession creation, approach 2: go through a SparkConf.
//   1. Build a SparkConf and set the run mode and the application name on it
//      (the run mode does not need to be set in cluster mode).
//   2. Pass that SparkConf to the SparkSession builder through config(...).
//   3. Call getOrCreate() to produce the session.
val conf: SparkConf = new SparkConf().setMaster("local").setAppName("SparkSession2")

val sparks2: SparkSession = SparkSession.builder().config(conf).getOrCreate()
// SparkSession creation, approach 3: same builder as approach 1, with Hive enabled.
//   builder()           - obtains the builder used to construct a SparkSession
//   master              - sets the run mode (does not need to be set in cluster mode)
//   appName             - sets the application name
//   enableHiveSupport() - turns on Hive operations
//   getOrCreate()       - produces the session
val sparks3: SparkSession = SparkSession.builder()
  .master("local")
  .appName("SparkSession3")
  .enableHiveSupport()
  .getOrCreate()
// Shut down the sessions created above, in creation order.
Seq(sparks1, sparks2, sparks3).foreach(_.stop())
使用 Spark 将数据包装成 JSON
准备数据
张三 25 男 chinese 50
张三 25 男 math 60
张三 25 男 english 70
张三 25 男 chinese 80
李四 20 男 chinese 50
李四 20 男 math 50
李四 20 男 english 50
王芳 19 女 chinese 70
王芳 19 女 math 70
王芳 19 女 english 70
张大三 25 男 chinese 60
张大三 25 男 math 60
张大三 25 男 english 70
李大四 20 男 chinese 50
李大四 20 男 math 60
李大四 20 男 english 50
王小芳 19 女 chinese 70
王小芳 19 女 math 80
王三芳 19 女 his 70
王四芳 19 女 pyh 70
王五芳 19 女 chinese 70
王三芳 19 女 english 70
王四芳 19 女 math 70
王五芳 19 女 math 70
王三芳 19 女 math 70
实体类
import org.codehaus.jackson.annotate.JsonProperty;
import java.io.Serializable;
/**
 * Serializable bean holding one record with five string properties:
 * name, age, gender, classname and score.
 *
 * Every field carries a {@code @JsonProperty} annotation whose value equals the
 * field name, and each field has a public getter/setter pair.
 */
public class Stu implements Serializable {

    // --- properties (declaration order: name, age, gender, classname, score) ---

    @JsonProperty("name") String name;

    @JsonProperty("age") String age;

    @JsonProperty("gender") String gender;

    @JsonProperty("classname") String classname;

    @JsonProperty("score") String score;

    // --- accessors ---

    /** @return the current value of {@code name} */
    public String getName() { return this.name; }

    /** @param name new value for {@code name} */
    public void setName(final String name) { this.name = name; }

    /** @return the current value of {@code age} */
    public String getAge() { return this.age; }

    /** @param age new value for {@code age} */
    public void setAge(final String age) { this.age = age; }

    /** @return the current value of {@code gender} */
    public String getGender() { return this.gender; }

    /** @param gender new value for {@code gender} */
    public void setGender(final String gender) { this.gender = gender; }

    /** @return the current value of {@code classname} */
    public String getClassname() { return this.classname; }

    /** @param classname new value for {@code classname} */
    public void setClassname(final String classname) { this.classname = classname; }

    /** @return the current value of {@code score} */
    public String getScore() { return this.score; }

    /** @param score new value for {@code score} */
    public void setScore(final String score) { this.score = score; }
}
测试代码
package testjson
import jsonClass.Stu
import org.apache.htrace.fasterxml.jackson.databind.ObjectMapper
import org.apache.spark.sql.{DataFrame, SparkSession}
/**
 * Reads the space-delimited file "stu.txt", names its five columns, converts every
 * row into a `Stu` bean, serialises the bean to JSON and prints one
 * "<score>-<score>|<json>" line per row.
 */
object test1 {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // To reduce log noise, enable:
    // Logger.getLogger("org").setLevel(Level.WARN)

    // Obtain the SparkSession (local mode).
    val spark = SparkSession.builder()
      .appName("mobile")
      .master("local")
      // .config("spark.some.config.option", "some-value")
      .getOrCreate()

    // try/finally so the session is stopped even when the job fails.
    try {
      // Load the local file as a DataFrame; columns arrive as _c0 .. _c4.
      val df: DataFrame = spark
        .read
        .format("csv")
        .option("delimiter", " ")
        .load("stu.txt")

      // Give the positional columns meaningful names.
      val df1: DataFrame = df
        .withColumnRenamed("_c0", "name")
        .withColumnRenamed("_c1", "age")
        .withColumnRenamed("_c2", "gender")
        .withColumnRenamed("_c3", "classname")
        .withColumnRenamed("_c4", "score")

      // Inspect the table if needed:
      // df1.show()

      // Build the "key|json" line for every row. `dt` holds the resulting RDD (not the
      // Unit returned by foreach) so that it can also be saved, see the snippet below.
      val dt = df1.rdd.mapPartitions { rows =>
        // One ObjectMapper per partition instead of one per record.
        val mapper = new ObjectMapper()
        rows.map { row =>
          val name = row.getAs[String]("name")
          val age = row.getAs[String]("age")
          val gender = row.getAs[String]("gender")
          val classname = row.getAs[String]("classname")
          val score = row.getAs[String]("score")

          val stu = new Stu
          stu.setName(name)
          stu.setAge(age)
          stu.setGender(gender)
          stu.setClassname(classname)
          stu.setScore(score)

          val jsonStr = mapper.writeValueAsString(stu)
          // Debug output of the bare JSON; runs each time this stage is evaluated.
          println(jsonStr)

          // The row key repeats the score on both sides of the dash.
          // NOTE(review): confirm whether a different second component was intended.
          s"$score-$score|$jsonStr"
        }
      }

      // Print every resulting line.
      dt.foreach(println)

      // Write to a file (needs the Hadoop Configuration / Path / FileSystem imports):
      /* val conf = new Configuration()
      val path = new Path("path")
      val fileSystem = FileSystem.get(conf)
      if (fileSystem.exists(path)) {
      fileSystem.delete(path,true)
      }
      dt.saveAsTextFile("path")*/
    } finally {
      spark.stop()
    }
  }
}
json数据
50-50|{"name":"张三","age":"25","gender":"男","classname":"chinese","score":"50"}
60-60|{"name":"张三","age":"25","gender":"男","classname":"math","score":"60"}
70-70|{"name":"张三","age":"25","gender":"男","classname":"english","score":"70"}
80-80|{"name":"张三","age":"25","gender":"男","classname":"chinese","score":"80"}
50-50|{"name":"李四","age":"20","gender":"男","classname":"chinese","score":"50"}
50-50|{"name":"李四","age":"20","gender":"男","classname":"math","score":"50"}
50-50|{"name":"李四","age":"20","gender":"男","classname":"english","score":"50"}
70-70|{"name":"王芳","age":"19","gender":"女","classname":"chinese","score":"70"}
70-70|{"name":"王芳","age":"19","gender":"女","classname":"math","score":"70"}
70-70|{"name":"王芳","age":"19","gender":"女","classname":"english","score":"70"}
60-60|{"name":"张大三","age":"25","gender":"男","classname":"chinese","score":"60"}
60-60|{"name":"张大三","age":"25","gender":"男","classname":"math","score":"60"}
70-70|{"name":"张大三","age":"25","gender":"男","classname":"english","score":"70"}
50-50|{"name":"李大四","age":"20","gender":"男","classname":"chinese","score":"50"}
60-60|{"name":"李大四","age":"20","gender":"男","classname":"math","score":"60"}
50-50|{"name":"李大四","age":"20","gender":"男","classname":"english","score":"50"}
70-70|{"name":"王小芳","age":"19","gender":"女","classname":"chinese","score":"70"}
80-80|{"name":"王小芳","age":"19","gender":"女","classname":"math","score":"80"}
70-70|{"name":"王三芳","age":"19","gender":"女","classname":"his","score":"70"}
70-70|{"name":"王四芳","age":"19","gender":"女","classname":"pyh","score":"70"}
70-70|{"name":"王五芳","age":"19","gender":"女","classname":"chinese","score":"70"}
70-70|{"name":"王三芳","age":"19","gender":"女","classname":"english","score":"70"}
70-70|{"name":"王四芳","age":"19","gender":"女","classname":"math","score":"70"}
70-70|{"name":"王五芳","age":"19","gender":"女","classname":"math","score":"70"}
70-70|{"name":"王三芳","age":"19","gender":"女","classname":"math","score":"70"}