二、行列转换
表1
姓名,科目,分数
name,item,score
张三,数学,33
张三,英语,77
李四,数学,66
李四,英语,78
表2
姓名,数学,英语
name,math,english
张三,33,77
李四,66,78
1、将表1转化成表2
2、将表2转化成表1
import org.apache.spark.sql.{Column, DataFrame, SparkSession}
object Code02 {
def main(args: Array[String]): Unit = {
val spark: SparkSession = SparkSession.builder()
.master("local")
.appName("change")
.config("spark.sql.shuffle.partitions", 1)
.getOrCreate()
import org.apache.spark.sql.functions._
import spark.implicits._
val info1DF: DataFrame = spark
.read
.format("csv")
.option("sep", ",")
.schema("name STRING,item STRING,score INT")
.load("data/1126/info1.txt")
val info2DF: DataFrame = spark
.read
.format("csv")
.option("sep", ",")
.schema("name STRING,math INT,english INT")
.load("data/1126/info2.txt")
info1DF.show()
info2DF.show()
// 1、将表1转化成表2
info1DF
.groupBy($"name")
.agg(
sum(when($"item"==="数学",$"score")) as "math",
sum(when($"item"==="英语",$"score")) as "english"
)
.show()
// 2、将表2转化成表1
val columns: Column = map(
expr("'数学'"),$"math",
expr("'英语'"),$"english"
)
info2DF
.select($"name",explode(columns) as Array("item","score"))
.show()
//SPARK SQL:
// 1、将表1转化成表2
info1Df.createTempView("t1")
spark.sql(
"""
|SELECT name
|,sum(case when item="数学" then score else 0 end) math
|,sum(case when item="英语" then score else 0 end) english
|FROM t1
|GROUP BY name
|""".stripMargin)
.show()
// 2、将表2转化成表1
info2Df.createTempView("t2")
spark.sql(
"""
|SELECT name,item,score
|FROM t2 lateral view explode(map("数学",math,"英语",english)) as item,score
|""".stripMargin)
.show()
}
}