import org.apache.spark.sql.{DataFrame, SparkSession}
object Demo10stu {
  def main(args: Array[String]): Unit = {
    // Local SparkSession; shuffle partitions kept low because the demo data is tiny.
    val spark: SparkSession = SparkSession
      .builder()
      .master("local")
      .appName("brk")
      .config("spark.sql.shuffle.partitions", 2)
      .getOrCreate()

    import spark.implicits._
    import org.apache.spark.sql.functions._

    // Input CSV: one row per (student, subject, score).
    // NOTE(review): score is declared String and later fed to sum(); Spark casts it
    // implicitly — consider Int/Double in the schema for stricter typing.
    val stu: DataFrame = spark.read
      .format("csv")
      .option("sep", ",")
      .schema("name String,cou String, score String")
      .load("spark/data/stu.txt")

    /**
     * 1. Rows-to-columns (pivot)
     *
     * Table 1 (long format)
     * name, subject (cou), score
     * 张三, 数学, 33
     * 张三, 英语, 77
     * 李四, 数学, 66
     * 李四, 英语, 78
     *
     * Goal: transform Table 1 into Table 2 (wide format)
     * Table 2
     * name, math, english
     * 张三, 33, 77
     * 李四, 66, 78
     */
    stu.createOrReplaceTempView("student")

    // Rows to columns, variant 1: conditional aggregation (sum over CASE WHEN).
    spark.sql(
      """
        |
        |
        |select name,
        |sum(case when cou='数学' then score else 0 end) as math,
        |sum(case when cou='英语' then score else 0 end) as english
        |from student group by name
        |
      """.stripMargin)
    //.show()

    // Rows to columns, variant 2: SQL PIVOT clause.
    spark.sql(
      """
        |
        |select *
        |from(select name,cou,score from student)
        |pivot(sum(score)
        |for cou in('数学' math,'英语' english))
        |
        |
      """.stripMargin)
    // .show()

    // Rows to columns, variant 3: DataFrame pivot API.
    // Listing the pivot values explicitly avoids an extra distinct-values job.
    val result: DataFrame = stu
      .groupBy($"name")
      .pivot($"cou", List("数学", "英语"))
      .agg(sum($"score"))
    // result.show()

    /**
     * 2. Columns-to-rows (unpivot): transform Table 2 back into Table 1.
     */
    // Variant 1: explode a map of {subject-name -> score}.
    // FIX: the map keys must be *literal* subject names. The original used
    // expr("`数学`") / expr("`英语`") as keys, which resolved the score columns,
    // so the exploded `cou` column held score values instead of subject names.
    result
      .select($"name", explode(map(lit("数学"), $"数学", lit("英语"), $"英语")) as Array("cou", "score"))
      .show()

    // Register once (the original registered this view twice back-to-back).
    result.createOrReplaceTempView("student2")

    // Variant 2: LATERAL VIEW explode in SQL (keys are string literals here).
    spark.sql(
      """
        |
        |
        |select name,cou,score from student2
        |lateral view explode(map("数学",`数学`,"英语",`英语`)) t as cou,score
        |
        |
      """.stripMargin)
      .show(100)

    // Variant 3: the stack() table-generating function.
    result
      .selectExpr("name", "stack(2,'数学',`数学`,'英语',`英语`) as (`cou`,`score`)")
      .show(100)

    /**
     * pivot reference — rows-to-columns:
     * syntax: pivot(<aggregate expr> for <pivot column> in (<values to become columns>))
     */
  }
}
// Row/column transformation in Spark
// (original article published 2023-12-21 12:29:06)