行转列 Pivot函数应用
原始数据如下
+----+-----+
|code|value|
+----+-----+
| a| 1|
| b| 1|
| c| 1|
| d| 1|
| e| 1|
| f| 1|
+----+-----+
pivot函数效果
+----+----+----+----+----+----+----+----+
|code| a| b| c| d| e| f|temp|
+----+----+----+----+----+----+----+----+
| f|null|null|null|null|null| 1| 1|
| e|null|null|null|null| 1|null| 1|
| d|null|null|null| 1|null|null| 1|
| c|null|null| 1|null|null|null| 1|
| b|null| 1|null|null|null|null| 1|
| a| 1|null|null|null|null|null| 1|
+----+----+----+----+----+----+----+----+
+---+---+---+---+---+---+
| a| b| c| d| e| f|
+---+---+---+---+---+---+
| 1| 1| 1| 1| 1| 1|
+---+---+---+---+---+---+
实现代码
object rowTurnColumn {
def main(args: Array[String]): Unit = {
val spark = getLocalSparkSession()
import spark.implicits._
val tuples = Seq(
("a", 1),
("b", 1),
("c", 1),
("d", 1),
("e", 1),
("f", 1)
)
val frame = tuples.toDF("code", "value")
frame.show()
val frame1 = frame.groupBy("code")
.pivot("code")
.sum("value")
.withColumn("temp", lit("1"))
frame1.show()
frame1.createOrReplaceTempView("frame1")
spark.sql(
s"""
|select
|${tuples.map(x=>{s"""sum(t1.${x._1}) as ${x._1}"""}).mkString(",")}
|from frame1 t1
|group by
|t1.temp
|""".stripMargin).show()
}
}
行转列 :stack函数应用
+---+---+---+---+---+---+
| a| b| c| d| e| f|
+---+---+---+---+---+---+
| 1| 1| 1| 1| 1| 1|
+---+---+---+---+---+---+
+----+-----+
|code|value|
+----+-----+
| a| 1|
| b| 1|
| c| 1|
| d| 1|
| e| 1|
| f| 1|
+----+-----+
code如下
// SELECT stack(n,expr1, ..., exprk) AS (alias1, alias2, ...) FROM myTable; 语法格式
// 说明:stack(n, expr1, ..., exprk) - 会将expr1, ..., exprk 分割为n行.
spark.sql(
s"""
|select
|stack(${tuples.length},${tuples.map(x=>{s"'${x._1}',`${x._1}`"}).mkString(",")}) as (`code`,`value`)
|from frame2 t1
|""".stripMargin).show()