pivot实现行转列
准备json文件
{"id":"1", "orderId":"1", "name":"apple", "amount":4, "price":20.0, "userId":"1"}
{"id":"2", "orderId":"2", "name":"book", "amount":5, "price":10.0, "userId":"1"}
{"id":"3", "orderId":"3", "name":"cake", "amount":1, "price":200.0, "userId":"2"}
示例代码
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import java.util.Arrays;
/**
 * Demonstrates row-to-column transposition ("pivot") with the Spark SQL
 * {@code pivot} operator on a small order-items dataset loaded from JSON.
 *
 * <p>Two variants are shown: pivoting over all distinct values of the
 * {@code name} column, and pivoting over an explicit subset of values
 * (which avoids an extra distinct-value scan and controls output columns).
 */
public class test28_4 {
    public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .config("spark.driver.host", "localhost")
                .appName("GroupApiTest")
                .master("local")
                .getOrCreate();
        // Keep console output readable for the demo.
        spark.sparkContext().setLogLevel("ERROR");
        try {
            Dataset<Row> orderItems = spark.read().json(Utils.BASE_PATH + "/join/order_items.json");
            orderItems.show();
            /*
            +------+---+-----+-------+-----+------+
            |amount| id| name|orderId|price|userId|
            +------+---+-----+-------+-----+------+
            |     4|  1|apple|      1| 20.0|     1|
            |     5|  2| book|      2| 10.0|     1|
            |     1|  3| cake|      3|200.0|     2|
            +------+---+-----+-------+-----+------+
            */

            // Pivot over ALL distinct values of "name": each distinct product
            // name becomes a column, cells hold sum(price) per userId.
            // Spark must first scan the data to discover the distinct values.
            orderItems.groupBy("userId").pivot("name").sum("price").show();
            /*
            +------+-----+----+-----+
            |userId|apple|book| cake|
            +------+-----+----+-----+
            |     1| 20.0|10.0| null|
            |     2| null|null|200.0|
            +------+-----+----+-----+
            */

            // Pivot over an EXPLICIT value list: only the listed names become
            // columns ("book" is dropped) and the distinct-value scan is skipped.
            orderItems.groupBy("userId").pivot("name", Arrays.asList("apple", "cake")).sum("price").show();
            /*
            +------+-----+-----+
            |userId|apple| cake|
            +------+-----+-----+
            |     1| 20.0| null|
            |     2| null|200.0|
            +------+-----+-----+
            */
        } finally {
            // Always release the Spark context; the original demo leaked it.
            spark.stop();
        }
    }
}