SPARK-SQL - group分组聚合api,agg()

准备orders.json文件

{"id":"1", "userId":"1", "userName":"Join", "totalPrice":80.0,"qty":3.0}
{"id":"2", "userId":"1", "userName":"Join", "totalPrice":50.0,"qty":3.0}
{"id":"3", "userId":"2", "userName":"Jeffy", "totalPrice":200.0,"qty":3.0}
{"id":"4", "userId":"99999", "userName":"zombie", "totalPrice":222.0,"qty":3.0}

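用 agg() 将多个聚合函数(avg、max、min、sum 等)组合在一起,一次查询得到多个聚合结果。注意:agg(Map) 以列名作为 key,同一列只能保留一个聚合函数,重复 put 同一列名时后者会覆盖前者,所以下面示例中 Map 方式的输出只有 sum(totalPrice)。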

示例代码

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.util.HashMap;
import java.util.Map;

import static org.apache.spark.sql.functions.*;
import static org.apache.spark.sql.functions.sum;

/**
 * Demonstrates grouped and global aggregation with the Spark SQL Dataset API.
 *
 * <p>Reads {@code orders.json} and shows two ways of calling {@code agg()}:
 * with explicit aggregate columns ({@code avg}, {@code max}, {@code min},
 * {@code sum}) and with a {@code Map<String, String>} of column name to
 * aggregate function name.
 */
public class test_28_2 {
    /**
     * Runs the aggregation examples against a local SparkSession and prints
     * each result with {@code show()}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .config("spark.driver.host", "localhost")
                .appName("GroupApiTest")
                .master("local")
                .getOrCreate();

        try {
            spark.sparkContext().setLogLevel("ERROR");

            Dataset<Row> ordersDataSet = spark.read().json(Utils.BASE_PATH + "/join/orders.json");
            ordersDataSet.show();
            /*
            +---+---+----------+------+--------+
            | id|qty|totalPrice|userId|userName|
            +---+---+----------+------+--------+
            |  1|3.0|      80.0|     1|    Join|
            |  2|3.0|      50.0|     1|    Join|
            |  3|3.0|     200.0|     2|   Jeffy|
            |  4|3.0|     222.0| 99999|  zombie|
            +---+---+----------+------+--------+
             */

            // 2: use agg() to evaluate several aggregate functions per group in one query
            ordersDataSet.groupBy("userId").agg(
                    avg("totalPrice"),
                    max("totalPrice"),
                    min("totalPrice"),
                    sum("totalPrice")).show();
            /*
            +------+---------------+---------------+---------------+---------------+
            |userId|avg(totalPrice)|max(totalPrice)|min(totalPrice)|sum(totalPrice)|
            +------+---------------+---------------+---------------+---------------+
            |     1|           65.0|           80.0|           50.0|          130.0|
            | 99999|          222.0|          222.0|          222.0|          222.0|
            |     2|          200.0|          200.0|          200.0|          200.0|
            +------+---------------+---------------+---------------+---------------+
             */

            // agg(Map) is keyed by column name, and a Map keeps one value per key,
            // so only ONE aggregate per column can be expressed this way. Putting
            // "avg", "max", "min" and "sum" under the same "totalPrice" key would
            // leave only the last one; use the Column-based agg() above when several
            // aggregates of the same column are needed.
            Map<String, String> map = new HashMap<>();
            map.put("totalPrice", "sum");
            ordersDataSet.groupBy("userId").agg(map).show();
            /*
            +------+---------------+
            |userId|sum(totalPrice)|
            +------+---------------+
            |     1|          130.0|
            | 99999|          222.0|
            |     2|          200.0|
            +------+---------------+
             */

            // Aggregate over the whole orders dataset (no grouping)
            ordersDataSet.agg(
                    avg("totalPrice"),
                    max("totalPrice"),
                    min("totalPrice"),
                    sum("totalPrice")).show();
            /*
            +---------------+---------------+---------------+---------------+
            |avg(totalPrice)|max(totalPrice)|min(totalPrice)|sum(totalPrice)|
            +---------------+---------------+---------------+---------------+
            |          138.0|          222.0|           50.0|          552.0|
            +---------------+---------------+---------------+---------------+
             */

            ordersDataSet.agg(map).show();
            /*
            +---------------+
            |sum(totalPrice)|
            +---------------+
            |          552.0|
            +---------------+
             */
        } finally {
            // Release the local Spark context even if one of the queries fails.
            spark.stop();
        }
    }
}

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值