SQL-最值问题-topN

题目一:查询交易额最值

数据源

-- 建表
-- Schema for the trading-record examples below.
-- Fixes vs. the original:
--   * `time` was varchar(255); DATE is the correct type (every comparison
--     below, e.g. time <= a.time, behaves identically on DATE values);
--   * int(255) used a display width, which is meaningless for storage and
--     deprecated since MySQL 8.0.17 -> plain INT.
CREATE TABLE `deal` (
  `uid` varchar(255) DEFAULT NULL COMMENT '用户ID',
  `time` date DEFAULT NULL COMMENT '用户交易时间',
  `amount` int DEFAULT NULL COMMENT '用户交易额'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- 插入数据
-- Seed data.
-- Fixes vs. the original:
--   * explicit column list (INSERT without one breaks silently on schema change);
--   * amount written as numeric literals instead of quoted strings that
--     relied on implicit string->int coercion.
INSERT INTO deal (uid, time, amount) VALUES
('1', '2019-02-08', 6214),
('1', '2019-02-08', 8247),
('1', '2019-02-09', 850),
('1', '2019-02-10', 985),
('1', '2019-02-09', 285),
('1', '2019-02-11', 1285),
('2', '2019-02-14', 1037),
('2', '2019-02-15', 369),
('2', '2019-02-15', 795),
('2', '2019-02-19', 715),
('2', '2019-02-21', 537),
('2', '2019-02-24', 1037),
('3', '2019-02-09', 967),
('3', '2019-02-10', 850),
('3', '2019-02-10', 769),
('3', '2019-02-13', 940),
('3', '2019-02-19', 843),
('3', '2019-02-11', 850),
('3', '2019-02-14', 843),
('3', '2019-02-15', 900);
mysql> SELECT * FROM deal;
+------+------------+--------+
| uid  | time       | amount |
+------+------------+--------+
| 1    | 2019-02-08 |   6214 |
| 1    | 2019-02-08 |   8247 |
| 1    | 2019-02-09 |    850 |
| 1    | 2019-02-10 |    985 |
| 1    | 2019-02-09 |    285 |
| 1    | 2019-02-11 |   1285 |
| 2    | 2019-02-14 |   1037 |
| 2    | 2019-02-15 |    369 |
| 2    | 2019-02-15 |    795 |
| 2    | 2019-02-19 |    715 |
| 2    | 2019-02-21 |    537 |
| 2    | 2019-02-24 |   1037 |
| 3    | 2019-02-09 |    967 |
| 3    | 2019-02-10 |    850 |
| 3    | 2019-02-10 |    769 |
| 3    | 2019-02-13 |    940 |
| 3    | 2019-02-19 |    843 |
| 3    | 2019-02-11 |    850 |
| 3    | 2019-02-14 |    843 |
| 3    | 2019-02-15 |    900 |
+------+------------+--------+
20 rows in set (0.00 sec)

场景一:查询出每个uid交易额最大的时间和交易额

查询结果为(uid,time,amount), 注意某一用户最大交易额可能有多次

方法1:分组找最值 + IN

-- Method 1: per-uid maximum via GROUP BY, then look the rows back up with a
-- tuple IN. A uid appears more than once when its maximum amount occurs on
-- several rows (e.g. uid 2 hits 1037 twice).
SELECT d.uid,
       d.time,
       d.amount AS max_amount
FROM deal d
WHERE (d.uid, d.amount) IN (SELECT g.uid,
                                   MAX(g.amount)
                            FROM deal g
                            GROUP BY g.uid);
+------+------------+------------+
| uid  | time       | max_amount |
+------+------------+------------+
| 1    | 2019-02-08 |       8247 |
| 2    | 2019-02-14 |       1037 |
| 2    | 2019-02-24 |       1037 |
| 3    | 2019-02-09 |        967 |
+------+------------+------------+
4 rows in set (0.01 sec)

方法2:遍历两遍本表,本表每条记录与其他记录互相作比较求topN——通用

-- Method 2: correlated-count top-N (portable to any SQL engine).
-- A row holds its uid's maximum exactly when zero DISTINCT amounts of the
-- same uid are strictly larger; for top-N the guard becomes "< N".
-- DISTINCT keeps the count correct when several rows share the same amount.
SELECT cur.uid,
       cur.time,
       cur.amount AS max_amount
FROM deal cur
WHERE (SELECT COUNT(DISTINCT other.amount)
       FROM deal other
       WHERE other.uid = cur.uid        -- compare only within the same user
         AND other.amount > cur.amount  -- count strictly larger amounts
      ) < 1;
+------+------------+------------+
| uid  | time       | max_amount |
+------+------------+------------+
| 1    | 2019-02-08 |       8247 |
| 2    | 2019-02-14 |       1037 |
| 2    | 2019-02-24 |       1037 |
| 3    | 2019-02-09 |        967 |
+------+------------+------------+
4 rows in set (0.01 sec)

方法3:利用hive窗口函数求topN——hive通用

-- Method 3: window-function top-1 (Hive / MySQL 8+).
-- Fixes vs. the original:
--   * the inline '#' comment placed after "order by amount desc" commented
--     out the closing parenthesis and the alias, making the statement a
--     syntax error;
--   * 'rank' collides with the RANK() keyword (reserved in MySQL 8 / Hive),
--     so the alias is renamed to rnk.
select
	uid,
	time,
	amount as max_amount
from
	(select
		uid,
		time,
		amount,
		-- partition by uid = per-user grouping; order by amount desc = rank by amount
		rank() over (partition by uid order by amount desc) as rnk
	from deal
	) a
where rnk = 1;

场景二:查询每个uid交易额最大的三个

方法1:查两遍本表,利用和当前值作比较求topN——通用

-- Top-3 amounts per uid via the correlated-count trick (portable).
-- For top-N a row qualifies when fewer than N DISTINCT amounts of the same
-- uid are strictly larger (0, 1 or 2 larger values for top-3). DISTINCT
-- keeps ties from pushing qualifying rows out of the window.
select a.uid,
       a.time,
       a.amount
from deal a
where (
        select count(distinct b.amount)
        from deal b
        where b.uid = a.uid
          and b.amount > a.amount
      ) < 3
order by a.uid, a.amount desc;
-- 结果
+------+------------+--------+
| uid  | time       | amount |
+------+------------+--------+
| 1    | 2019-02-08 |   8247 |
| 1    | 2019-02-08 |   6214 |
| 1    | 2019-02-11 |   1285 |
| 2    | 2019-02-14 |   1037 |
| 2    | 2019-02-24 |   1037 |
| 2    | 2019-02-15 |    795 |
| 2    | 2019-02-19 |    715 |
| 3    | 2019-02-09 |    967 |
| 3    | 2019-02-13 |    940 |
| 3    | 2019-02-15 |    900 |
+------+------------+--------+
10 rows in set (0.00 sec)

归纳推广

| topN | 比当前值大的个数 |
|------|------------------|
| top1 | 0                |
| top2 | 0 或 1           |
| top3 | 0、1 或 2        |
| top4 | 0~3              |
| top5 | 0~4              |
| topN | 0~N-1            |

结果:找topN,只需"比当前值大的(去重)个数 < N"即可

方法2:利用hive窗口函数求topN——hive通用

rank():相同值并列同一排名,后续排名跳跃(如 1,1,3)
dense_rank():相同值并列同一排名,后续排名不跳跃(如 1,1,2)
row_number():按行顺序连续编号,相同值也不并列(如 1,2,3)

-- Window-function top-3 per uid (Hive / MySQL 8+).
-- dense_rank() gives tied amounts the same rank without gaps, so "rnk < 4"
-- keeps the three highest distinct amounts of each uid (ties included).
-- Fix vs. the original: the alias 'rank' collides with the RANK() keyword
-- (reserved in MySQL 8 / Hive); renamed to rnk.
select
	*
from
	(select
		uid,
		time,
		amount,
		dense_rank() over (partition by uid order by amount desc) as rnk
	from deal
	) a
where
	rnk < 4;

场景三:统计每个用户每天最大交易额

-- Scene 3: largest single transaction per user per day.
-- Grouping on (uid, time) makes each group one user-day; MAX picks the
-- biggest amount inside it.
select d.uid,
       d.time,
       max(d.amount) as day_max_amount
from deal d
group by d.uid, d.time;
+------+------------+----------------+
| uid  | time       | day_max_amount |
+------+------------+----------------+
| 1    | 2019-02-08 |           8247 |
| 1    | 2019-02-09 |            850 |
| 1    | 2019-02-10 |            985 |
| 1    | 2019-02-11 |           1285 |
| 2    | 2019-02-14 |           1037 |
| 2    | 2019-02-15 |            795 |
| 2    | 2019-02-19 |            715 |
| 2    | 2019-02-21 |            537 |
| 2    | 2019-02-24 |           1037 |
| 3    | 2019-02-09 |            967 |
| 3    | 2019-02-10 |            850 |
| 3    | 2019-02-11 |            850 |
| 3    | 2019-02-13 |            940 |
| 3    | 2019-02-14 |            843 |
| 3    | 2019-02-15 |            900 |
| 3    | 2019-02-19 |            843 |
+------+------------+----------------+
16 rows in set (0.00 sec)

场景四:统计每个用户每天的交易总额以及截止当天累计的交易额

方法1:分步 + 加字段

# Step 1: per-user daily total -> GROUP BY (uid, time) with SUM(amount).
# Step 2: extra column = running total, computed by a correlated scalar
#         subquery that re-sums every row of the same uid dated on or before
#         the current group's day (so it includes today's transactions).
select
	uid,
	time,
	sum(amount) as day_sum_amount,
	(
		select 
			sum(amount)
		from 
			deal
		where 
			time <= a.time 
		and uid = a.uid
	) as sum_amount
from 
	deal a
group by 
	a.uid,
	a.time
;
+------+------------+----------------+------------+
| uid  | time       | day_sum_amount | sum_amount |
+------+------------+----------------+------------+
| 1    | 2019-02-08 |          14461 |      14461 |
| 1    | 2019-02-09 |           1135 |      15596 |
| 1    | 2019-02-10 |            985 |      16581 |
| 1    | 2019-02-11 |           1285 |      17866 |
| 2    | 2019-02-14 |           1037 |       1037 |
| 2    | 2019-02-15 |           1164 |       2201 |
| 2    | 2019-02-19 |            715 |       2916 |
| 2    | 2019-02-21 |            537 |       3453 |
| 2    | 2019-02-24 |           1037 |       4490 |
| 3    | 2019-02-09 |            967 |        967 |
| 3    | 2019-02-10 |           1619 |       2586 |
| 3    | 2019-02-11 |            850 |       3436 |
| 3    | 2019-02-13 |            940 |       4376 |
| 3    | 2019-02-14 |            843 |       5219 |
| 3    | 2019-02-15 |            900 |       6119 |
| 3    | 2019-02-19 |            843 |       6962 |
+------+------------+----------------+------------+
16 rows in set (0.00 sec)

方法2:利用hive窗口函数

-- Scene 4, method 2: running total with a window function (Hive / MySQL 8+).
-- Inner query: per-user daily totals. Outer query: cumulative sum per uid,
-- ordered by day — "sum over (partition by uid order by time)" IS the
-- "total up to and including today" semantics.
select
	daily.uid,
	daily.time,
	daily.day_amount,
	sum(daily.day_amount) over (partition by daily.uid order by daily.time) as sum_amount
from
	(
		select
			uid,
			time,
			sum(amount) as day_amount
		from deal
		group by uid, time
	) daily
;
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值