目录
题目一:查询交易额最值
数据源
-- 建表
-- Fixes: `time` was varchar(255) — dates stored as strings defeat type
-- checking and date arithmetic; `amount` used int(255) — the display
-- width is meaningless and deprecated in MySQL 8. The '2019-02-08'
-- string literals in the seed INSERT coerce cleanly into DATE.
CREATE TABLE `deal` (
  `uid` varchar(255) DEFAULT NULL COMMENT '用户ID',
  `time` date DEFAULT NULL COMMENT '用户交易时间',
  `amount` int DEFAULT NULL COMMENT '用户交易额'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- 插入数据
-- Explicit column list added: INSERT without one breaks silently if the
-- table's column order ever changes.
INSERT INTO deal (uid, time, amount) VALUES
('1', '2019-02-08', '6214'),
('1', '2019-02-08', '8247'),
('1', '2019-02-09', '850'),
('1', '2019-02-10', '985'),
('1', '2019-02-09', '285'),
('1', '2019-02-11', '1285'),
('2', '2019-02-14', '1037'),
('2', '2019-02-15', '369'),
('2', '2019-02-15', '795'),
('2', '2019-02-19', '715'),
('2', '2019-02-21', '537'),
('2', '2019-02-24', '1037'),
('3', '2019-02-09', '967'),
('3', '2019-02-10', '850'),
('3', '2019-02-10', '769'),
('3', '2019-02-13', '940'),
('3', '2019-02-19', '843'),
('3', '2019-02-11', '850'),
('3', '2019-02-14', '843'),
('3', '2019-02-15', '900');
mysql> SELECT * FROM deal;
+------+------------+--------+
| uid | time | amount |
+------+------------+--------+
| 1 | 2019-02-08 | 6214 |
| 1 | 2019-02-08 | 8247 |
| 1 | 2019-02-09 | 850 |
| 1 | 2019-02-10 | 985 |
| 1 | 2019-02-09 | 285 |
| 1 | 2019-02-11 | 1285 |
| 2 | 2019-02-14 | 1037 |
| 2 | 2019-02-15 | 369 |
| 2 | 2019-02-15 | 795 |
| 2 | 2019-02-19 | 715 |
| 2 | 2019-02-21 | 537 |
| 2 | 2019-02-24 | 1037 |
| 3 | 2019-02-09 | 967 |
| 3 | 2019-02-10 | 850 |
| 3 | 2019-02-10 | 769 |
| 3 | 2019-02-13 | 940 |
| 3 | 2019-02-19 | 843 |
| 3 | 2019-02-11 | 850 |
| 3 | 2019-02-14 | 843 |
| 3 | 2019-02-15 | 900 |
+------+------------+--------+
20 rows in set (0.00 sec)
场景一:查询出每个uid交易额最大的时间和交易额
查询结果为(uid,time,amount), 注意某一用户最大交易额可能有多次
方法1:分组找最值 + IN
-- Method 1: compute each uid's max amount with GROUP BY, then keep every
-- row whose (uid, amount) pair matches via a row-constructor IN.
-- If a user reaches the max amount on several rows, all of them survive.
SELECT d.uid,
       d.time,
       d.amount AS max_amount
FROM deal AS d
WHERE (d.uid, d.amount) IN (
    SELECT g.uid,
           MAX(g.amount)
    FROM deal AS g
    GROUP BY g.uid
);
+------+------------+------------+
| uid | time | max_amount |
+------+------------+------------+
| 1 | 2019-02-08 | 8247 |
| 2 | 2019-02-14 | 1037 |
| 2 | 2019-02-24 | 1037 |
| 3 | 2019-02-09 | 967 |
+------+------------+------------+
4 rows in set (0.01 sec)
方法2:遍历两遍本表,两本表每条记录互相作比较求topN——通用
-- Method 2 (generic top-N via a correlated count):
-- a row is the per-user maximum exactly when zero DISTINCT amounts of the
-- same user are strictly greater than it. For top-N, compare the count
-- with N instead of 1. DISTINCT keeps the count correct when the same
-- amount appears on several rows.
SELECT a.uid,
       a.time,
       a.amount AS max_amount
FROM deal AS a
WHERE (
        -- how many distinct amounts of the same user exceed this row's amount
        SELECT COUNT(DISTINCT b.amount)
        FROM deal AS b
        WHERE b.uid = a.uid        -- restrict the comparison to the same user
          AND b.amount > a.amount
      ) < 1;
+------+------------+------------+
| uid | time | max_amount |
+------+------------+------------+
| 1 | 2019-02-08 | 8247 |
| 2 | 2019-02-14 | 1037 |
| 2 | 2019-02-24 | 1037 |
| 3 | 2019-02-09 | 967 |
+------+------------+------------+
4 rows in set (0.01 sec)
方法3:利用hive窗口函数求topN——hive通用
-- Method 3 (Hive window function): rank rows per uid by descending amount
-- and keep rank 1.
-- Fixes two defects in the original:
--  1. the inline comment on the ORDER BY line swallowed the closing
--     parenthesis and the alias (`...desc #comment) as rank`), so the
--     statement would not even parse;
--  2. `rank` is a reserved keyword in Hive — the alias is renamed to rnk.
select
    uid,
    time,
    amount as max_amount
from (
    select
        uid,
        time,
        amount,
        -- per-uid ranking, highest amount first; ties share the same rank
        rank() over (partition by uid order by amount desc) as rnk
    from deal
) a
where rnk = 1;
场景二:查询每个uid交易额最大的三个
方法1:查两遍本表,利用和当前值作比较求topN——通用
-- Top-3 amounts per uid: a row qualifies when fewer than 3 DISTINCT
-- amounts of the same user are strictly larger than it (counts 0, 1, 2
-- correspond to ranks 1, 2, 3). DISTINCT guards against duplicated
-- amounts inflating the count and dropping valid rows.
select a.uid,
       a.time,
       a.amount
from deal a
where (
        select count(distinct b.amount)
        from deal b
        where b.amount > a.amount
          and b.uid = a.uid
      ) < 3
order by a.uid, a.amount desc;
-- 结果
+------+------------+--------+
| uid | time | amount |
+------+------------+--------+
| 1 | 2019-02-08 | 8247 |
| 1 | 2019-02-08 | 6214 |
| 1 | 2019-02-11 | 1285 |
| 2 | 2019-02-14 | 1037 |
| 2 | 2019-02-24 | 1037 |
| 2 | 2019-02-15 | 795 |
| 2 | 2019-02-19 | 715 |
| 3 | 2019-02-09 | 967 |
| 3 | 2019-02-13 | 940 |
| 3 | 2019-02-15 | 900 |
+------+------------+--------+
10 rows in set (0.00 sec)
归纳推广
topN | 比当前值大的个数 |
---|---|
1 | 0 |
2 | 1 |
3 | 2 |
4 | 3 |
5 | 4 |
– | – |
N | N-1 |
结果:找topN,< N即可
方法2:利用hive窗口函数求topN——hive通用
rank() 排名相同,跳跃
dense_rank() 排名相同,不跳跃
row_number() 不处理并列,按行顺序连续编号(同值也得到不同名次)
-- Top-3 per uid with dense_rank(): ties share a rank and no ranks are
-- skipped, so `< 4` yields the three highest distinct amounts per user.
-- Fix: `rank` is a reserved keyword in Hive and cannot safely be used as
-- a column alias — renamed to rnk.
select
    *
from (
    select
        uid,
        time,
        amount,
        dense_rank() over (partition by uid order by amount desc) as rnk
    from deal
) a
where
    rnk < 4;
场景三:统计每个用户每天最大交易额
-- Scenario 3: largest single transaction per user per day — a plain
-- two-column GROUP BY with MAX.
select
    uid,
    time,
    max(amount) as day_max_amount
from deal
group by uid, time;
+------+------------+----------------+
| uid | time | day_max_amount |
+------+------------+----------------+
| 1 | 2019-02-08 | 8247 |
| 1 | 2019-02-09 | 850 |
| 1 | 2019-02-10 | 985 |
| 1 | 2019-02-11 | 1285 |
| 2 | 2019-02-14 | 1037 |
| 2 | 2019-02-15 | 795 |
| 2 | 2019-02-19 | 715 |
| 2 | 2019-02-21 | 537 |
| 2 | 2019-02-24 | 1037 |
| 3 | 2019-02-09 | 967 |
| 3 | 2019-02-10 | 850 |
| 3 | 2019-02-11 | 850 |
| 3 | 2019-02-13 | 940 |
| 3 | 2019-02-14 | 843 |
| 3 | 2019-02-15 | 900 |
| 3 | 2019-02-19 | 843 |
+------+------------+----------------+
16 rows in set (0.00 sec)
场景四:统计每个用户每天的交易总额以及截止当天累计的交易额
方法1:分步 + 加字段
#第一步:统计每个用户每天的交易总额
#第二步:加一个字段统计截止当天累计的交易额
-- Scenario 4, method 1: aggregate each user's daily total, and add a
-- correlated scalar subquery that re-sums every row of the same user up
-- to and including the current day — the running total to date.
select
    a.uid,
    a.time,
    sum(a.amount) as day_sum_amount,
    (
        -- cumulative amount: all of this user's rows dated <= current day
        select sum(b.amount)
        from deal b
        where b.time <= a.time
          and b.uid = a.uid
    ) as sum_amount
from deal a
group by
    a.uid,
    a.time;
+------+------------+----------------+------------+
| uid | time | day_sum_amount | sum_amount |
+------+------------+----------------+------------+
| 1 | 2019-02-08 | 14461 | 14461 |
| 1 | 2019-02-09 | 1135 | 15596 |
| 1 | 2019-02-10 | 985 | 16581 |
| 1 | 2019-02-11 | 1285 | 17866 |
| 2 | 2019-02-14 | 1037 | 1037 |
| 2 | 2019-02-15 | 1164 | 2201 |
| 2 | 2019-02-19 | 715 | 2916 |
| 2 | 2019-02-21 | 537 | 3453 |
| 2 | 2019-02-24 | 1037 | 4490 |
| 3 | 2019-02-09 | 967 | 967 |
| 3 | 2019-02-10 | 1619 | 2586 |
| 3 | 2019-02-11 | 850 | 3436 |
| 3 | 2019-02-13 | 940 | 4376 |
| 3 | 2019-02-14 | 843 | 5219 |
| 3 | 2019-02-15 | 900 | 6119 |
| 3 | 2019-02-19 | 843 | 6962 |
+------+------------+----------------+------------+
16 rows in set (0.00 sec)
方法2:利用hive窗口函数
-- Scenario 4, method 2 (Hive): first collapse the rows to one daily total
-- per user, then add a windowed running sum ordered by date within each
-- uid — "cumulative amount to date" is just SUM over an ordered window.
select
    d.uid,
    d.time,
    d.day_amount,
    sum(d.day_amount) over (partition by d.uid order by d.time) as sum_amount
from (
    -- per-user daily totals
    select
        uid,
        time,
        sum(amount) as day_amount
    from deal
    group by
        uid,
        time
) d;