前言
资源全部来自于网络,我会尽量在每一题上都写上注释,没什么好说的,做就完事了。
第一题 计算月最大值
-- 建表CREATE TABLE mianshi1 (id VARCHAR (20),dates VARCHAR (20),v_num INT) charset=utf8;
-- 插入数据INSERT INTO mianshi1 VALUES ("A","2015-01",5),("A","2015-01",15),("B","2015-01",5),("A","2015-01",8),("B","2015-01",25),("A","2015-01",5),("A","2015-02",4),("A","2015-02",6),("B","2015-02",10),("B","2015-02",5),("A","2015-03",16),("A","2015-03",22),("B","2015-03",23),("B","2015-03",10),("B","2015-03",11);
-- 创建一个临时表,保存分组后的当月访问次数CREATE TABLE middle
AS
SELECT id, dates, SUM(v_num) AS s
FROM mianshi1
GROUP BY id, dates
ORDER BY id;
-- 不使用窗口函数-- 将中间表进行自链接,然后再根据b表进行分组,对分组后的a表数据进行聚合运算SELECT a.id AS '用户', b.dates AS '月份', b.s AS '当月访问次数'
, MAX(a.s) AS '最大访问次数'
, SUM(a.s) AS '总访问次数'
FROM middle a
INNER JOIN middle b
ON a.id = b.id
AND a.dates <= b.dates
GROUP BY b.id, b.dates;
-- 使用窗口函数SELECT id AS '用户', dates AS '月份', s AS '当月访问次数'
, MAX(s) OVER (PARTITION BY id ROWS BETWEEN unbounded PRECEDING AND CURRENT ROW) AS '最大访问次数'
, SUM(s) OVER (PARTITION BY id ROWS BETWEEN unbounded PRECEDING AND CURRENT ROW) AS '总访问次数'
FROM middle;
第二题 计算性别合计
计算各院系的男女计数以及合计。
-- 建表CREATE TABLE student1 (id VARCHAR (20),NAME VARCHAR (20),gender CHAR (1),birth VARCHAR (20),department VARCHAR (20),address VARCHAR (20)) charset=utf8;
-- 插入数据INSERT INTO student1 VALUES ("201901","张大佬","男","1985","计算机系","北京市海淀区"),("201902","郭大侠","男","1986","中文系","北京市昌平区"),("201903","张三","女","1990","中文系","湖南省永州市"),("201904","李四","男","1990","英语系","辽宁市阜新市"),("201905","王五","女","1991","英语系","福建省厦门市"),("201906","王六","男","1988","计算机系","湖南省衡阳市");
-- 子查询之后一定要给表随便起个名字,不然报错SELECT
department '院系',
male '男',
female '女',
male + female '合计'
FROM
(
SELECT
department,
sum(CASE gender WHEN '男' THEN 1 ELSE 0 END) male,
sum(CASE gender WHEN '女' THEN 1 ELSE 0 END) female
FROM
student1
GROUP BY
department
) a;
第三题 计算仓库合计(动态SQL)
使用动态SQL,仓库并不一定只有例子中的1234,可能有5678。 我的想法是这是个行列转换的题,用case...when语句,但是仓库的值不确定,所以考虑变量设置SQL语句。先查询表中的非重复仓库
子查询group by,通过select向字符串变量里循环添加case...when语句
外层再嵌套一个group by,最后拼接所有的变量sql字符串
使用预处理执行sql语句如果是静态SQL,就需要像上面一体不停的根据表中仓库变化,自己手动添加case,运用动态SQL就不需要自己手动添加 这个解法硬解可以,但肯定不是最好,希望有更好的解法。
-- 建表CREATE TABLE product (月份 INT,商品 VARCHAR (10),单价 DECIMAL (10,2),金额 DECIMAL (10,2),仓库 VARCHAR (5),数量 INT);
-- 插入数据INSERT INTO product VALUES (1,'商品1',100,1000,'仓库1',10),(1,'商品1',110,2200,'仓库2',20),(1,'商品2',100,1000,'仓库1',10),(1,'商品2',110,3300,'仓库3',30),(1,'商品3',110,3300,'仓库4',36);
-- 创建临时表用以保存现在有哪些仓库create temporary table if not exists warehouse as (select distinct 仓库 from product);
set @sql = 'select 月份,商品,avg(单价) as "单价",sum(金额) as "金额"';
set @sql_out = 'select 月份,商品,round(avg(单价),2) as "单价"';
-- 首先根据临时表设置sql语句select @sql:=(select @sql:=concat(@sql,',case 仓库 when "',仓库,'" then 数量 else 0 end as "',仓库,'"') as yuju from warehouse order by yuju desc limit 1);
select @sql_out:=CONCAT(@sql_out,',sum(',仓库,') as "',仓库,'"') from (warehouse);
select @sql:= concat(@sql_out,' from (',@sql,' from product group by 月份,商品,仓库)b group by 月份,商品');
-- 执行预处理PREPARE stmt from @sql;
EXECUTE stmt;
-- 释放预处理资源DEALLOCATE prepare stmt;
-- 删除临时表drop table warehouse;
第四题 计算年月累计值
需要求的是月合计,年累加,总累加。
-- 删除表DROP TABLE IF EXISTS temp2;
-- 建表CREATE TABLE IF NOT EXISTS temp2 ( DATE DATETIME, VALUE INT );
-- 插入语句INSERT INTO temp2 VALUES ('2018/11/23', 10), ('2018/11/25', 12),('2018/12/31', 3), ('2019/2/9', 53), ('2019/3/31', 23), ('2019/7/8', 11), ('2019/7/31', 10);
-- 使用窗口函数SELECT YEAR(DATE) AS 'YEAR', MONTH(DATE) AS 'MONTH'
, SUM(VALUE) AS 'VALUE'
, SUM(SUM(VALUE)) OVER (PARTITION BY YEAR(DATE) ROWS UNBOUNDED PRECEDING) AS 'YSUM'
, SUM(SUM(VALUE)) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS 'SUM'
FROM temp2
GROUP BY YEAR, MONTH
-- 不使用窗口函数SET @year_sum = 0;
SET @cumsum = 0;
SET @year = 0;
SELECT b.YEAR, b.MONTH, b.YSUM, b.SUM
FROM (
SELECT a.*
, @year_sum := IF(@year = YEAR, @year_sum + VALUE, VALUE) AS YSUM
, @year := YEAR
, @cumsum := @cumsum + VALUE AS SUM
FROM (
SELECT YEAR(DATE) AS YEAR, MONTH(DATE) AS MONTH
, SUM(VALUE) AS VALUE
FROM temp2
GROUP BY YEAR(DATE), MONTH(DATE)
) a
) b
第五题 前n个字符串拼接
求userid对应前两个不同的场景,如果场景重复则选择第一次访问时间,场景号不足两个的输出一个即可。
我的思路是先根据userid和area分组,此时出来的数据就是每个user去过的每个area,再使用窗口函数赋予排名,将排名<=2取出,最后再根据userid分组进行concat拼接。
-- 删除表DROP TABLE IF EXISTS temp3;
-- 建表CREATE TABLE IF NOT EXISTS temp3 ( userid VARCHAR(20), AREA VARCHAR(20), DATE DATETIME );
-- 插入数据INSERT INTO temp3 VALUES (1, 1001, '2020/01/01'), (2, 1002, '2020/01/01'), (1, 1002, '2020/01/02'), (1, 1001, '2020/01/02'), (2, 1003, '2020/01/01'), (2, 1004, '2020/01/02'), (3, 1003, '2020/01/01'), (4, 1004, '2020/01/03'), (4, 1003, '2020/01/03'), (4, 1001, '2020/01/04'), (4, 1002, '2020/01/05'), (5, 1002, '2020/01/01'), (5, 1002, '2020/01/02'), (5, 1001, '2020/01/03'), (5, 1003, '2020/01/03');
SELECT CONCAT(b.userid, '-', GROUP_CONCAT(b.area SEPARATOR '-')) AS answer
FROM (
SELECT a.*, ROW_NUMBER() OVER (PARTITION BY userid ORDER BY DATE) AS 'rank'
FROM (
SELECT userid, AREA, DATE
FROM temp3
GROUP BY userid, AREA
) a
) b
WHERE b.rank <= 2
GROUP BY b.userid
第六题 连续签到问题
用户每天打卡一次为一条数据,可能存在重复行,同一天多条打卡记录算作一天打卡,假设今天为2020/08/06。
-- 删除表DROP TABLE IF EXISTS temp4;
-- 建表CREATE TABLE IF NOT EXISTS temp4 ( id INT, DATE DATE);
-- 插入数据INSERT INTO temp4 VALUES (1, '2020-07-31'), (1, '2020-08-01'), (1, '2020-08-03'), (1, '2020-08-04'), (1, '2020-08-05'), (1, '2020-08-06'), (2, '2020-08-04'), (2, '2020-08-05'), (2, '2020-08-06'), (3, '2020-08-03'), (3, '2020-08-04'), (3, '2020-08-06'), (4, '2020-07-29'), (4, '2020-07-30'), (4, '2020-07-31'), (4, '2020-08-06')
查询2020/08/06当天打卡的用户。
SELECT DISTINCT id,DATE FROM temp4 WHERE DATE(DATE)='2020/08/06';
现在有个七天活动,活动时间是从2020/07/31-2020/08/06的,我想知道2020/08/06这一天每个用户最近打卡日期及活动累计登录天数 。
SELECT id, MAX(DATE) AS '最近打卡日期', COUNT(DATE) AS '活动累计打卡天数'
FROM (
SELECT DISTINCT id, DATE(DATE) AS DATE
FROM temp4
WHERE DATE(DATE) BETWEEN '2020/07/31' AND '2020/08/06'
) a
GROUP BY id
我想查询截止至2020/08/06当天连续打卡3天的用户(包含2020/08/06)。
SELECT distinct id
FROM (
SELECT id, DATE, LEAD(DATE, 2) OVER (PARTITION BY id ORDER BY DATE DESC) AS 'lead_date'
FROM (
SELECT DISTINCT id, DATE(DATE) AS DATE
FROM temp4
) a
) b
WHERE DATE = '2020/08/06'
AND DATE = DATE_ADD(lead_date, INTERVAL 2 DAY);
用户历史最大连续签到天数及该次记录的最后一天签到日期。(这题自己随便想的,实在是没什么好的解法)
DROP TABLE IF EXISTS temp_;
CREATE TABLE temp_ AS
(SELECT id, COUNT(date_) AS c, DATE
FROM (
SELECT id, DATE, DATE_ADD(DATE, INTERVAL ROW_NUMBER() OVER (PARTITION BY id ORDER BY DATE DESC) DAY) AS date_
FROM (
SELECT DISTINCT id, DATE(DATE) AS DATE
FROM temp4
) a
) b
GROUP BY id, date_);
SELECT a.id, a.c AS 最大连续签到天数, DATE
FROM temp_
INNER JOIN (
SELECT id, MAX(c) AS c
FROM temp_
GROUP BY id
) a
ON temp_.id = a.id
AND temp_.c = a.c;
第七题 列转行
-- 删除存在的表DROP TABLE IF EXISTS temp1;
-- 建表CREATE TABLE IF NOT EXISTS temp1 ( sname VARCHAR(20), chinese INT, english INT, maths INT );
-- 插入数据INSERT INTO temp1 VALUES ('A', 90, 60, 40), ('B', 50, 90, 20), ('C', 40, 70, 90);
SELECT *
FROM (
SELECT sname, 'chinese' AS SUBJECT, chinese AS score
FROM temp1
UNION
SELECT sname, 'english' AS SUBJECT, english AS score
FROM temp1
UNION
SELECT sname, 'maths' AS SUBJECT, maths AS score
FROM temp1
) a
ORDER BY a.sname;
第八题 日期间隔问题拥有2辆及以上车的人每辆车的购车金额占个人总购车金额的比重。
DROP TABLE IF EXISTS car;
CREATE TABLE car ( userid int, carid varchar(10), price decimal(6, 2), date date );
INSERT INTO car VALUES (1, '沪A66666', 66.66, '2020-1-1'), (2, '沪A13256', 100.66, '2020-1-1'), (3, '沪A95466', 20.66, '2020-1-3'), (4, '沪A78945', 50.66, '2020-1-4'), (1, '沪A33666', 70.66, '2020-1-4'), (1, '沪A68886', 1006.66, '2020-1-5'), (2, '沪A88886', 88.66, '2020-1-5'), (4, '沪A45466', 123.66, '2020-6-6'), (1, '沪A66886', 2066.66, '2020-6-1');
SELECT car.userid, carid
, concat(round(price / s * 100, 2), '%') AS '占比'
FROM car
INNER JOIN (
SELECT userid, SUM(price) AS s, COUNT(userid) AS c
FROM car
GROUP BY userid
) a
ON car.userid = a.userid
WHERE c >= 2
ORDER BY car.userid
2. 求出个人两次购车间隔不超过90天的记录。
SELECT userid, carid, price, date
FROM (
SELECT *
, datediff(date, lag(date, 1) OVER (PARTITION BY userid ORDER BY date)) AS diff1
, datediff(date, lead(date, 1) OVER (PARTITION BY userid ORDER BY date)) AS diff2
FROM car
ORDER BY date
) a
WHERE abs(diff1) <= 90
OR abs(diff2) <= 90
参考文献