自连接
中位数问题
员工薪水中位数
将集合里的元素按照大小分为上半部分和下半部分两个子集,同时让这2 个子集共同拥有集合正中间的元素。这样共同部分的元素的平均值就是中位数
奇数情况:1个值 num(mid)>=n/2 个元素&& num(mid)<=n/2个元素
偶数情况:2个值 num(mid)>=n/2 个元素&& num(mid)<=n/2个元素
select min(t1.id) as id,
t1.company,
t1.salary
from Employee t1
join Employee t2
on t1.company=t2.company
group by t1.company,
t1.salary
having sum(case when t1.salary>=t2.salary then 1 else 0 end)>=count(*)/2
and sum(case when t1.salary<=t2.salary then 1 else 0 end)>=count(*)/2
给定频率查询中位数
解法同上:将数据划分为大于等于中位数的集合和小于等于中位数的集合,中位数就是二者的交集
select round(avg(num),2) as median
from (
select num,
sum(frequency) over() as total_num,
sum(frequency) over(order by num asc rows between unbounded preceding and current row) as less_num,
sum(frequency) over(order by num desc rows between unbounded preceding and current row) as more_num
from Numbers
group by num
)t
where less_num>=total_num/2
and more_num>=total_num/2
普通自连接问题
lc1097游戏玩法IV
自连接解法
-- 简简单单自连接
select t1.first_dt as install_dt,
count(t1.player_id) as installs,
round(coalesce(sum(if(t2.player_id is not null,1,0))/count(1),0),2) as Day1_retention
from (
select player_id,
min(event_date) as first_dt
from Activity
group by 1
)t1
left join (
select player_id,
event_date
from Activity
)t2
on datediff(t2.event_date,t1.first_dt)=1
and t1.player_id=t2.player_id
group by t1.first_dt
窗口函数解法
-- 1、求出每个player_id 的首次登录日期
-- 2、按首次登录日期进行去重计算
select first_date as install_dt,
count(distinct player_id) as installs,
round(coalesce(sum(if(datediff(event_date,first_date)=1,1,0))/count(distinct player_id),0),2) as Day1_retention
from (
select player_id,
event_date,
min(event_date) over(partition by player_id order by event_date asc) as first_date
from Activity
)t
group by first_date
lc1917好友推荐
-- 普通自连接
with tmp_friend as (
select user1_id,
user2_id
from Friendship
union all select user2_id as user1_id,
user1_id as user2_id
from Friendship
),
tmp_music_info as (
select user_id,
song_id,
day
from Listens
group by 1,
2,
3
),
tmp_over_3_song as (
select t1.user_id as user1_id,
t2.user_id as user2_id,
t1.day
from tmp_music_info t1
join tmp_music_info t2
on t1.user_id<>t2.user_id
and t1.song_id=t2.song_id
and t1.day=t2.day
group by 1,
2,3
having count(1)>=3
) select t1.user1_id as user_id,
t1.user2_id as recommended_id
from (
select user1_id,
user2_id
from tmp_over_3_song
union all select user2_id as user1_id,
user1_id as user2_id
from tmp_over_3_song
) t1
left join tmp_friend t2
on t1.user1_id=t2.user1_id
and t1.user2_id=t2.user2_id
where t2.user1_id is null
group by 1,2
lc2388将null值替换成前一个非空值
解法1 自定义分组
WITH tmp AS (
SELECT
id,
drink,
ROW_NUMBER() OVER() AS rk
FROM
CoffeeShop
)
SELECT
id,
MAX(drink) OVER(PARTITION BY grap ORDER BY rk ASC) AS drink
FROM (
SELECT
id,
drink,
rk,
SUM(flag) OVER(ORDER BY rk ASC) AS grap
FROM (
SELECT
*,
IF(drink IS NULL,0,1) AS flag
FROM
tmp
) t
) t
解法2自连接
with tmp as (
select id,
drink,
row_number() over() as rk
from CoffeeShop
) select id,
drink
from (
select t1.id,
t2.drink,
t1.rk,
row_number() over(partition by t1.id,t1.drink order by t2.rk desc) as rn
from tmp t1
join tmp t2
on t1.rk>=t2.rk
where t2.drink is not null
)t
where rn=1
group by 1,
2
order by rk asc
自连接写法2
WITH tmp AS (
SELECT
id,
drink,
row_number() OVER() AS rn
FROM
CoffeeShop
)
SELECT
id,
coalesce(drink,last_drink) as drink
FROM (
SELECT
t1.id,
t1.drink,
t1.rn,
t2.drink as last_drink,
row_number() over(partition by t1.id,t1.drink,t1.rn order by t2.rn desc ) as rk
FROM
tmp t1
LEFT JOIN (
SELECT
id,
drink,
rn
FROM
tmp t
WHERE
drink IS NOT NULL
) t2
ON t1.rn > t2.rn
) t
WHERE rk = 1
ORDER BY rn
自连接实现排序
select score,
`rank` from(
select t1.id,
t1.score,
count(distinct t2.score)+1 as `rank`
from Scores t1
left join Scores t2
on t1.id<>t2.id
and t1.score<t2.score
group by 1,
2
order by 3 asc
)t
order by 2 asc
Gap and island 连续登录问题
连续登录问题-601体育馆的人流量
1、等差数列
-- 1、筛选出大于100的天数
-- 2、利用等差数列的性质,两个等差数列差值是一定的
-- 3、等差数列的长度就是连续的天数
with tmp as (
select *,
id-row_number() over(order by id) as diff
from Stadium
where people>=100
)
select id,
visit_date,
people
from (
select id,
visit_date,
people,
count(diff) over(partition by diff) as cnt
from tmp
)t
where cnt>=3
order by visit_date asc
2、自连接
-- 1、求出大于等于100的行
-- 2、自连接 t2.id between t1.id-2 and t1.id
-- 3、筛选出等于count(t2.id)=3的行,t1.id t1.id-1 t1.id-2 人流量都大于100
-- 4、对于所有的t1.id,对应的t2.id就是所包含的连续的id
-- 5、对第4步的结果去重就是答案
with tmp as (
select *
from Stadium
where people>=100
) select id,
visit_date,
people
from (
select t2.id,
t2.visit_date,
t2.people,
count(1) over(partition by t1.id) as cnt
from tmp t1
join tmp t2
on t2.id between t1.id-2 and t1.id
)t
where cnt>=3
group by 1,
2,
3
order by visit_date asc
3、自定义分组
WITH tmp AS (
SELECT *
FROM Stadium
WHERE people >= 100
),
-- 1、计算当前id和前一个id是否是连续的,连续的标记offset=1
offset_calc AS (
SELECT id,
visit_date,
people,
id - LAG(id, 1, 0) OVER (ORDER BY id ASC) AS offset
FROM tmp
),
-- 2、如果offset=1表示该id和上个id是连续的,该行标记为0
-- 如果不连续,该行为连续区间的起始点,单独某行可以看做长度为1的连续数组
offset_flag_calc AS (
SELECT id,
visit_date,
people,
IF(offset = 1, 0, id) AS offset_flag
FROM offset_calc
),
-- 3、对于每个连续区间的起始点,可以把后面的数据填充为起始点的值
grap_calc AS (
SELECT id,
visit_date,
people,
SUM(offset_flag) OVER (ORDER BY id ASC) AS grap
FROM offset_flag_calc
),
-- 4、计算连续分区的数量
cnt_calc AS (
SELECT id,
visit_date,
people,
grap,
COUNT(1) OVER (PARTITION BY grap) AS cnt
FROM grap_calc
)
-- 5、筛选符合要求的值
SELECT id,
visit_date,
people
FROM cnt_calc
WHERE cnt >= 3
GROUP BY id, visit_date, people
ORDER BY visit_date ASC;
lc1225报告状态连续的日期
解法1、等差数列解法
-- 1、整合所有状态
-- 2、利用等差数列性质,构建原始序列
-- 3、每个状态构造一个序列
with all_state as (
select fail_date as date1,
'failed' as statement
from Failed
where fail_date between '2019-01-01' and '2019-12-31'
union all select success_date as date1,
'succeeded' as statement
from Succeeded
where success_date between '2019-01-01' and '2019-12-31'
) select period_state,
start_date,
end_date
from (
select id,
statement as period_state,
min(date1) as start_date,
max(date1)as end_date
from (
select date1,
statement,
datediff(date1,'1970-01-01') -row_number() over(partition by statement order by date1 asc) as id
from all_state
) t
group by 1,
2
)t
order by start_date asc
自定义分组,找到每个阶段第一个日期的解法
-- 创建一个包含所有状态的表
WITH all_status AS (
SELECT fail_date AS date, 'failed' AS status
FROM Failed
WHERE fail_date BETWEEN '2019-01-01' AND '2019-12-31'
UNION ALL
SELECT success_date AS date, 'succeeded' AS status
FROM Succeeded
WHERE success_date BETWEEN '2019-01-01' AND '2019-12-31'
),
-- 创建一个表,包含前一个状态的日期
previous_status AS (
SELECT date, status, LAG(date,1,'1970-01-01') OVER(PARTITION BY status ORDER BY date ASC) AS previous_date
FROM all_status
),
-- 创建一个表,包含标记,标记表示当前状态是否连续
status_with_flag AS (
SELECT date, status, MAX(offset_flag) OVER(PARTITION BY status ORDER BY date ASC) AS flag
FROM (
SELECT date, status, IF(DATEDIFF(date,previous_date)=1,0,date) AS offset_flag
FROM previous_status
) AS offset_status
)
-- 选择每个状态的开始和结束日期
SELECT period_status, start_date, end_date
FROM (
SELECT flag, status AS period_status, MIN(date) AS start_date, MAX(date) AS end_date
FROM status_with_flag
GROUP BY flag, period_status
) AS period_status
ORDER BY start_date ASC
lc1454活跃用户
等差数列算法
with tmp as(
select t1.id,t1.name,t2.login_date
from Accounts t1
join (
select id,login_date
from Logins
group by 1,2
)t2
on t1.id=t2.id
)
select id,name
from (
select id,name,grap
from (
select id,name,
datediff(login_date,'1970-01-01')- row_number() over(partition by id order by login_date asc) as grap
from tmp
)t
group by 1,2,3
having count(1)>=5
)t
group by 1,2
自定义分组-标记每一个的分组
with tmp as(
select t1.id,
t1.name,
t2.login_date
from Accounts t1
join (
select id,
login_date
from Logins
group by 1,
2
)t2
on t1.id=t2.id
) select id,
name from(
select id,
name,
flag
from (
select id,
name,
login_date,
max(flag) over(partition by id order by login_date asc) as flag
from (
select id,
name,
login_date,
if(datediff(login_date,lag_login_date)=1,0,login_date) as flag
from (
select id,
name,
login_date,
lag(login_date,1,'1970-01-01') over(partition by id order by login_date asc) as lag_login_date
from tmp
)t
)t
)t
GROUP BY 1,
2,
3
HAVING count(1)>=5
)t
group by 1,2
lc1811 寻找面试候选人
with tmp_gold_more_3_times as (
select t2.user_id,
t2.name,
t2.mail
from Contests t1
join Users t2
on t1.gold_medal=t2.user_id
group by 1,
2,
3
having count(1)>=3
),
tmp_continue_ward_ori as (
select gold_medal as user,
contest_id
from Contests
union all select silver_medal as user,
contest_id
from Contests
union all select bronze_medal as user,
contest_id
from Contests
),
tmp_continue_ward as (
select user
from (
select user,
id
from (
select user,
contest_id,
contest_id-row_number() over(partition by user order by contest_id asc) as id
from tmp_continue_ward_ori
)t
group by 1,
2
having count(1)>=3
)t
group by 1
) select name,
mail
from tmp_gold_more_3_times
union select name,
mail
from (
select t2.name,
t2.mail
from tmp_continue_ward t1
join Users t2
on t1.user=t2.user_id
)t
自定义分组
with tmp_gold_more_3_times as (
select t2.user_id,
t2.name,
t2.mail
from Contests t1
join Users t2
on t1.gold_medal=t2.user_id
group by 1,
2,
3
having count(1)>=3
),
tmp_continue_ward_ori as (
select user,
contest_id from(
select gold_medal as user,
contest_id
from Contests
union all select silver_medal as user,
contest_id
from Contests
union all select bronze_medal as user,
contest_id
from Contests
)t
group by 1,
2
),
tmp_continue_ward as (
select user
from (
select user,
id
from (
select user,
max(is_continue) over(partition by user order by contest_id asc) as id
from (
select user,
contest_id,
if(contest_id-lag_contest_id=1,0,contest_id)as is_continue from(
select user,
contest_id,
lag(contest_id,1,-1) over(partition by user order by contest_id asc) as lag_contest_id
from tmp_continue_ward_ori
)t
)t
)t
group by 1,
2
having count(1)>=3
)t
group by 1
) select name,
mail
from tmp_gold_more_3_times
union select name,
mail
from (
select t2.name,
t2.mail
from tmp_continue_ward t1
join Users t2
on t1.user=t2.user_id
)t
有的用户可能没有参加每一场竞赛,但是在参加的每一场竞赛中都表现得不错。你如何更改你的解法,以达到只考虑那些 用户参与了的 比赛?可假设另一张表给出了每场比赛的注册用户信息。
生成两个序列即可
用户参与比赛的序列
用户参与比赛并获奖的序列
lc1843可疑的银行账户
WITH tmp_income_info AS (
SELECT t1.account_id,
t2.mo
FROM Accounts t1
JOIN (
SELECT account_id,
FROM_UNIXTIME(UNIX_TIMESTAMP(day), '%Y%m') AS mo,
SUM(amount) AS amount
FROM Transactions
WHERE type='Creditor'
GROUP BY 1,
2
) t2
ON t1.account_id = t2.account_id
WHERE t2.amount > t1.max_income
) SELECT account_id FROM(
SELECT account_id,
grap
FROM (
SELECT account_id,
mo - ROW_NUMBER() OVER(PARTITION BY account_id ORDER BY mo ASC) AS grap
FROM tmp_income_info
)t
GROUP BY 1,
2
HAVING COUNT(1) >= 2
)t
GROUP BY 1
lc2173最多连胜的次数
等差数列法,每个用户按照日期排序数列和按照日期输赢结果序列相减
select player_id,
max(case when result='Win' then streak else 0 end) as longest_streak
from (
select player_id,
result,
id1-id2 as grap,
count(1) as streak
from (
select player_id,
match_day,
result,
row_number() over(partition by player_id order by match_day asc) as id1,
row_number() over(partition by player_id,result order by match_day asc) as id2
from Matches
)t
group by 1,
2,
3
)t
group by 1
自定义分组,sum()使用0,1标记分组,max用日期标记分组
select player_id,
max(case when result='Win' then streak else 0 end) as longest_streak
from (
select player_id,
result,
grap,
count(1) as streak
from (
select player_id,
result,
match_day,
max(grap_begain) over(partition by player_id order by match_day asc) as grap
from (
select player_id,
match_day,
result,
if(result=lag_result,0,match_day) as grap_begain
from (
select player_id,
match_day,
result,
lag(result,1) over(partition by player_id order by match_day asc) as lag_result
from Matches
)t
)t
group by 1,
2,
3
)t
group by 1,
2,
3
)t
group by 1
2474连续递增问题
使用自定义分组:
with tmp AS
(SELECT year(order_date) AS y,
customer_id,
sum(price) AS amt
FROM Orders
GROUP BY 1,2 )
SELECT customer_id from
(SELECT y,
customer_id,
if(amt>lag_amt
AND y-lag_y<=1,1,0) AS flag from
(SELECT y,
lag(y,
1,
y) over(partition by customer_id
ORDER BY y asc) AS lag_y, customer_id, amt, lag(amt,1,0) over(partition by customer_id
ORDER BY y asc) AS lag_amt
FROM tmp )t )t
GROUP BY 1
HAVING count(1)=sum(if(flag=1,1,0))
需要递增的队列和序号进行比对。row_number()允许大于等于,rank是严格大于
# Write your MySQL query statement below
WITH tmp AS
(SELECT year(order_date) AS y,
customer_id,
sum(price) AS amt
FROM Orders
GROUP BY 1,
2 ) -- 自定义分组 -- 两个序列比较SELECT customer_id from
(SELECT customer_id,
amt,
y-rank() over(partition by customer_id
ORDER BY amt asc) AS grap
FROM tmp )t
GROUP BY 1
HAVING count(distinct grap)=1
窗口函数
基本使用
查询员工的累计薪水
SELECT
Id,
Month,
Salary
FROM (
SELECT
Id,
Month,
SUM(Salary) OVER (PARTITION BY Id ORDER BY Month ASC RANGE 2 PRECEDING) AS Salary,
RANK() OVER (PARTITION BY Id ORDER BY Month DESC) AS r
FROM
Employee
) t
WHERE
r > 1
ORDER BY
Id,
Month DESC;
lc626换座位
考察lead lag 函数以及位运算
-- 1、如果为奇数,取后一行的值,如果为偶数,取前一行的值
SELECT if(id&1,lead_id,lag_id) AS id,
student
FROM (
SELECT id,
lag(id,1,id) over(ORDER BY id ASC) AS lag_id,
lead(id,1,id) over(ORDER BY id ASC) AS lead_id,
student
FROM Seat
) t
ORDER BY if(id&1,lead_id,lag_id) ASC
lc1127用户购买平台
关键点在于使用窗口函数定义是否购买的
-- 1、生成全部的组合
WITH tmp_date_plat AS (
SELECT spend_date,
platform
FROM (
SELECT spend_date
FROM Spending s
GROUP BY 1
) t1
JOIN (
SELECT 'desktop' AS platform
UNION ALL SELECT 'mobile' AS platform
UNION ALL SELECT 'both' AS platform
) t2
ON 1 = 1
),
-- 2、使用窗口函数判定购买属性,然后根据属性进行分组计算
tmp_user_info AS (
SELECT spend_date,
CASE WHEN is_desktop_user=1 AND is_mobile_user= 1 THEN 'both'
WHEN is_desktop_user=1 AND is_mobile_user= 0 THEN 'desktop'
WHEN is_desktop_user=0 AND is_mobile_user= 1 THEN 'mobile'
END AS platform,
SUM(amount) AS total_amount,
COUNT(distinct user_id) AS total_users
FROM (
SELECT user_id,
spend_date,
amount,
platform,
MAX(CASE WHEN platform='mobile' THEN 1 ELSE 0 END) OVER(PARTITION BY user_id,spend_date) AS is_mobile_user,
MAX(CASE WHEN platform='desktop' THEN 1 ELSE 0 END) OVER(PARTITION BY user_id,spend_date) AS is_desktop_user
FROM Spending
)t
GROUP BY 1,
2
) SELECT t1.spend_date,
t1.platform,
COALESCE(t2.total_amount,0) AS total_amount,
COALESCE(t2.total_users,0) AS total_users
FROM tmp_date_plat t1
LEFT JOIN tmp_user_info t2
ON t1.spend_date=t2.spend_date
AND t1.platform=t2.platform
ORDER BY t1.platform,
t1.spend_date
使用自连接实现,关键在于case when 里加上having里的判断条件,hive支持么,也支持
case when 就是用来自定义分组的
-- 1、生成全部的组合
WITH tmp_date_plat AS (
SELECT spend_date,
platform
FROM (
SELECT spend_date
FROM Spending s
GROUP BY 1
) t1
JOIN (
SELECT 'desktop' AS platform
UNION ALL SELECT 'mobile' AS platform
UNION ALL SELECT 'both' AS platform
) t2
ON 1 = 1
), -- 2、使用窗口函数判定购买属性,然后根据属性进行分组计算
tmp_user_info AS (
select spend_date,
platform,
COUNT(distinct user_id) AS total_users,
SUM(total_amount) AS total_amount
from (
SELECT spend_date,
user_id,
case when count(distinct platform)=2 then 'both'
else platform
end as platform,
SUM(amount) AS total_amount
FROM Spending
GROUP BY 1,
2
) t
GROUP BY 1,2
) SELECT t1.spend_date,
t1.platform,
COALESCE(t2.total_amount,0) AS total_amount,
COALESCE(t2.total_users,0) AS total_users
FROM tmp_date_plat t1
LEFT JOIN tmp_user_info t2
ON t1.spend_date=t2.spend_date
AND t1.platform=t2.platform
ORDER BY t1.platform,
t1.spend_date
lc1321餐饮营业额的变化
lc1972同一天第一个和最后一个电话
WITH tmp_ori AS (
SELECT
caller_id,
recipient_id,
date_format(call_time,'%Y%m%d') AS dt,
unix_timestamp(call_time) AS unixtime
FROM Calls
UNION ALL
SELECT
recipient_id AS caller_id,
caller_id AS recipient_id,
date_format(call_time,'%Y%m%d') AS dt,
unix_timestamp(call_time) AS unixtime
FROM Calls
),
tmp_rk AS (
SELECT
caller_id,
recipient_id,
dt,
row_number() OVER(PARTITION BY caller_id, dt ORDER BY unixtime ASC) AS rk1,
row_number() OVER(PARTITION BY caller_id, dt ORDER BY unixtime DESC) AS rk2
FROM tmp_ori
),
tmp_filter AS (
SELECT
caller_id,
dt
FROM tmp_rk
WHERE rk1 = 1 OR rk2 = 1
GROUP BY caller_id, dt
HAVING COUNT(DISTINCT recipient_id) = 1
)
SELECT
caller_id AS user_id
FROM tmp_filter
GROUP BY caller_id
lc2004职工招聘问题
-- sum 窗口函数的使用
WITH junior_candidates AS (
SELECT employee_id,
experience,
salary,
SUM(salary) OVER (ORDER BY salary ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS sum_salary
FROM Candidates
WHERE experience = 'Junior'
),
senior_candidates AS (
SELECT employee_id,
experience,
salary,
SUM(salary) OVER (ORDER BY salary ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS sum_salary
FROM Candidates
WHERE experience = 'Senior'
),
senior_results AS (
SELECT 'Senior' AS experience,
COUNT(1) AS accepted_candidates,
COALESCE(MAX(sum_salary), 0) AS total_salary
FROM senior_candidates
WHERE sum_salary <= 70000
),
junior_results AS (
SELECT 'Junior' AS experience,
COUNT(1) AS accepted_candidates
FROM junior_candidates
WHERE sum_salary <= 70000 - (
SELECT total_salary
FROM senior_results
)
) SELECT experience,
accepted_candidates
FROM senior_results
UNION ALL SELECT experience,
accepted_candidates
FROM junior_results
自连接和窗口的实现
外连接
lc1097游戏玩法IV
其它内容
行列转换
lc618学生地理信息
-- 1、生成排序列、按照国家分组,每个国家按照name升序排序,rk的最大值就是最多人数国家的人数
-- 2、max实现行转列操作
select America,
Asia,
Europe
from (
select rk,
max(case when continent='America' then name else null end) as America,
max(case when continent='Asia' then name else null end) as Asia,
max(case when continent='Europe' then name else null end) as Europe from(
select name,
continent,
row_number() over(partition by continent order by name asc) as rk
from student
)t
group by rk
)t
order by rk asc
合并时间段问题
lc2494合并时间段重合的活动
1、将活动按照开始日期升序排列
2、每个活动计算上一次开始时间活动之前最大的结束日期,如果本次为第一次活动,或者本次的活动开始时间大于截至上次的最大结束时间,那么本次活动可以作为新的时间段
WITH tmp AS (
SELECT
hall_id,
start_day,
end_day,
SUM(flag) OVER (PARTITION BY hall_id ORDER BY start_day ASC) AS grap
FROM (
SELECT
hall_id,
start_day,
end_day,
CASE
WHEN max_until_last IS NULL THEN 1
WHEN start_day <= max_until_last THEN 0
ELSE 1
END AS flag
FROM (
SELECT
hall_id,
start_day,
end_day,
MAX(end_day) OVER (PARTITION BY hall_id ORDER BY start_day ASC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS max_until_last
FROM HallEvents
) t
) t
)
SELECT
hall_id,
start_day,
end_day
FROM (
SELECT
hall_id,
grap,
MIN(start_day) AS start_day,
MAX(end_day) AS end_day
FROM tmp
GROUP BY 1, 2
) t
解析时间区间
spark sql2.4可以使用 sequence函数解析时间区间
expode +sequcence函数
用法1
SELECT
date_format(stream_hour, 'yyyy-MM-dd HH:00:00') AS stream_hour
FROM (
SELECT
explode(sequence(to_timestamp(start_time, 'yyyy-MM-dd HH:mm:ss'), to_timestamp(end_time, 'yyyy-MM-dd HH:mm:ss'), interval 1 hour)) AS stream_hour
FROM
live_streams
)
使用lateral view
SELECT id, hour
FROM tmp_data
LATERAL VIEW explode(sequence(to_timestamp(start_time, 'yyyy-MM-dd HH:mm:ss'), to_timestamp(end_time, 'yyyy-MM-dd HH:mm:ss'), interval 1 hour)) as hour
生成序列
lc1336每次访问的次数
如何生成序列
with tmp as (
select t1.visit_date,
t1.user_id,
count(t2.transaction_date) as ord_cnt
from Visits t1
left join Transactions t2
on t1.visit_date=t2.transaction_date
and t1.user_id=t2.user_id
group by t1.visit_date,
t1.user_id
),
tmp_ord as (
select ord_cnt as transactions_count,
count(1) as visits_count
from tmp
group by 1
),
-- 生成序列
tmp_ori as (
select row_number() over()-1 as transactions_count
from (
select user_id
from Visits
union all select user_id
from Transactions
)t
) select t1.transactions_count,
coalesce(t2.visits_count,0) as visits_count
from tmp_ori t1
left join tmp_ord t2
on t1.transactions_count=t2.transactions_count
where t1.transactions_count<=(
select max(transactions_count)
from tmp_ord
)
order by 1
购物蓝分析
[1398]购买了产品 A 和产品 B 却没有购买产品 C 的顾客
简简单单
# Write your MySQL query statement below
select t1.customer_id,
t1.customer_name
from Customers t1
left join Orders t2
on t1.customer_id=t2.customer_id
group by 1,
2
having sum(case when t2.product_name='A' then 1 else 0 end)>0
and sum(case when t2.product_name='B' then 1 else 0 end)>0
and sum(case when t2.product_name='c' then 1 else 0 end)=0
其它
1384. 按年度列出销售总额
主要是使用dt_seq确定时间范围
with tmp as (
select t1.product_id,
t1.product_name,
t2.period_start,
t2.period_end,
datediff(t2.period_start,'2018-01-01')+1 as period_start_seq,
datediff(t2.period_end,'2018-01-01')+1 as period_end_seq,
t2.average_daily_sales
from Product t1
join Sales t2
on t1.product_id=t2.product_id
) select product_id,
product_name,
report_year,
total_amount from(
select product_id,
product_name,
'2018' as report_year,
average_daily_sales*( if(period_end_seq>365,365,period_end_seq)-period_start_seq+1 ) as total_amount
from tmp
where (period_start_seq>=1 and period_start_seq<=365)
group by 1,
2,
3
union all select product_id,
product_name,
'2019' as report_year,
average_daily_sales*( if(period_end_seq>730,730,period_end_seq)- if(period_start_seq<366,366,period_start_seq) +1 ) as total_amount
from tmp
where period_start_seq<=730
and period_end_seq>=366
group by 1,
2,
3
union all select product_id,
product_name,
'2020' as report_year,
average_daily_sales*( if(period_end_seq>1096,1096,period_end_seq)- if(period_start_seq<731,731,period_start_seq) +1 ) as total_amount
from tmp
where period_start_seq<=1096
and period_end_seq>=731
group by 1,
2,
3
)t
order by product_id,
report_year asc
lc1517查找拥有有效邮箱的用户
# Write your MySQL query statement below
select user_id,name,mail
from users
where mail regexp '^[a-zA-Z][a-zA-Z0-9\\_\\./-]*@leetcode\\.com$'
lc2308按照性别切换表格
分组排序
# Write your MySQL query statement below
# 取每个性别的第一名,每个性别按照一定顺序排序
select
user_id,
gender
from(
select user_id,
gender,
case when gender='female' then 0
when gender='other' then 1
else 2 end as rk1,
row_number() over(partition by gender order by user_id asc ) as rk2
from Genders
)t
order by rk2,rk1