1.交易表结构为user_id,order_id,pay_time,order_amount
(i) 写sql查询过去一个月付款用户量(提示 用户量需去重)最高的3天分别是哪几天
SELECT pay_time, COUNT(DISTINCT user_id) AS number
WHERE pay_time > DATA_ADD(GETDATE(), INTERVAL -30 DAY)
GROUP BY pay_time
ORDER BY number DESC
LIMIT 3
(ii) 写sql查询昨天每个用户最后付款的订单ID及金额
SELECT t1.order_id, t1.order_amount
FROM
(
SELECT user_id, MAX(pay_time), order_id, order_amount
FROM Table t
GROUP BY user_id
) t1
2.PV表a(表结构为user_id,goods_id),点击表b(user_id,goods_id),数据量各为50万?条,在防止数据倾斜的情况下,写一句sql找出两个表共同的user_id和相应的goods_id
3.表结构为user_id, reg_time, age
(i) 写一句sql按user_id尾数随机抽样2000个用户
SELECT * FROM Table ORDER BY RAND() LIMIT 2000
(ii) 写一句sql取出按各年龄段(每10岁一个分段,如[0,10))分别抽样1%的用户
4.用户登录日志表为user_id, log_id, session_id,plat
(i) 用sql查询近30天每天平均登录用户数量
SELECT SUM(t1.number)/30
FROM t1
(SELECT 日期每日, COUNT(DISTINCT user_id) AS number
FROM Table t
WHERE 时间 > 现在时间 - 30
GROUP BY 日期(每日))t1
(ii) 用sql查询出近30天连续访问7天以上的用户数量
SELECT COUNT(DISTINCT t1.user_id)
FROM
(SELECT t.user_id, t.时间,(时间 - row_number() OVER (PARTITION BY user_id ORDER BY 时间)) as diff
(SELECT *
FROM Table
WHERE GETDATE() < DATE_ADD(时间,INTERVAL 30 DAY
ORDER BY user_id, 时间) t
) t1
GROUP BY t1.user, t1.diff
HAVING COUNT(*) >=7
5.表user_id,visit_date,page_name,plat (见notebook)
(i) 统计近7天每天到访的新用户数
(ii) 统计每个访问渠道plat7天前的新用户的3日留存率和7日留存率
6. 找出每个部门薪水前两名的人(salary, employee, department)
方法一:同一张表用两次,选出比自己工资多的人数只有1人或者没有的那个人
SELECT *
FROM Table t
WHERE (SELECT COUNT(*) FROM Table t1 WHERE t.department = t1.department AND t1.salary < t.salary) <=1
方法二:row_number
SELECT *
FROM (SELECT row_number OVER (PARTITION BY department ODER BY salary DESC) AS row_number, *)
WHERE row_number <= 2
7. 数据库sql语句使用,给你一个S学生表(S_ID,S_NAME)、C课程表(C_ID,C_NAME)、SC学生选课成绩表(S_ID,C_ID,SCORE),问怎么查询到某门课成绩最高的两个学生的姓名?需要写出对应的sql语句。
重要知识点:
ROW_NUMBER() -- 是没有重复值的排序(即使两条记录相同,序号也不重复的),不会有同名次。
DENSE_RANK() -- 是连续的排序,两个第二名仍然跟着第三名。
RANK() -- 是跳跃排序,两个第二名下来就是第四名。
SELECT *
FROM
(SELECT t3.C_NAME, t2.S_NAME, row_number() OVER (partition by t3.C_NAME order by t1.score desc) ranking
FROM Student_Class_Score t1
Left join Student t2 ON t1.S_ID = t2.S_ID
Left join Class t3 ON t1.C_ID = t2.C_ID
) t
where t.ranking <= 3
8. 求出每个国家发生交易的总额(买家和卖家在两个国家的分别计入)
select country, sum(amount) as sum
from
(
(select t.*, u1.country as country
from transaction as t
left join user as u1
on t.seller_id = u1.user_id)
union
(select t.*, u2.country as country
from transaction as t
left join user as u2
on t.buyer_id = u2.user_id)
) new
group by country
9. Users交易
知识点1:
窗口函数!!!一定要注意!!!我就凉在这里了!!!
窗口函数使用举例
知识点2:(HiveSQL)
datediff(string enddate, string startdate)-- datediff('2016-12-30', '2016-12-29')
date_add(string startdate, intdays) -- date_add('2016-12-29',10)
date_sub (string startdate,int days) -- date_sub('2016-12-29',10)
(1)求出每个userid连续交易的最长天数(适用于一天有多笔交易的情况)
select userid,max(cons_num) as max_cons_num
from
(
select userid, cons_no,count(cons_no) as cons_num
from
(
select *,
datediff(day,'2019-01-01',dateid) as day,
row_number() over(partition by userid order by userid,dateid) as rownumber,
datediff(day,'2019-01-01',dateid) - row_number() over(partition by userid order by userid,dateid) as cons_no
from
(select * from trans group by userid, dateid) a
) b
group by userid, cons_no
) c
group by userid
(2)求出每个userid连续不交易的最长天数:
select userid, max(uncons_day) as max_uncons_day
from
(
select *,
min(dateid) over (partition by userid order by userid,dateid rows between 1 preceding and 1 preceding) as preceding,
datediff(day,min(dateid) over (partition by userid order by userid,dateid rows between 1 preceding and 1 preceding) ,dateid) as uncons_day
from
(select * from trans group by userid,dateid) a
) b
group by userid