1.请你写出一个sql语句查询每个日期新用户的次日留存率,结果保留小数点后面3位数(3位之后的四舍五入),并且查询结果按照日期升序排序,上面的例子查询结果如下:
结果图示:
建表语句
drop table if exists login;
CREATE TABLE `login` ( `id` int(4) NOT NULL, `user_id` int(4) NOT NULL, `client_id` int(4) NOT NULL, `date` date NOT NULL, PRIMARY KEY (`id`));
插入数据
INSERT INTO login VALUES (1,2,1,'2020-10-12'), (2,3,2,'2020-10-12'), (3,1,2,'2020-10-12'), (4,2,2,'2020-10-13'), (5,1,2,'2020-10-13'), (6,3,1,'2020-10-14'), (7,4,1,'2020-10-14'), (8,4,1,'2020-10-15');
思路:
1:不一定每天都有新登录用户,例如想看10.1 10.2,但只要10.1有新登录用户,则记录只有10.2,这个比较容易直接查到原表的日期有哪些
然后我们右连接回去(也可以用当前日期不存在的union)
2:这些没有新登录用户的日期,会导致分母为0,即null值的计算,这个用ifnull。
综上整理一下,没有任何难度的代码就出来了,不涉及任何花里胡哨的代码和子查询:
解题:
select t0.date,
ifnull(round(count(distinct t2.user_id)/(count(t1.user_id)),3),0)
from
(
select date
from login
group by date
) t0
left join
(
select user_id,min(date) as date
from login
group by user_id
)t1
on t0.date=t1.date
left join login as t2
on t1.user_id=t2.user_id and datediff(t2.date,t1.date)=1
group by t0.date
2.
建表插入数据
create database "xiangji";
use "xiangji";
create table userinfo(uid varchar(10),app_name varchar(20),duration int(10),times int(10),dayno varchar(30));
Insert into table userinfo values("1001","xiangji",1000,3,"2018-05-01"),("1002","xiangji",1100,2,"2018-05-01"),
("1003","xiangji",1500,4,"2018-05-01"),("1004","xiangji",1100,2,"2018-05-01"),
("1001","xiangji",2000,5,"2018-05-02"),("1002","xiangji",1100,2,"2018-05-02"),("1004","xiangji",1400,3,"2018-05-02"),
("1001","xiangji",1100,2,"2018-05-03"),("1005","xiangji",1100,2,"2018-05-03"),("1004","xiangji",1100,2,"2018-05-03"),("1006","xiangji",1100,2,"2018-05-03"),
("1003","xiangji",1100,2,"2018-05-04"),("1004","xiangji",1100,2,"2018-05-04"),("1005","xiangji",1100,2,"2018-05-04"),("1006","xiangji",1100,2,"2018-05-04"),
("1001","xiangji",1100,2,"2018-05-05"),("1002","xiangji",1100,2,"2018-05-05"),("1003","xiangji",1100,2,"2018-05-05"),("1004","xiangji",1100,2,"2018-05-05"),("1005","xiangji",1100,2,"2018-05-05"),("1006","xiangji",1100,2,"2018-05-05"),("1007","xiangji",1100,2,"2018-05-05"),("1008","xiangji",1100,2,"2018-05-05"),
("1008","xiangji",1100,2,"2018-05-06"),("1007","xiangji",1100,2,"2018-05-06");
次日留存人数
小知识点:hivesql中不支持%Y-%m-%d,可以yyyy-MM-dd
select a.day1,
count(distinct case when day2-day1=1 then a.uid end) #次留
from
(select uid,date_format(dayno,'yyyyMMdd')as day1
from userinfo where app_name='xiangji') a #用date_format把dayno的文本格式改为可计算的形式
left join
(select uid,date_format(dayno,'yyyyMMdd')as day2
from userinfo where app_name='xiangji') b on a.uid=b.uid group by a.day1;
计算每天的活跃数,以及次日、三日、七日留存用户和留存率
select day1,
count(distinct a.uid) --活跃,
count(distinct case when day2-day1=1 then a.uid end) --次留,
count(distinct case when day2-day1=3 then a.uid end) --三留,
count(distinct case when day2-day1=7 then a.uid end) --七留,
concat(count(distinct case when day2-day1=1 then a.uid end)/count(distinct a.uid)*100,'%') --次日留存率,
concat(count(distinct case when day2-day1=3 then a.uid end)/count(distinct a.uid)*100,'%') --三日留存率,
concat(count(distinct case when day2-day1=7 then a.uid end)/count(distinct a.uid)*100,'%') --七日留存率
from (select uid, date_format(dayno, '%Y%m%d') day1 from aui where app_name = '相机') a
left join (select uid, date_format(dayno, '%Y%m%d') day2
from aui
where app_name = '相机') bon a.uid=b.uidgroup by day1;
3:如下数据为蚂蚁森林中用户领取的减少碳排放量
找出连续 3 天及以上减少碳排放量在 100 以上的用户
id dt lowcarbon
1001 2021-12-12 123
1002 2021-12-12 45
1001 2021-12-13 43
1001 2021-12-13 450
1001 2021-12-13 23
1002 2021-12-14 45
1001 2021-12-14 230
1002 2021-12-15 45
1001 2021-12-15 23
create table exam.carbon_emission
(
id int,
dt string,
lowcarbon int
)
PARTITIONED BY (ds string)
row format delimited fields terminated by ' '
stored as textfile;
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
--加载数据
load data local inpath '/home/wcw/sj/kaoshi/1.txt' into table carbon_emission partition (ds = "20220304");
select *
from (
select id, dt, lowcarbon, count(flagdt) over (partition by id,flagdt) ct
from (
select id, dt, lowcarbon, t1.rn, date_sub(cast(dt as date), rn) flagdt
from (
select id, dt, lowcarbon, row_number() over (partition by id order by dt) rn
from exam.carbon_emission
where lowcarbon >= 100
) t1
) t2) t3