/* 建表liucun(id number, login_dt date)
数据
1,2021/9/1
2,2021/9/1
3,2021/9/1
4,2021/9/1
1,2021/9/2
2,2021/9/2
4,2021/9/2
5,2021/9/2
1,2021/9/3
2,2021/9/3
3,2021/9/3
4,2021/9/3
5,2021/9/3
6,2021/9/3
2,2021/9/4
3,2021/9/4
5,2021/9/4
6,2021/9/4
题目一:要求根据数据统计每天的新增用户
1,2021/9/1
2,2021/9/1
3,2021/9/1
4,2021/9/1
5,2021/9/2
6,2021/9/3
*/
分析:新增用户表示在当前日期前没有登录过的id,即就是当前日期的新增id
/*建表以及插入数据*/
create table liucun(id number,
login_dt date);
insert into liucun values(1,to_date('2021/9/1','yyyy/mm/dd'));
insert into liucun values(2,to_date('2021/9/1','yyyy/mm/dd'));
insert into liucun values(3,to_date('2021/9/1','yyyy/mm/dd'));
insert into liucun values(4,to_date('2021/9/1','yyyy/mm/dd'));
insert into liucun values(1,to_date('2021/9/2','yyyy/mm/dd'));
insert into liucun values(2,to_date('2021/9/2','yyyy/mm/dd'));
insert into liucun values(4,to_date('2021/9/2','yyyy/mm/dd'));
insert into liucun values(5,to_date('2021/9/2','yyyy/mm/dd'));
insert into liucun values(1,to_date('2021/9/3','yyyy/mm/dd'));
insert into liucun values(2,to_date('2021/9/3','yyyy/mm/dd'));
insert into liucun values(3,to_date('2021/9/3','yyyy/mm/dd'));
insert into liucun values(4,to_date('2021/9/3','yyyy/mm/dd'));
insert into liucun values(5,to_date('2021/9/3','yyyy/mm/dd'));
insert into liucun values(2,to_date('2021/9/4','yyyy/mm/dd'));
insert into liucun values(3,to_date('2021/9/4','yyyy/mm/dd'));
insert into liucun values(5,to_date('2021/9/4','yyyy/mm/dd'));
insert into liucun values(6,to_date('2021/9/4','yyyy/mm/dd'));
方法一:根据用户id分组后,每组里面日期最小的就说明登录时间最早,登录时间最早的那次就是新用户
select id,min(login_dt)
from liucun
group by id;
执行结果如下:
方法二:使用相关子查询
分析:登录的这天之前的日期中,用户id一直都没有出现过,就说明这个用户只在这天登录,所以它就是这天的新增用户
select id,login_dt
from liucun t1
where id not in (select id
from liucun t2
where t2.login_dt<t1.login_dt);
题目二:
找出连续三天以上登录的用户id
先查看一下全表数据:
想让相同id的数据放在同一组,对id进行排序:
对id排序后日期是乱的,并没有按照顺序排列,所以对id和日期一起排序,就得到了下面的数据:
可以看出,每一组相同的id里面的日期已经排好了顺序,日期数据是可以进行加减计算的,加减一个数字得到的还是日期:
现在对每个日期减去伪劣的行号,得到的也是一个日期,可以看到对每组数据减去rownum之后,i以id为1的用户为例,从9.1到9.3都是连续登录的,所以在连续时间内login_dt-rownum的结果变成了一个恒定的值,同一各id并且日期是同一组恒定的值就说明这几天该用户都是连续登录的。
现在对这组数据进行分组并统计个数,就得到了下面的数据:
要得到连续三天以上登录的用户id,只需要对分组后的数据再次进行过滤,过滤条件就是count(*)>=3:
具体代码:
select id,login_dt-rownum,count(*)
from (select * from liucun order by id, login_dt)
group by id,login_dt-rownum
having count(*)>=3;
使用另外一个例子:
建表:
CREATE TABLE T_Person (FName VARCHAR2(20),FCity VARCHAR2(20),FAge INT,FSalary INT);
---插入数据
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Tom','BeiJing',20,3000);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Tim','ChengDu',21,4000);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Jim','BeiJing',22,3500);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Lily','London',21,2000);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('John','NewYork',22,1000);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('YaoMing','BeiJing',20,3000);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Swing','London',22,2000);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Guo','NewYork',20,2800);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('YuQian','BeiJing',24,8000);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Ketty','London',25,8500);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Kitty','ChengDu',25,3000);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Merry','BeiJing',23,3500);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Smith','ChengDu',30,3000);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Bill','BeiJing',25,2000);
INSERT INTO T_Person(FName,FCity,FAge,FSalary)
VALUES('Jerry','NewYork',24,3300);
查工资大于本城市平均工资的人员信息:
select t1.*
from t_person t1
where t1.fsalary<(select avg(t2.fsalary) avg2 from t_person t2 where t1.fcity=t2.fcity);