MySQL Examples
Query users who logged in two or more times in one day
Analysis:
- Data model: a user info table and a login record table. The login record table typically has two core fields: user ID and login time.
- First, from the login record table, find the users (IDs) who logged in two or more times within one day.
- Then join the user info table to get the user details.
Implementation
Create the user table (user_info)
create table user_info
(
  id           bigint unsigned AUTO_INCREMENT,
  username     varchar(50) NOT NULL,
  password     varchar(50) NOT NULL,
  gender       ENUM('male', 'female'),
  birthday     DATETIME NOT NULL,
  phone_number varchar(20) NOT NULL,
  email        varchar(100),
  create_time  DATETIME NOT NULL,
  PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Create the login record table (login)
create table login
(
  id         bigint unsigned AUTO_INCREMENT,
  user_id    bigint unsigned NOT NULL,
  login_time DATETIME NOT NULL,
  PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Code
SELECT a.user_id, user_info.username, user_info.gender
FROM (
  SELECT user_id, count(login_time) AS cnt
  FROM login
  WHERE login_time >= '2024-04-01 00:00:00' AND login_time <= '2024-04-01 23:59:59'
  GROUP BY user_id
  HAVING cnt >= 2
) a
LEFT JOIN user_info
ON user_info.id = a.user_id;
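The same result can also be obtained without a derived table, by joining first and filtering the groups with HAVING; a minimal equivalent sketch:
SELECT l.user_id, u.username, u.gender, count(*) AS cnt
FROM login l
JOIN user_info u ON u.id = l.user_id
WHERE l.login_time >= '2024-04-01 00:00:00' AND l.login_time <= '2024-04-01 23:59:59'
GROUP BY l.user_id, u.username, u.gender
HAVING count(*) >= 2;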
Top 10 categories and products by sales
Analysis
- Data model: an order detail table.
- First compute the sales of each category / product.
- Sort the categories / products by sales in descending order.
- Take the top 10 categories / products.
Implementation
CREATE TABLE order_item
(
  order_id       varchar(50) DEFAULT NULL COMMENT 'order ID',
  goods_id       int(11) NOT NULL COMMENT 'product ID',
  shop_id        varchar(20) NOT NULL COMMENT 'store ID',
  cate_id        int(11) DEFAULT NULL COMMENT 'product category ID',
  price          decimal(12,2) DEFAULT NULL COMMENT 'unit selling price',
  qty            int DEFAULT NULL COMMENT 'quantity sold',
  sale_value     decimal(18,3) DEFAULT NULL COMMENT 'sales amount = unit selling price * quantity sold',
  ori_sale_value decimal(18,3) DEFAULT NULL COMMENT 'original price',
  sale_date      date NOT NULL COMMENT 'order payment date',
  KEY order_item_order_id_idx (order_id, goods_id, shop_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Code
-- Top 10 categories by sales amount
SELECT cate_id, sum(sale_value)
FROM order_item
GROUP BY cate_id
ORDER BY sum(sale_value) DESC
LIMIT 10;
-- Top 10 products by sales amount in January 2023
SELECT goods_id, sum(sale_value)
FROM order_item
WHERE sale_date >= '2023-01-01' AND sale_date <= '2023-01-31'
GROUP BY goods_id
ORDER BY sum(sale_value) DESC
LIMIT 10;
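The queries above rank by sales amount (sale_value); if "sales" is meant as quantity sold, the same pattern applies to qty. A minimal sketch:
SELECT goods_id, sum(qty) AS total_qty
FROM order_item
GROUP BY goods_id
ORDER BY total_qty DESC
LIMIT 10;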
Percentage of users with repeat purchases in the store
Analysis
- Data model: typically an order table storing order ID, user ID, product ID, purchase time, and so on.
- Count each user's number of purchases in the store.
- Get the number of users who purchased at least once and the number who purchased more than once. From the per-count user numbers of step 2, the share of users with more than one purchase follows directly.
Implementation
CREATE TABLE order_list
(
  order_id       varchar(50) DEFAULT NULL,
  shop_id        varchar(10) NOT NULL COMMENT 'store ID',
  pay_time       datetime DEFAULT NULL COMMENT 'order payment time',
  casher_id      varchar(2) COMMENT 'cashier ID',
  cash_value     decimal(28,3) DEFAULT NULL COMMENT 'actual amount received = sum of SaleValue in the corresponding order detail table',
  pay_value      decimal(18,2) DEFAULT NULL COMMENT 'amount paid; pay_value - cash_value = change given',
  discount_value decimal(18,2) DEFAULT NULL COMMENT 'order discount amount',
  member_id      varchar(50) DEFAULT NULL COMMENT 'user ID',
  UNIQUE KEY order_list_order_id_idx (order_id) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
Code
SELECT
  sum(IF(buy_cnt > 1, num, 0)),             -- users who purchased more than once
  sum(num),                                 -- all purchasing users
  sum(IF(buy_cnt > 1, num, 0)) / sum(num)   -- repeat-purchase rate
FROM (
  SELECT buy_cnt, count(member_id) AS num
  FROM (
    SELECT member_id, count(DISTINCT order_id) AS buy_cnt
    FROM order_list
    GROUP BY member_id
  ) a
  GROUP BY buy_cnt
) b;
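The intermediate grouping by buy_cnt is not strictly necessary; a minimal equivalent sketch that computes the ratio straight from the per-user purchase counts:
SELECT sum(buy_cnt > 1) / count(*) AS repeat_rate   -- buyers with more than one order / all buyers
FROM (
  SELECT member_id, count(DISTINCT order_id) AS buy_cnt
  FROM order_list
  GROUP BY member_id
) a;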
Extract the daily active user count for a given month
Analysis
- Data model: user activity in an app is usually captured by event tracking, i.e. every action a user performs is recorded together with its time, action type, device, user, and so on. These records go into a log table whose core fields are user ID, time, and action.
Implementation
SELECT count(DISTINCT user_id)
FROM login
WHERE login_time >= '2024-03-01 00:00:00' AND login_time <= '2024-03-31 23:59:59';
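The query above returns the distinct active users over the whole month; to break it down per day, group by the calendar date. A minimal sketch:
SELECT date(login_time) AS dt, count(DISTINCT user_id) AS dau
FROM login
WHERE login_time >= '2024-03-01 00:00:00' AND login_time <= '2024-03-31 23:59:59'
GROUP BY date(login_time);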
How to find the last click time of a product
Analysis
1. Data model - a product click log table with user ID, product ID, and click time.
Implementation
CREATE TABLE goods_click_records
(
  id         bigint unsigned AUTO_INCREMENT,
  goods_id   varchar(20) NOT NULL,
  user_id    varchar(20),
  click_time datetime NOT NULL,
  PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
SELECT goods_id, max(click_time) AS last_click_time
FROM goods_click_records
WHERE goods_id = 'XXXXX'
GROUP BY goods_id;
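Dropping the goods_id filter gives the last click time of every product in one pass:
SELECT goods_id, max(click_time) AS last_click_time
FROM goods_click_records
GROUP BY goods_id;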
How to deduplicate data in SQL
- GROUP BY groups the rows by the given key and then processes each group. Grouping works like building an index first and then looking it up, so when the data volume is large GROUP BY is usually faster than DISTINCT.
- DISTINCT is a keyword placed after SELECT that returns the deduplicated values. It compares rows pairwise and has to scan the whole table. It can be combined with the aggregate function COUNT (as COUNT(DISTINCT ...)), which GROUP BY alone cannot express. A minimal example of both follows.
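A minimal sketch, reusing the login table from the first example:
-- Deduplicating user IDs with DISTINCT
SELECT DISTINCT user_id FROM login;
-- The same result with GROUP BY
SELECT user_id FROM login GROUP BY user_id;
-- Counting the distinct values
SELECT count(DISTINCT user_id) FROM login;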
Difference between WHERE and HAVING
Both filter results according to a given condition.
WHERE is a restriction on the query: it constrains the rows before the result set is returned, i.e. it takes effect before the results come back.
WHERE is evaluated before GROUP BY, so it removes the rows that do not satisfy the condition before grouping, i.e. it filters the data before the groups are formed.
A WHERE condition cannot contain aggregate functions, because WHERE is evaluated before aggregation.
HAVING is a filter on the result: it is applied after the query result has been produced, i.e. it takes effect on the returned result.
HAVING filters the groups that satisfy the condition, i.e. it filters data after grouping, and its condition usually contains aggregate functions. HAVING can be used to show only particular groups, and several grouping criteria can be combined; the columns referenced in HAVING should appear in the SELECT clause.
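A minimal sketch of the difference, again using the login table: WHERE restricts the rows that get grouped, HAVING restricts which groups are returned.
SELECT user_id, count(*) AS cnt
FROM login
WHERE login_time >= '2024-04-01'   -- row filter, applied before grouping
GROUP BY user_id
HAVING count(*) >= 2;              -- group filter, applied after aggregation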
How to compute the danmaku reply rate, daily new users, and daily active users
Analysis
1. Data model - a danmaku reply detail table with comment_id (reply ID), uid (user ID), to_id (ID of the danmaku being replied to), and time (reply time).
Implementation
1. Number of danmaku that received replies (here: more than one reply)
SELECT count(to_id) AS num
FROM (
  SELECT to_id, count(comment_id) AS cnt
  FROM comments
  GROUP BY to_id
  HAVING cnt > 1
) a;
2. Number of danmaku that appear as reply targets
SELECT count(DISTINCT to_id) AS num
FROM comments;
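The reply rate itself is the number of danmaku that received replies divided by the total number of danmaku. That total is not available from the reply table alone; assuming a hypothetical danmaku table that stores one row per danmaku, a minimal sketch:
SELECT
  (SELECT count(DISTINCT to_id) FROM comments)    -- danmaku that received at least one reply
  / (SELECT count(*) FROM danmaku) AS reply_rate; -- danmaku is a hypothetical table of all danmaku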
3. Daily new users
SELECT to_days(first_time) AS day_no, count(uid) AS new_users
FROM (
  SELECT uid, min(time) AS first_time
  FROM comments
  GROUP BY uid
) a
GROUP BY to_days(first_time);
4. Daily active users
SELECT to_days(time) AS day_no, count(DISTINCT uid) AS num
FROM comments
GROUP BY to_days(time);
Attendance Records
CREATE TABLE department
(
  dept_id   int NOT NULL,
  dept_name varchar(50) NOT NULL,
  PRIMARY KEY (dept_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE job
(
  job_id    int NOT NULL,
  job_title varchar(50) NOT NULL,
  PRIMARY KEY (job_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE employee
(
  emp_id    int NOT NULL,
  emp_name  varchar(50) NOT NULL,
  sex       varchar(10) NOT NULL,
  dept_id   int NOT NULL,
  manager   int DEFAULT NULL,
  hire_date date NOT NULL,
  job_id    int NOT NULL,
  salary    decimal(8,2) NOT NULL,
  bonus     decimal(8,2) DEFAULT NULL,
  email     varchar(100) NOT NULL,
  PRIMARY KEY (emp_id),
  UNIQUE KEY uk_emp_email (email),
  KEY idx_emp_name (emp_name),
  KEY idx_emp_dept (dept_id),
  KEY idx_emp_job (job_id),
  KEY idx_emp_manager (manager),
  CONSTRAINT fk_emp_dept FOREIGN KEY (dept_id) REFERENCES department (dept_id),
  CONSTRAINT fk_emp_job FOREIGN KEY (job_id) REFERENCES job (job_id),
  CONSTRAINT fk_emp_manager FOREIGN KEY (manager) REFERENCES employee (emp_id),
  CONSTRAINT ck_emp_salary CHECK (salary > 0),
  CONSTRAINT ck_emp_sex CHECK (sex IN ('男', '女'))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Create the calendar table (calendar)
CREATE TABLE calendar
(
  id             int NOT NULL,
  calendar_date  date NOT NULL,
  calendar_year  int NOT NULL,
  calendar_month int NOT NULL,
  calendar_day   int NOT NULL,
  is_work_day    varchar(1) NOT NULL DEFAULT 'Y',
  PRIMARY KEY (id),
  UNIQUE KEY uk_calendar (calendar_date)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Create the attendance table (attendance)
CREATE TABLE attendance
(
  id         int NOT NULL,
  check_date date NOT NULL,
  emp_id     int NOT NULL,
  clock_in   timestamp NULL DEFAULT NULL,
  clock_out  timestamp NULL DEFAULT NULL,
  PRIMARY KEY (id),
  UNIQUE KEY uk_attendance (check_date, emp_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
Implementation
select c.calendar_date, e.emp_name, a.clock_in, a.clock_out,
  case
    when a.clock_out is NULL then "缺勤"                -- absent (no record, or never clocked out)
    when extract(hour from a.clock_in) >= 9 then "迟到" -- late (clocked in at 9:00 or later)
    else "早退"                                          -- left early (clocked out before 18:00)
  end as "考勤状态"
from calendar c
cross join employee e
left join attendance a on (a.emp_id = e.emp_id and a.check_date = c.calendar_date)
where c.is_work_day = 'Y' and (
  a.id is null
  or a.clock_out is null
  or extract(hour from a.clock_in) >= 9
  or extract(hour from a.clock_out) < 18
)
order by c.calendar_date asc;
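Building on the same cross join, a per-employee monthly summary can be sketched as follows; the year and month filter values are only example assumptions, not part of the original.
-- Minimal sketch: count each employee's absent / late / early-leave working days in one month
select e.emp_name,
  sum(a.id is null or a.clock_out is null) as absent_days,
  sum(extract(hour from a.clock_in) >= 9) as late_days,
  sum(extract(hour from a.clock_out) < 18) as early_leave_days
from calendar c
cross join employee e
left join attendance a on (a.emp_id = e.emp_id and a.check_date = c.calendar_date)
where c.is_work_day = 'Y'
  and c.calendar_year = 2024 and c.calendar_month = 1   -- example month, adjust as needed
group by e.emp_id, e.emp_name;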
CASE expressions
-- Pick a gift by years of service (as of 2020) and sex: watch / cosmetics / phone / necklace / laptop
select e.emp_name, e.hire_date,
  case
    when (2020 - extract(year from e.hire_date)) < 10 and e.sex = '男' then '手表'
    when (2020 - extract(year from e.hire_date)) < 10 and e.sex = '女' then '化妆品'
    when (2020 - extract(year from e.hire_date)) < 15 and e.sex = '男' then '手机'
    when (2020 - extract(year from e.hire_date)) < 15 and e.sex = '女' then '项链'
    else '电脑'
  end as '礼品'
from employee e;
-- Pivot each student's subject scores into columns plus the total
select sname as '姓名',
  sum(case cname when '语文' then grade end) as '语文',
  sum(case cname when '数学' then grade end) as '数学',
  sum(case cname when '英语' then grade end) as '英语',
  sum(grade) as '总分'
from t_score s
group by sname;
-- Monthly sales pivot with WITH ROLLUP subtotals per product and a grand total
select coalesce(product, '总计') as '产品',
  coalesce(channel, '所有渠道') as '渠道',
  sum(case extract(month from saledate) when 1 then amount end) as '1月',
  sum(case extract(month from saledate) when 2 then amount end) as '2月',
  sum(case extract(month from saledate) when 3 then amount end) as '3月',
  sum(case extract(month from saledate) when 4 then amount end) as '4月',
  sum(case extract(month from saledate) when 5 then amount end) as '5月',
  sum(case extract(month from saledate) when 6 then amount end) as '6月',
  sum(amount) as '合计'
from sales_data sd
group by product, channel with rollup;
Compute each seller's best-selling product
CREATE TABLE t5 (
seller_id int,
buyer_id int,
item_id int,
num int
);
INSERT INTO t5 VALUES (1, 11, 1, 100), (1, 12, 1, 200), (1, 12, 2, 300);
INSERT INTO t5 VALUES (2, 11, 1, 100), (2, 12, 3, 200);
with seller_items_cnt as(
select seller_id, item_id, sum(num) as total
from t5
group by seller_id, item_id
),
cnt_rank as(
select seller_id, item_id, total,
rank() over(partition by seller_id order by total DESC) as rk
from seller_items_cnt
)
select *
from cnt_rank
where rk = 1;
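With the sample data, seller 1 has two items tied at a total of 300, so rank() returns both of them; if exactly one row per seller is wanted, row_number() can be substituted. A minimal variant:
select seller_id, item_id, total
from (
  select seller_id, item_id, sum(num) as total,
    row_number() over(partition by seller_id order by sum(num) desc) as rn
  from t5
  group by seller_id, item_id
) t
where rn = 1;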
Share of sales from self-operated products
CREATE TABLE t4 (
seller_id int,
buyer_id int,
item_id int,
num int,
price numeric,
source_type tinyint
);
INSERT INTO t4 VALUES (1, 11, 1, 5, 9.9, 1), (1, 12, 2, 15, 16.9, 0);
INSERT INTO t4 VALUES (2, 13, 3, 10, 89.9, 0), (2, 14, 4, 1, 1999, 0);
select seller_id, sum(case source_type when 1 then num else 0 end) /sum(num) as rate
from t4 t
group by seller_id;
select seller_id, sum(if(source_type=1, num, 0)) /sum(num) as rate
from t4 t
group by seller_id;
Cumulative sales amount within 30 days of each user's first sale
CREATE TABLE t3(
user_id integer,
sell_day DATE,
amount numeric
);
INSERT INTO t3 VALUES (1, '2021-01-01', 100), (1, '2021-01-30', 200), (1, '2021-02-01', 100);
INSERT INTO t3 VALUES (2, '2021-01-01', 200), (2, '2021-01-11', 200), (2, '2021-01-12', 200);
INSERT INTO t3 VALUES (3, '2021-01-02', 200), (3, '2021-01-11', 200), (3, '2021-01-12', 200);
select s.user_id, s.first_date, sum(amount) as amount
from (
  select user_id, min(sell_day) as first_date
  from t3
  group by user_id
) s
join t3 t
  on (t.user_id = s.user_id and t.sell_day between s.first_date and s.first_date + interval 30 day)
group by s.user_id, s.first_date;
Compute the popularity of a live room
CREATE TABLE t1(
author_id int,
live_id int,
live_duration int
);
CREATE TABLE t2(
user_id int,
live_id int,
watching_duration int
);
insert into t1 values (1,1,60), (2,2,120),(3,3,30);
insert into t2 values (11,1,60), (12,1,30), (13,1,60), (12, 2, 30), (14, 2, 90);
select t1.author_id, t1.live_id, coalesce(sum(t2.watching_duration) / any_value(t1.live_duration) , 0) as acu
from t1
left join t2 on (t1.live_id = t2.live_id)
group by t1.author_id, t1.live_id;
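The measure here is total watch time divided by the live duration, i.e. the average number of concurrent viewers. With the sample data: live 1 gives (60 + 30 + 60) / 60 = 2.5, live 2 gives (30 + 90) / 120 = 1, and live 3 has no viewers, so coalesce turns the NULL sum into 0.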
Anomalous transaction analysis
-- Create the transaction log table
CREATE TABLE transfer_log(
log_id INTEGER NOT NULL PRIMARY KEY,
log_ts TIMESTAMP NOT NULL,
from_user VARCHAR(50) NOT NULL,
to_user VARCHAR(50),
type VARCHAR(10) NOT NULL,
amount NUMERIC(10) NOT NULL
);
-- Initialize data
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (1,'2019-01-02 10:31:40','62221230000000',NULL,'存款',50000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (2,'2019-01-02 10:32:15','62221234567890',NULL,'存款',100000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (3,'2019-01-03 08:14:29','62221234567890','62226666666666','转账',200000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (4,'2019-01-05 13:55:38','62221234567890','62226666666666','转账',150000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (5,'2019-01-07 20:00:31','62221234567890','62227777777777','转账',300000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (6,'2019-01-09 17:28:07','62221234567890','62227777777777','转账',500000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (7,'2019-01-10 07:46:02','62221234567890','62227777777777','转账',100000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (8,'2019-01-11 09:36:53','62221234567890',NULL,'存款',40000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (9,'2019-01-12 07:10:01','62221234567890','62228888888881','转账',10000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (10,'2019-01-12 07:11:12','62221234567890','62228888888882','转账',8000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (11,'2019-01-12 07:12:36','62221234567890','62228888888883','转账',5000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (12,'2019-01-12 07:13:55','62221234567890','62228888888884','转账',6000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (13,'2019-01-12 07:14:24','62221234567890','62228888888885','转账',7000);
INSERT INTO transfer_log (log_id,log_ts,from_user,to_user,type,amount) VALUES (14,'2019-01-21 12:11:16','62221234567890','62228888888885','转账',70000);
-- Find accounts whose cumulative transfers within 5 days exceed 1,000,000
SELECT *
FROM (
  SELECT *,
    sum(amount) OVER (PARTITION BY from_user ORDER BY log_ts RANGE BETWEEN INTERVAL 5 DAY PRECEDING AND CURRENT ROW) total
  FROM transfer_log
  WHERE type = '转账'
) t
WHERE total >= 1000000;
-- Find records where the same payer and payee transferred 3 or more times within 5 days
select *
from (
  SELECT *,
    count(1) OVER (PARTITION BY from_user, to_user ORDER BY log_ts RANGE BETWEEN INTERVAL 5 DAY PRECEDING AND CURRENT ROW) times
  FROM transfer_log
  WHERE type = '转账'
) t
where times >= 3;
Friend relationship analysis
create table t_user(
user_id int primary key,
user_name varchar(50) not null
);
create table t_friend(
user_id int not null,
friend_id int not null,
created_time timestamp not null,
primary key (user_id, friend_id)
);
View the friend list
select f.user_id, f.friend_id, u.user_name as friend_name
from t_friend f
left join t_user u
on u.user_id = f.friend_id
where f.user_id = 2;
-- View mutual friends
with f1(friend_id, friend_name) as (
select tu.user_id as friend_id, tu.user_name as friend_name
from t_friend tf
left join t_user tu
on tu.user_id = tf.friend_id
where tf.user_id = 2
),
f2(friend_id, friend_name) as (
select tu.user_id as friend_id, tu.user_name as friend_name
from t_friend tf
left join t_user tu
on tu.user_id = tf.friend_id
where tf.user_id = 4
)
select *
from f1
join f2
on f1.friend_id=f2.friend_id;
Recommend friends
with friends(friend_id, friend_name) as (
select tu.user_id as friend_id, tu.user_name as friend_name
from t_friend tf
left join t_user tu
on tu.user_id = tf.friend_id
where tf.user_id = 2
),
fof(user_id, friend_id, friend_name) as (
select tf.user_id, tu.user_id as friend_id, tu.user_name as friend_name
from t_friend tf
join t_user tu on tu.user_id = tf.friend_id
join friends fs on fs.friend_id = tf.user_id
where tf.friend_id != 2 and tf.friend_id not in(select friend_id from friends)
)
SELECT friend_id, friend_name, count(*) as '共同好友'
FROM fof u
GROUP BY u.friend_id, u.friend_name;
Relationship chain
with recursive friends_chain(uid, fid, hops, path) as (
select user_id, friend_id, 0 as hops, concat(',', user_id, ',', friend_id) as path
from t_friend
where t_friend.user_id = 6
union all
select fc.uid, tf.friend_id, fc.hops + 1, concat(fc.path, ',', tf.friend_id)
from friends_chain fc
join t_friend tf on fc.fid = tf.user_id
where instr(fc.path, concat(',', tf.friend_id, ',')) = 0
AND hops<=6
)
SELECT uid, fid, hops, SUBSTR(path, 2) AS path
FROM friends_chain
WHERE fid = 7
ORDER BY hops;
Year-over-year, month-over-month, and compound growth rates
-- YoY, MoM, and compound growth rates
CREATE TABLE sales_monthly(
product VARCHAR(20),
ym VARCHAR(10),
amount NUMERIC(10, 2)
);
-- Month-over-month growth rate (growth of the current period compared with the previous period)
select product AS '产品', ym '年月', amount '本期销量', lag(amount, 1) over(partition by product order by ym) '上期销量',
100 * ((amount - lag(amount, 1) over(partition by product order by ym)) / lag(amount, 1) over(partition by product order by ym)) as '环比增长率(%)'
from sales_monthly sm
order by product, ym;
-- Year-over-year growth rate (growth of the current period compared with the same period of the previous year)
select product AS '产品', ym '年月', amount '本期销量', lag(amount, 12) over(partition by product order by ym) '历史同期销量',
100 * ((amount - lag(amount, 12) over(partition by product order by ym)) / lag(amount, 12) over(partition by product order by ym)) as '同比增长率(%)'
from sales_monthly sm
order by product, ym;
-- Average monthly compound growth rate (divide the Nth period by the first-period baseline, take the (N-1)th root, then subtract 1)
with f1(product, ym, amount, first_amount, num) as (
select product, ym, amount, first_value(amount) over(partition by product order by ym),
row_number() over(partition by product order by ym)
from sales_monthly sm
)
select product, ym, amount, (power(1.0 * amount/first_amount, 1.0/nullif(num-1, 0)) - 1) * 100 as '复合增长率(%)'
from f1
ORDER BY product, ym;
Consecutive logins
CREATE TABLE t_login(
uid int,
login_time timestamp
);
-- Window function ROW_NUMBER
with t1(uid, ymd) as(
select distinct uid, date(login_time) ymd
from t_login
WHERE login_time BETWEEN timestamp '2022-01-01 00:00:00' AND timestamp '2022-01-31 23:59:59'
),
t2 as (
select uid, ymd,
row_number() over(partition by uid order by ymd) number,
date_sub(ymd, INTERVAL row_number() over(partition by uid order by ymd) DAY) sub
from t1
)
select uid, min(ymd) as '开始时间', max(ymd) as '结束时间', count(*) as '连续登陆天数'
from t2
group by uid, sub
having count(*) >= 3;
-- Window function LAG
with t1(uid, ymd) as(
select distinct uid, date(login_time) ymd
from t_login
WHERE login_time BETWEEN timestamp '2022-01-01 00:00:00' AND timestamp '2022-01-31 23:59:59'
),
t2 as (
select uid, ymd,
LAG(ymd, 2) over(partition by uid order by ymd) pre2,
datediff(ymd, LAG(ymd, 2) over(partition by uid order by ymd) ) diff
from t1
)
select uid, date_sub(ymd, interval 2 day) as '开始时间', ymd as '结束时间', diff
from t2
where diff = 2;
Find close contacts based on scan-code records
CREATE TABLE trail(
uid varchar(11) NOT NULL,
area varchar(10) NOT NULL,
scan_time timestamp
);
-- Find the start time and end time of each user's stay in each area
with tmp as (
select uid, area, scan_time,
row_number() over(partition by uid order by scan_time) order1,
row_number() over(partition by uid, area order by scan_time) order2,
row_number() over(partition by uid order by scan_time) - row_number() over(partition by uid, area order by scan_time) diff
from trail t
)
SELECT *
FROM tmp
order by uid, scan_time;
with tmp as (
select uid, area, scan_time,
row_number() over(partition by uid order by scan_time) order1,
row_number() over(partition by uid, area order by scan_time) order2,
row_number() over(partition by uid order by scan_time) - row_number() over(partition by uid, area order by scan_time) diff
from trail t
)
SELECT uid, area, min(scan_time) start_time, max(scan_time) end_time
FROM tmp
GROUP BY uid, area, diff
ORDER BY uid, start_time;
-- Question 2: if a user tests positive (e.g. on a nucleic acid test), find the people who were with them
WITH tmp AS (
SELECT uid, area, scan_time,
row_number() over(partition by uid order by scan_time) - row_number() over(partition by uid, area order by scan_time) diff
FROM trail
),
-- Find the areas where the user stayed for 30 minutes or more, with the start and end times
tmp2 AS (
SELECT uid, area, min(scan_time) start_time, max(scan_time) end_time
FROM tmp
GROUP BY uid, area, diff
HAVING min(scan_time) + INTERVAL 30 MINUTE <= max(scan_time)
)
SELECT *
FROM tmp2 u1
JOIN tmp2 u2
ON (u1.uid <> u2.uid AND u1.area = u2.area
and TIMESTAMPDIFF(MINUTE, u2.start_time , u2.end_time) >= 10
AND u1.start_time + INTERVAL 10 MINUTE <= u2.end_time
AND u2.start_time + INTERVAL 10 MINUTE <= u1.end_time)
WHERE u1.uid = '13011111111';
Maximum concurrent online users
CREATE TABLE t_live(
live_id int,
user_id int,
oper_time datetime,
oper_type varchar(10)
);
with t1 as (
  select live_id, user_id, oper_time,
    case oper_type
      when 'IN' then 1
      when 'OUT' then -1
      else 0
    end flag
  from t_live
),
t2 as (
  select *, sum(flag) OVER(PARTITION BY live_id ORDER BY oper_time) as total
  from t1
),
t3 as (
  select *, rank() over(partition by live_id order by total desc) as rk
  from t2
)
select live_id, oper_time, total
from t3
where rk = 1;