统计学生连续出勤天数,过滤周末,来反映学生的出勤情况,下面用代码来实现这个功能。
一、创建测试表,插入模拟数据
create table px_data_test_temp.temp_stu_attendance_tb (
date_col string,
stu_no string,
is_attendence string
);
insert into px_data_test_temp.temp_stu_attendance_tb
(date_col, stu_no, is_attendence)
values
('2019-11-01','STU-00001','上课'),
('2019-11-02','STU-00001','上课'),
('2019-11-03','STU-00001','缺勤'),
('2019-11-04','STU-00001','上课'),
('2019-11-05','STU-00001','上课'),
('2019-11-06','STU-00001','上课'),
('2019-11-07','STU-00001','上课'),
('2019-11-08','STU-00001','上课'),
('2019-11-09','STU-00001','缺勤'),
('2019-11-10','STU-00001','缺勤'),
('2019-11-11','STU-00001','上课'),
('2019-11-12','STU-00001','上课'),
('2019-11-13','STU-00001','上课'),
('2019-11-14','STU-00001','上课'),
('2019-11-15','STU-00001','缺勤'),
('2019-11-16','STU-00001','缺勤'),
('2019-11-17','STU-00001','缺勤'),
('2019-11-18','STU-00001','上课'),
('2019-11-19','STU-00001','上课'),
('2019-11-20','STU-00001','上课'),
('2019-11-01','STU-00002','上课'),
('2019-11-02','STU-00002','缺勤'),
('2019-11-03','STU-00002','缺勤'),
('2019-11-04','STU-00002','上课'),
('2019-11-05','STU-00002','上课'),
('2019-11-06','STU-00002','上课'),
('2019-11-07','STU-00002','上课'),
('2019-11-08','STU-00002','上课'),
('2019-11-09','STU-00002','缺勤'),
('2019-11-10','STU-00002','缺勤'),
('2019-11-11','STU-00002','上课'),
('2019-11-12','STU-00002','上课'),
('2019-11-13','STU-00002','上课'),
('2019-11-14','STU-00002','上课'),
('2019-11-15','STU-00002','上课'),
('2019-11-16','STU-00002','缺勤'),
('2019-11-17','STU-00002','缺勤'),
('2019-11-18','STU-00002','上课'),
('2019-11-19','STU-00002','上课'),
('2019-11-20','STU-00002','上课');
二、获取连续出勤的开始日期
select date_col, stu_no, week_which_day,
row_number() over(partition by stu_no order by date_col) as rk,
date_sub(date_col, (row_number() over(partition by stu_no order by date_col) - 1)) from_day --连续打卡的开始日期
from
(
select date_col, stu_no, is_attendence,
-- pmod(datediff(date_col, '2012-01-01'), 7) week_which_day --标记周几,周日为0
dayofweek(date_col) week_which_day
from px_data_test_temp.temp_stu_attendance_tb
group by date_col, stu_no, is_attendence
) t
--将缺勤的数据过滤,但保留周末的数据(如果过滤周末数据,则每个学生最大连续出勤天数为5)
where is_attendence = '上课' or week_which_day in (6, 0);
结果:
三、统计学生连续出勤天数,过滤周末
with t1_tb as
(
select date_col, stu_no, week_which_day,
row_number() over(partition by stu_no order by date_col) as rk,
date_sub(date_col, (row_number() over(partition by stu_no order by date_col) - 1)) from_day --连续打卡的开始日期
from
(
select date_col, stu_no, is_attendence,
-- pmod(datediff(date_col, '2012-01-01'), 7) week_which_day --标记周几,周日为0
dayofweek(date_col) week_which_day
from px_data_test_temp.temp_stu_attendance_tb
group by date_col, stu_no, is_attendence
) t
--将缺勤的数据过滤,但保留周末的数据(如果过滤周末数据,则每个学生最大连续出勤天数为5)
where is_attendence = '上课' or week_which_day in (6, 0)
)
select stu_no, start_date, end_date, continuous_days
from
(
select
stu_no, start_date, end_date, continuous_days,
row_number() over (partition by stu_no order by continuous_days desc) as rk
from
(
select
stu_no,
min(date_col) as start_date,
max(date_col) as end_date,
count(1) as continuous_days
from t1_tb
where week_which_day not in (0,6) -- 过滤周末数据
group by stu_no, from_day
) t1
) t2
where rk = 1;
结果: