目录
· 3.将零申报的数据分组。可以看到px为2-4的分组编号为5,7-8的分组编号为9,10-13的分组编号是14,15-19的分组编号是20。
· 4.获取零申报的数据。此时就有了分组编号和连续出现的次数。
· 5.查询至少连续N次零申报的分组编号。可以看到分组编号5是连续3次,14是连续4次,20是连续5次。
如上图所示,一户纳税人从2021年开始进行申报,其中标注蓝色的行是零申报的数据。现假设想提取至少连续N个月零申报的数据。
思路:把连续数据视为一个个分组数据,每个分组数据以临近非零申报数据为一组。
· 1.首先对数据进行排序。
select nsrsbh,
nsrmc,
to_date(skssqq, 'YYYY-MM-DD') as skssqq,
to_date(skssqz, 'YYYY-MM-DD') as skssqz,
ynse,
row_number() over(order by skssqq) px
from scott.sb_sbxx
· 2.获取非零申报的数据。
with sbxx as(
select nsrsbh,
nsrmc,
to_date(skssqq, 'YYYY-MM-DD') as skssqq,
to_date(skssqz, 'YYYY-MM-DD') as skssqz,
ynse,
row_number() over(order by skssqq) px
from scott.sb_sbxx)
--,flsb as(
select nsrsbh, nsrmc, skssqq, skssqz, ynse, px from sbxx where ynse <> 0
· 3.将零申报的数据分组。可以看到px为2-4的分组编号为5,7-8的分组编号为9,10-13的分组编号是14,15-19的分组编号是20。
with sbxx as(
select nsrsbh,
nsrmc,
to_date(skssqq, 'YYYY-MM-DD') as skssqq,
to_date(skssqz, 'YYYY-MM-DD') as skssqz,
ynse,
row_number() over(order by skssqq) px
from scott.sb_sbxx)
,flsb as(
select nsrsbh, nsrmc, skssqq, skssqz, ynse, px from sbxx where ynse <> 0)
--,lsb_fz as(
select a.nsrsbh,
a.nsrmc,
a.skssqq,
a.skssqz,
a.ynse,
a.px,
min(b.px) as fz
from sbxx a
inner join flsb b
on a.px <= b.px
group by a.nsrsbh, a.nsrmc, a.skssqq, a.skssqz, a.ynse, a.px
· 4.获取零申报的数据。此时就有了分组编号和连续出现的次数。
with sbxx as(
select nsrsbh,
nsrmc,
to_date(skssqq, 'YYYY-MM-DD') as skssqq,
to_date(skssqz, 'YYYY-MM-DD') as skssqz,
ynse,
row_number() over(order by skssqq) px
from scott.sb_sbxx)
,flsb as(
select nsrsbh, nsrmc, skssqq, skssqz, ynse, px from sbxx where ynse <> 0)
,lsb_fz as(
select a.nsrsbh,
a.nsrmc,
a.skssqq,
a.skssqz,
a.ynse,
a.px,
min(b.px) as fz
from sbxx a
inner join flsb b
on a.px <= b.px
group by a.nsrsbh, a.nsrmc, a.skssqq, a.skssqz, a.ynse, a.px)
--,jgb as(
select row_number() over(partition by a.fz order by a.px) ylx, a.*
from lsb_fz a
where a.ynse = 0)
· 5.查询至少连续N次零申报的分组编号。可以看到分组编号5是连续3次,14是连续4次,20是连续5次。
with sbxx as(
select nsrsbh,
nsrmc,
to_date(skssqq, 'YYYY-MM-DD') as skssqq,
to_date(skssqz, 'YYYY-MM-DD') as skssqz,
ynse,
row_number() over(order by skssqq) px
from scott.sb_sbxx)
,flsb as(
select nsrsbh, nsrmc, skssqq, skssqz, ynse, px from sbxx where ynse <> 0)
,lsb_fz as(
select a.nsrsbh,
a.nsrmc,
a.skssqq,
a.skssqz,
a.ynse,
a.px,
min(b.px) as fz
from sbxx a
inner join flsb b
on a.px <= b.px
group by a.nsrsbh, a.nsrmc, a.skssqq, a.skssqz, a.ynse, a.px)
,jgb as(
select row_number() over(partition by a.fz order by a.px) ylx, a.*
from lsb_fz a
where a.ynse = 0)
--,yq as(
select fz, count(*) from jgb group by fz having count(*) >= 3
· 6.最后带入分组编号,获取对应的连续数据。
with sbxx as(
select nsrsbh,
nsrmc,
to_date(skssqq, 'YYYY-MM-DD') as skssqq,
to_date(skssqz, 'YYYY-MM-DD') as skssqz,
ynse,
row_number() over(order by skssqq) px
from scott.sb_sbxx)
,flsb as(
select nsrsbh, nsrmc, skssqq, skssqz, ynse, px from sbxx where ynse <> 0)
,lsb_fz as(
select a.nsrsbh,
a.nsrmc,
a.skssqq,
a.skssqz,
a.ynse,
a.px,
min(b.px) as fz
from sbxx a
inner join flsb b
on a.px <= b.px
group by a.nsrsbh, a.nsrmc, a.skssqq, a.skssqz, a.ynse, a.px)
,jgb as(
select row_number() over(partition by a.fz order by a.px) ylx, a.*
from lsb_fz a
where a.ynse = 0)
,yq as(
select fz, count(*) from jgb group by fz having count(*) >= 3)
select nsrsbh, nsrmc, skssqq, skssqz, ynse, fz
from jgb
where fz in (select fz from yq)
order by fz, ylx;