SQL之提取至少连续N个月零申报数据

目录

·     1.首先对数据进行排序。

·     2.获取非零申报的数据。

·     3.将零申报的数据分组。可以看到px为2-4的分组编号为5,7-8的分组编号为9,10-13的分组编号是14,15-19的分组编号是20。

·     4.获取零申报的数据。此时就有了分组编号和连续出现的次数。

·     5.查询至少连续N次零申报的分组编号。可以看到分组编号5是连续3次,14是连续4次,20是连续5次。

·     6.最后带入分组编号,获取对应的连续数据。


        如上图所示,一户纳税人从2021年开始进行申报,其中标注蓝色的行是零申报的数据。现假设想提取至少连续N个月零申报的数据。

      思路:把连续数据视为一个个分组数据,每个分组数据以临近非零申报数据为一组。

·     1.首先对数据进行排序。

  select nsrsbh,
         nsrmc,
         to_date(skssqq, 'YYYY-MM-DD') as skssqq,
         to_date(skssqz, 'YYYY-MM-DD') as skssqz,
         ynse,
         row_number() over(order by skssqq) px
    from scott.sb_sbxx

·     2.获取非零申报的数据。

with sbxx as(
select nsrsbh,
       nsrmc,
       to_date(skssqq, 'YYYY-MM-DD') as skssqq,
       to_date(skssqz, 'YYYY-MM-DD') as skssqz,
       ynse,
       row_number() over(order by skssqq) px
  from scott.sb_sbxx)
--,flsb as(
select nsrsbh, nsrmc, skssqq, skssqz, ynse, px from sbxx where ynse <> 0

·     3.将零申报的数据分组。可以看到px为2-4的分组编号为5,7-8的分组编号为9,10-13的分组编号是14,15-19的分组编号是20。

with sbxx as(
select nsrsbh,
       nsrmc,
       to_date(skssqq, 'YYYY-MM-DD') as skssqq,
       to_date(skssqz, 'YYYY-MM-DD') as skssqz,
       ynse,
       row_number() over(order by skssqq) px
  from scott.sb_sbxx)
,flsb as(
select nsrsbh, nsrmc, skssqq, skssqz, ynse, px from sbxx where ynse <> 0)
--,lsb_fz as(
 select a.nsrsbh,
         a.nsrmc,
         a.skssqq,
         a.skssqz,
         a.ynse,
         a.px,
         min(b.px) as fz
    from sbxx a
   inner join flsb b
      on a.px <= b.px
   group by a.nsrsbh, a.nsrmc, a.skssqq, a.skssqz, a.ynse, a.px

·     4.获取零申报的数据。此时就有了分组编号和连续出现的次数。

with sbxx as(
select nsrsbh,
       nsrmc,
       to_date(skssqq, 'YYYY-MM-DD') as skssqq,
       to_date(skssqz, 'YYYY-MM-DD') as skssqz,
       ynse,
       row_number() over(order by skssqq) px
  from scott.sb_sbxx)
,flsb as(
select nsrsbh, nsrmc, skssqq, skssqz, ynse, px from sbxx where ynse <> 0)
,lsb_fz as(
 select a.nsrsbh,
         a.nsrmc,
         a.skssqq,
         a.skssqz,
         a.ynse,
         a.px,
         min(b.px) as fz
    from sbxx a
   inner join flsb b
      on a.px <= b.px
   group by a.nsrsbh, a.nsrmc, a.skssqq, a.skssqz, a.ynse, a.px)
--,jgb as(
 select row_number() over(partition by a.fz order by a.px) ylx, a.*
    from lsb_fz a
   where a.ynse = 0)

·     5.查询至少连续N次零申报的分组编号。可以看到分组编号5是连续3次,14是连续4次,20是连续5次。

with sbxx as(
select nsrsbh,
       nsrmc,
       to_date(skssqq, 'YYYY-MM-DD') as skssqq,
       to_date(skssqz, 'YYYY-MM-DD') as skssqz,
       ynse,
       row_number() over(order by skssqq) px
  from scott.sb_sbxx)
,flsb as(
select nsrsbh, nsrmc, skssqq, skssqz, ynse, px from sbxx where ynse <> 0)
,lsb_fz as(
 select a.nsrsbh,
         a.nsrmc,
         a.skssqq,
         a.skssqz,
         a.ynse,
         a.px,
         min(b.px) as fz
    from sbxx a
   inner join flsb b
      on a.px <= b.px
   group by a.nsrsbh, a.nsrmc, a.skssqq, a.skssqz, a.ynse, a.px)
,jgb as(
 select row_number() over(partition by a.fz order by a.px) ylx, a.*
    from lsb_fz a
   where a.ynse = 0)
--,yq as(
 select fz, count(*) from jgb group by fz having count(*) >= 3

·     6.最后带入分组编号,获取对应的连续数据。

with sbxx as(
select nsrsbh,
       nsrmc,
       to_date(skssqq, 'YYYY-MM-DD') as skssqq,
       to_date(skssqz, 'YYYY-MM-DD') as skssqz,
       ynse,
       row_number() over(order by skssqq) px
  from scott.sb_sbxx)
,flsb as(
select nsrsbh, nsrmc, skssqq, skssqz, ynse, px from sbxx where ynse <> 0)
,lsb_fz as(
 select a.nsrsbh,
         a.nsrmc,
         a.skssqq,
         a.skssqz,
         a.ynse,
         a.px,
         min(b.px) as fz
    from sbxx a
   inner join flsb b
      on a.px <= b.px
   group by a.nsrsbh, a.nsrmc, a.skssqq, a.skssqz, a.ynse, a.px)
,jgb as(
 select row_number() over(partition by a.fz order by a.px) ylx, a.*
    from lsb_fz a
   where a.ynse = 0)
,yq as(
 select fz, count(*) from jgb group by fz having count(*) >= 3)
select nsrsbh, nsrmc, skssqq, skssqz, ynse, fz
  from jgb
 where fz in (select fz from yq)
 order by fz, ylx;

·     本文最后要感谢杜哥哥传授的思路技巧!学海无涯苦作舟!

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值