java duplicate field_Counting consecutive duplicate field with SQL

博客讨论了一个在SQL Server 2016中处理连续重复值计数的挑战,即'Gaps and Islands'问题。给出了多个查询解决方案,用于从数据表中找出并计算特定字段连续重复的次数。示例数据涉及日期、状态和个人ID,解决方案通过窗口函数row_number()和datediff()来识别和计数连续的重复记录。
摘要由CSDN通过智能技术生成

可以将文章内容翻译成中文,广告屏蔽插件会导致该功能失效:

问题:

I have this data in myTable :

Date Status PersonID

-----------------------------------------

2018/01/01 2 2015 ┐ 2

2018/01/02 2 2015 ┘

2018/01/05 2 2015 ┐

2018/01/06 2 2015 3

2018/01/07 2 2015 ┘

2018/01/11 2 2015 - 1

2018/01/01 2 1018 - 1

2018/01/03 2 1018 - 1

2018/01/05 2 1018 ┐ 2

2018/01/06 2 1018 ┘

2018/01/08 2 1018 ┐ 2

2018/01/09 2 1018 ┘

2018/01/03 2 1625 ┐

2018/01/04 2 1625 4

2018/01/05 2 1625

2018/01/06 2 1625 ┘

2018/01/17 2 1625 - 1

2018/01/29 2 1625 - 1

-----------------------------------

and I need to count consecutive duplicate values like this:

This is the result I need:

count personid

-----------------

2 2015

3 2015

1 2015

1 1018

1 1018

2 1018

2 1018

4 1625

1 1625

1 1625

I am using SQL Server 2016 - please help

回答1:

This is a 'Gaps and Islands' problem, you can try like following.

;with cte

as (select *,

dateadd(day, -row_number()

over (partition by status, personid

order by [date] ), [date]) AS grp

FROM @table

)

,cte1

AS (select *,row_number() over(partition by personid, grp,status order by [date]) rn,

count(*) over(partition by personid, grp) ct

from cte

)

select ct as count, personid

from cte1

where rn=1

Note: You might not get the rows in same sequence as you don't have any column which can be used for ordering the way you showed in the desired output.

回答2:

Here is the easiest and small query

CREATE TABLE #T (

[Date] date,

[Status] int,

PersonId int

);

INSERT #T

VALUES ('2018/01/01', 2, 2015),

('2018/01/02', 2, 2015),

('2018/01/05', 2, 2015),

('2018/01/06', 2, 2015),

('2018/01/07', 2, 2015),

('2018/01/11', 2, 2015),

('2018/01/01', 2, 1018),

('2018/01/03', 2, 1018),

('2018/01/05', 2, 1018),

('2018/01/06', 2, 1018),

('2018/01/08', 2, 1018),

('2018/01/09', 2, 1018),

('2018/01/03', 2, 1625),

('2018/01/04', 2, 1625),

('2018/01/05', 2, 1625),

('2018/01/06', 2, 1625),

('2018/01/17', 2, 1625),

('2018/01/29', 2, 1625)

SELECT

MAX(cnt),

personid

FROM (SELECT

ROW_NUMBER() OVER (PARTITION BY GRP ORDER BY [Date]) AS cnt,

personid,

GRP

FROM (SELECT

personid,

[Date],

DATEDIFF(DAY, '1900-01-01', [Date]) - ROW_NUMBER() OVER (ORDER BY Personid DESC) AS GRP

FROM #T) A) AS B

GROUP BY personid,

GRP

ORDER BY PersonId DESC

回答3:

The main challenge to find out the gap between two dates and regarding each date you can create that gap by using row_number() analytic function and datediff function

with cte as

(

select '2018-01-01' as d, 2 as id , 2015 as pid

union all

select '2018-01-02',2,2015

union all

select '2018-01-05',2,2015 union all

select '2018-01-06',2,2015 union all

select '2018-01-07',2,2015

union all

select '2018-01-11',2,2015

), cte1 as (SELECT *,

datediff(day, Row_number()

OVER (

partition BY id, pid

ORDER BY [d] ), [d]) AS dif

FROM cte

) select distinct pid,count(*) over(partition by pid,dif) as cnt from cte1

回答4:

This type of problem is known as 'Gaps and Islands'. You are either identifying consecutive data sets (Islands) or range of values between two islands (Gaps). There are many different ways to achieve the results that also performs well with large data sets. You can refer the below well written articles for that.

Here is an attempt to your question.

CREATE TABLE #test

(

dt DATETIME

,Status INT

,PersonID INT

)

INSERT INTO #Test (dt, Status, PersonID) VALUES

('2018/01/01', 2, 2015),

('2018/01/02', 2, 2015),

('2018/01/05', 2, 2015),

('2018/01/06', 2, 2015),

('2018/01/07', 2, 2015),

('2018/01/11', 2, 2015),

('2018/01/01', 2, 1018),

('2018/01/03', 2, 1018),

('2018/01/05', 2, 1018),

('2018/01/06', 2, 1018),

('2018/01/08', 2, 1018),

('2018/01/09', 2, 1018),

('2018/01/03', 2, 1625),

('2018/01/04', 2, 1625),

('2018/01/05', 2, 1625),

('2018/01/06', 2, 1625),

('2018/01/17', 2, 1625),

('2018/01/29', 2, 1625)

;with cte_dt_from

AS

(

SELECT PersonID, MIN(Dt) dt_from_start

FROM #Test

GROUP BY PersonID

),

cte_offset_num

AS

(

SELECT T1.PersonID, T1.dt, DATEDIFF(DAY, T2.dt_from_start, T1.dt) dt_offset

FROM #test T1

INNER JOIN cte_dt_from T2 ON T2.PersonID = T1.PersonID

),

cte_starting_point

AS

(

SELECT A.PersonID, A.dt_offset, ROW_NUMBER() OVER(PARTITION BY A.PersonID ORDER BY A.dt_offset) AS rownum

FROM cte_offset_num AS A

WHERE NOT EXISTS (

SELECT *

FROM cte_offset_num AS B

WHERE B.PersonID = A.PersonID AND B.dt_offset = A.dt_offset - 1)

)

,

cte_ending_point

AS

(

SELECT A.PersonID, A.dt_offset, ROW_NUMBER() OVER(PARTITION BY A.PersonID ORDER BY A.dt_offset) AS rownum

FROM cte_offset_num AS A

WHERE NOT EXISTS (

SELECT *

FROM cte_offset_num AS B

WHERE B.PersonID = A.PersonID AND B.dt_offset = A.dt_offset + 1)

)

SELECT (E.dt_offset - S.dt_offset) + 1 AS [count], S.PersonID

FROM cte_starting_point AS S

JOIN cte_ending_point AS E ON E.PersonID = S.PersonID AND E.rownum = S.rownum

ORDER BY S.PersonID;

DROP TABLE #Test;

回答5:

WITH T1 AS

(SELECT Date,

Date - ROW_NUMBER() OVER (PARTITION BY Status, PersonID ORDER BY Date) AS Grp

FROM myTable)

SELECT personid,

ROW_NUMBER() OVER (PARTITION BY Grp ORDER BY Date) AS Consecutive

FROM T1

On this result you can apply a MAX(), to get the number of records for each personid.

Refer this question to get breakdown details

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值