本文是本人原创,转载请注明链接!
在MYSQL不能运用窗口函数,要解决连续访问或登陆用户的计算还是要费些工夫的。
/*选出用户 连续登陆天数最多天数 */
SELECT *
FROM (SELECT *
FROM (
SELECT
uid,
max(days) lianxu_days,#最大连续天数
min(login_day) start_date, #开始日期
max(login_day) end_date #结束日期
FROM (SELECT
uid,
@cont_day :=
(CASE
WHEN (@last_uid = uid AND DATEDIFF(created_ts, @last_dt) = 1)
THEN (@cont_day + 1) /*当上一用户为当前用户,且与上一日差异一天时开始,当前的连续天数加一*/
WHEN (@last_uid = uid AND DATEDIFF(created_ts, @last_dt) < 1)
THEN (@cont_day + 0) /*当日期为当前用户且与上一日差异小于一天时开始,当前的连续天数不变*/
ELSE 1 /*其他条件,当前连续天数赋初值1*/
END) AS days, /*计算当前行符合条件的连续天数 并传给参数*/
(@cont_ix := (@cont_ix + IF(@cont_day = 1, 1, 0))) AS cont_ix, /*如果当前行符合条件的天数=1,则开始累计 符合条件+1,不符合条件不加,为了记录开始结束日期*/
@last_uid := uid, /*取当前的UID赋给参数*/
@last_dt := created_ts login_day #记录当前登陆日期
FROM (SELECT
distinct create_id uid,
DATE(create_time) created_ts
FROM tag.content
WHERE create_id is not null /*可筛选用户条件*/
ORDER BY create_id, create_time) AS t,/*将用户浏览数据按用户名,及日期排序好取出*/
(SELECT
@last_uid := '', #上一条用户 ID
@last_dt := '', #当前行用户的登陆时间
@cont_ix := 0, #当前用户 第N次满足累计天数
@cont_day := 0) AS t1 #当前用户当前行 的累计连续天数 赋初始字段值
) AS t2
GROUP BY uid, cont_ix
HAVING lianxu_days >= 7 #取连续登陆天数的条件
) tmp
ORDER BY lianxu_days DESC) ntmp
GROUP BY uid;
/*筛选 出连续登陆大 于 N天的用户 及天数 明细 */
SELECT
uid,
max(days) 最大连续天数,
count(distinct cont_ix) 连续天数次数
FROM (SELECT
uid,
@cont_day :=
(CASE
WHEN (@last_uid = uid AND DATEDIFF(created_ts, @last_dt) = 1)
THEN
(@cont_day + 1)
WHEN (@last_uid = uid AND DATEDIFF(created_ts, @last_dt) < 1)
THEN
(@cont_day + 0)
ELSE
1
END) AS days,
(@cont_ix := (@cont_ix + IF(@cont_day = 1, 1, 0))) AS cont_ix,
@last_uid := uid,
@last_dt := created_ts login_day
FROM (SELECT
distinct create_id uid,
DATE(create_time) created_ts
FROM tag.content
WHERE create_id is not null
ORDER BY create_id, create_time) AS t,
(SELECT
@last_uid := '',
@last_dt := '',
@cont_ix := 0,
@cont_day := 0) AS t1
) AS t2
where days >=7
GROUP BY uid
ORDER BY 最大连续天数 DESC
/*筛选 出连续登陆大 于 N天的用总户数,总次数 */
SELECT
count(distinct uid) 人数
,count(uid) 次数
FROM (SELECT
uid,
@cont_day :=
(CASE
WHEN (@last_uid = uid AND DATEDIFF(created_ts, @last_dt) = 1)
THEN
(@cont_day + 1)
WHEN (@last_uid = uid AND DATEDIFF(created_ts, @last_dt) < 1)
THEN
(@cont_day + 0)
ELSE
1
END) AS days,
(@cont_ix := (@cont_ix + IF(@cont_day = 1, 1, 0))) AS cont_ix,
@last_uid := uid,
@last_dt := created_ts login_day
FROM (SELECT
distinct create_id uid,
DATE(create_time) created_ts
FROM tag.content
WHERE create_id is not null
ORDER BY create_id, create_time) AS t,
(SELECT
@last_uid := '',
@last_dt := '',
@cont_ix := 0,
@cont_day := 0) AS t1
) AS t2
where days>=7
/*连续登陆的大于N天的用户 满足条件的登陆次数*/
SELECT
distinct uid,
count(distinct cont_ix)
FROM (SELECT
uid,
@cont_day :=
(CASE WHEN (@last_uid = uid AND DATEDIFF(created_ts, @last_dt) = 1) THEN (@cont_day + 1)
WHEN (@last_uid = uid AND DATEDIFF(created_ts, @last_dt) < 1) THEN (@cont_day + 0)
ELSE 1
END) AS days,
(@cont_ix := (@cont_ix + IF(@cont_day = 1, 1, 0))) AS cont_ix,
@last_uid := uid,
@last_dt := created_ts login_day
FROM (SELECT
distinct create_id uid,
DATE(create_time) created_ts
FROM tag.content
WHERE create_id is not null
ORDER BY create_id, create_time) AS t,
(SELECT
@last_uid := '',
@last_dt := '',
@cont_ix := 0,
@cont_day := 0) AS t1
) AS t2
where days>=7
group by uid
/*筛选 出连续登陆大 于 N天的日期及用户数,可计算留存率 */
SELECT
count(distinct uid),
login_day 最后登陆天数
FROM (SELECT
uid,
@cont_day :=
(CASE WHEN (@last_uid = uid AND DATEDIFF(created_ts, @last_dt) = 1) THEN (@cont_day + 1)
WHEN (@last_uid = uid AND DATEDIFF(created_ts, @last_dt) < 1) THEN (@cont_day + 0)
ELSE 1
END) AS days,
(@cont_ix := (@cont_ix + IF(@cont_day = 1, 1, 0))) AS cont_ix,
@last_uid := uid,
@last_dt := created_ts login_day
FROM (SELECT
distinct create_id uid,
DATE(create_time) created_ts
FROM tag.content
WHERE create_id is not null
ORDER BY create_id, create_time) AS t,
(SELECT
@last_uid := '',
@last_dt := '',
@cont_ix := 0,
@cont_day := 0) AS t1
) AS t2
where days >=2
GROUP BY login_day
ORDER BY 最后登陆天数 DESC
对比在SPARK下类似查询
select
user_id,
min(brow_date),
max(brow_date),
count(*) num
from
(
select
user_id,brow_date,
row_number() over(partition by user_id order by brow_date)-brow_date as group_id **连续访问的组,此处是关键
from
( select user_id,
from_unixtime(unix_timestamp(vist_date,'yyyy/MM/dd'),'yyyyMMdd') as brow_date, **字符转日期
n
from
( select user_id,vist_date,
row_number() over(partition by user_id,vist_date order by vist_date) as n
from user
) t **抽取用户及访问的天数(去重)
where t.n=1 order by user_id,vist_date
) s
)
group by user_id,group_id
having count(*)>1 **连续访问大于1天的
order by user_id
+---------+--------------+--------------+---+
|user_id |min(brow_date)|max(brow_date)|num|
+---------+--------------+--------------+---+
|100605 |20141214 |20141217 |4 |
|1031737 |20141214 |20141215 |2 |
|10001082 |20141214 |20141215 |2 |
|10009860 |20141214 |20141216 |3 |
|10011993 |20141217 |20141218 |2 |
|10051209 |20141214 |20141215 |2 |
|10088967 |20141214 |20141218 |5 |
|10095384 |20141214 |20141218 |5 |
|10142625 |20141214 |20141218 |5 |
|10176801 |20141214 |20141218 |5 |
|10184463 |20141214 |20141218 |5 |
|10216314 |20141214 |20141218 |5 |
|10231590 |20141214 |20141217 |4 |
|10238478 |20141214 |20141218 |5 |
|10258551 |20141214 |20141218 |5 |
|10260285 |20141214 |20141215 |2 |
|10357344 |20141217 |20141218 |2 |
|10357743 |20141214 |20141217 |4 |
|10388376 |20141214 |20141215 |2 |
|100029775|20141214 |20141218 |5 |
+---------+--------------+--------------+---+
only showing top 20 rows