测试数据
channel1 appkey1 2018-12-03 16:47:38:000 6.6.6 android 1 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-03 16:47:38:000 6.6.6 android 2 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-03 16:47:38:000 6.6.6 android 3 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-03 16:47:38:000 6.6.6 android 4 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-03 16:47:38:000 6.6.6 android 5 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-03 16:47:38:000 6.6.6 android 6 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-03 16:47:38:000 6.6.6 android 7 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 1 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 2 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 3 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 4 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 5 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 6 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 8 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 9 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 10 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 11 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 12 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 13 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-04 16:47:38:000 6.6.6 android 14 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-05 16:47:38:000 6.6.6 android 1 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-05 16:47:38:000 6.6.6 android 2 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-05 16:47:38:000 6.6.6 android 3 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-05 16:47:38:000 6.6.6 android 4 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-05 16:47:38:000 6.6.6 android 5 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-06 16:47:38:000 6.6.6 android 1 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-06 16:47:38:000 6.6.6 android 2 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-06 16:47:38:000 6.6.6 android 3 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-06 16:47:38:000 6.6.6 android 4 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-07 16:47:38:000 6.6.6 android 1 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-07 16:47:38:000 6.6.6 android 2 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-07 16:47:38:000 6.6.6 android 3 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-08 16:47:38:000 6.6.6 android 1 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-08 16:47:38:000 6.6.6 android 2 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-15 16:47:38:000 6.6.6 android 8 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-15 16:47:38:000 6.6.6 android 9 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-15 16:47:38:000 6.6.6 android 10 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-15 16:47:38:000 6.6.6 android 11 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-15 16:47:38:000 6.6.6 android 12 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-15 16:47:38:000 6.6.6 android 13 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2018-12-15 16:47:38:000 6.6.6 android 14 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2019-01-01 16:47:38:000 6.6.6 android 8 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2019-01-01 16:47:38:000 6.6.6 android 9 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2019-01-01 16:47:38:000 6.6.6 android 10 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2019-01-01 16:47:38:000 6.6.6 android 11 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2019-01-01 16:47:38:000 6.6.6 android 12 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2019-01-01 16:47:38:000 6.6.6 android 13 startApp msgsssssssss page3 1542358058000 1542358058000 1
channel1 appkey1 2019-01-01 16:47:38:000 6.6.6 android 14 startApp msgsssssssss page3 1542358058000 1542358058000 1
代码以及结果
import org.apache.spark.sql.SparkSession object RemainTest { def remainDay (spark :SparkSession, eventPath : String): Unit ={ val eventinfoDF = spark.read.format("parquet").load(eventPath) eventinfoDF.createOrReplaceTempView("eventinfo") /** * SQL思路: * 第一步:拿到用户的所有登录时间 * 第二步:增加一列用户的首次登陆时间 * 第三部:增加一列用户登录的时间差 * 第四部:增加几列case when 获取业务留存天数 * */ val remainSql = "SELECT appkey, platform, appver, channel, first_day," + "sum(case when by_day = 0 then 1 else 0 end) day_0," + "sum(case when by_day = 1 then 1 else 0 end) day_1," + "sum(case when by_day = 2 then 1 else 0 end) day_2," + "sum(case when by_day = 3 then 1 else 0 end) day_3," + "sum(case when by_day = 4 then 1 else 0 end) day_4," + "sum(case when by_day = 5 then 1 else 0 end) day_5," + "sum(case when by_day = 6 then 1 else 0 end) day_6," + "sum(case when by_day = 7 then 1 else 0 end) day_7," + "sum(case when by_day = 14 then 1 else 0 end) day_14," + "sum(case when by_day = 30 then 1 else 0 end) day_30 "+ " FROM " + "(" + "SELECT appkey, platform, appver, channel, udid, first_day, datediff(login_time,first_day) as by_day " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,时间差,并按照这些字段分组 "FROM " + "(SELECT b.appkey, b.platform, b.appver, b.channel, b.udid, b.login_time,c.first_day " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,并按照这些字段分组 "FROM " + "(SELECT appkey,platform,appver,channel,udid,date login_time FROM eventinfo GROUP BY 1,2,3,4,5,6) b " + //获取应用、平台、版本、渠道、用户、所有登录时间,并按照这些字段分组 "LEFT JOIN (" + "SELECT appkey,platform,appver,channel,udid,min(login_time) first_day " + //获取应用、平台、版本、渠道、用户、首次登陆时间,并按照这些字段分组 "FROM (select appkey,platform,appver,channel,udid,date login_time FROM eventinfo group by 1,2,3,4,5,6) a group by 1,2,3,4,5 " + ") c " + "on b.udid = c.udid order by 1,2,3,4,5,6) e order by 1,2,3,4,5,6" + ") f " + "group by 1,2,3,4,5 order by 1,2,3,4,5" spark.sql(remainSql).show(1000,false) } /* * +-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+ |appkey |platform|appver|channel |first_day |day_0|day_1|day_2|day_3|day_4|day_5|day_6|day_7|day_14|day_30| +-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+ |appkey1|android |6.6.6 |channel1|2018-12-03|7 |6 |5 |4 |3 |2 |0 |0 |0 |0 | |appkey1|android |6.6.6 |channel1|2018-12-04|7 |0 |0 |0 |0 |0 |0 |0 |0 |0 | +-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+ * */ def remainWeek (spark :SparkSession, eventPath : String): Unit ={ val eventinfoDF = spark.read.format("parquet").load(eventPath) eventinfoDF.createOrReplaceTempView("eventinfo") val remainSql = "SELECT appkey, platform, appver, channel, first_week," + "sum(case when by_day = 0 then 1 else 0 end) day_0," + "sum(case when by_day = 1 then 1 else 0 end) day_1," + "sum(case when by_day = 2 then 1 else 0 end) day_2," + "sum(case when by_day = 3 then 1 else 0 end) day_3," + "sum(case when by_day = 4 then 1 else 0 end) day_4," + "sum(case when by_day = 5 then 1 else 0 end) day_5," + "sum(case when by_day = 6 then 1 else 0 end) day_6," + "sum(case when by_day = 7 then 1 else 0 end) day_7," + "sum(case when by_day = 14 then 1 else 0 end) day_14," + "sum(case when by_day = 30 then 1 else 0 end) day_30 "+ " FROM " + "(" + "SELECT appkey, platform, appver, channel, udid, first_week, (weekofyear(login_time)-weekofyear(first_week)) as by_day " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,时间差,并按照这些字段分组 "FROM " + "(SELECT b.appkey, b.platform, b.appver, b.channel, b.udid, b.login_time,c.first_week " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,并按照这些字段分组 "FROM " + "(SELECT appkey,platform,appver,channel,udid,week login_time FROM eventinfo GROUP BY 1,2,3,4,5,6) b " + //获取应用、平台、版本、渠道、用户、所有登录时间,并按照这些字段分组 "LEFT JOIN (" + "SELECT appkey,platform,appver,channel,udid,min(login_time) first_week " + //获取应用、平台、版本、渠道、用户、首次登陆时间,并按照这些字段分组 "FROM (select appkey,platform,appver,channel,udid,week login_time FROM eventinfo group by 1,2,3,4,5,6) a group by 1,2,3,4,5 " + ") c " + "on b.udid = c.udid order by 1,2,3,4,5,6) e order by 1,2,3,4,5,6" + ") f " + "group by 1,2,3,4,5 order by 1,2,3,4,5" spark.sql(remainSql).show(1000,false) } /* +-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+ |appkey |platform|appver|channel |first_week|day_0|day_1|day_2|day_3|day_4|day_5|day_6|day_7|day_14|day_30| +-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+ |appkey1|android |6.6.6 |channel1|2018-12-02|14 |7 |0 |0 |7 |0 |0 |0 |0 |0 | +-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+ */ def remainMonth (spark :SparkSession, eventPath : String): Unit ={ val eventinfoDF = spark.read.format("parquet").load(eventPath) eventinfoDF.createOrReplaceTempView("eventinfo") val remainSql = "SELECT appkey, platform, appver, channel, first_month," + "sum(case when by_day = 0 then 1 else 0 end) day_0," + "sum(case when by_day = 1 then 1 else 0 end) day_1," + "sum(case when by_day = 2 then 1 else 0 end) day_2," + "sum(case when by_day = 3 then 1 else 0 end) day_3," + "sum(case when by_day = 4 then 1 else 0 end) day_4," + "sum(case when by_day = 5 then 1 else 0 end) day_5," + "sum(case when by_day = 6 then 1 else 0 end) day_6," + "sum(case when by_day = 7 then 1 else 0 end) day_7," + "sum(case when by_day = 14 then 1 else 0 end) day_14," + "sum(case when by_day = 30 then 1 else 0 end) day_30 "+ " FROM " + "(" + "SELECT appkey, platform, appver, channel, udid, first_month, months_between(login_time,first_month) as by_day " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,时间差,并按照这些字段分组 "FROM " + "(SELECT b.appkey, b.platform, b.appver, b.channel, b.udid, b.login_time,c.first_month " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,并按照这些字段分组 "FROM " + "(SELECT appkey,platform,appver,channel,udid,month login_time FROM eventinfo GROUP BY 1,2,3,4,5,6) b " + //获取应用、平台、版本、渠道、用户、所有登录时间,并按照这些字段分组 "LEFT JOIN (" + "SELECT appkey,platform,appver,channel,udid,min(login_time) first_month " + //获取应用、平台、版本、渠道、用户、首次登陆时间,并按照这些字段分组 "FROM (select appkey,platform,appver,channel,udid,month login_time FROM eventinfo group by 1,2,3,4,5,6) a group by 1,2,3,4,5 " + ") c " + "on b.udid = c.udid order by 1,2,3,4,5,6) e order by 1,2,3,4,5,6" + ") f " + "group by 1,2,3,4,5 order by 1,2,3,4,5" spark.sql(remainSql).show(1000,false) } /* +-------+--------+------+--------+-----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+ |appkey |platform|appver|channel |first_month|day_0|day_1|day_2|day_3|day_4|day_5|day_6|day_7|day_14|day_30| +-------+--------+------+--------+-----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+ |appkey1|android |6.6.6 |channel1|2018-12 |14 |7 |0 |0 |0 |0 |0 |0 |0 |0 | +-------+--------+------+--------+-----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+ */ }