用sparkSql计算留存用户

测试数据

channel1    appkey1    2018-12-03 16:47:38:000    6.6.6    android    1    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-03 16:47:38:000    6.6.6    android    2    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-03 16:47:38:000    6.6.6    android    3    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-03 16:47:38:000    6.6.6    android    4    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-03 16:47:38:000    6.6.6    android    5    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-03 16:47:38:000    6.6.6    android    6    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-03 16:47:38:000    6.6.6    android    7    startApp    msgsssssssss    page3    1542358058000    1542358058000    1


channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    1    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    2    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    3    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    4    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    5    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    6    startApp    msgsssssssss    page3    1542358058000    1542358058000    1

channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    8    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    9    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    10    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    11    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    12    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    13    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-04 16:47:38:000    6.6.6    android    14    startApp    msgsssssssss    page3    1542358058000    1542358058000    1

channel1    appkey1    2018-12-05 16:47:38:000    6.6.6    android    1    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-05 16:47:38:000    6.6.6    android    2    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-05 16:47:38:000    6.6.6    android    3    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-05 16:47:38:000    6.6.6    android    4    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-05 16:47:38:000    6.6.6    android    5    startApp    msgsssssssss    page3    1542358058000    1542358058000    1


channel1    appkey1    2018-12-06 16:47:38:000    6.6.6    android    1    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-06 16:47:38:000    6.6.6    android    2    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-06 16:47:38:000    6.6.6    android    3    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-06 16:47:38:000    6.6.6    android    4    startApp    msgsssssssss    page3    1542358058000    1542358058000    1

channel1    appkey1    2018-12-07 16:47:38:000    6.6.6    android    1    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-07 16:47:38:000    6.6.6    android    2    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-07 16:47:38:000    6.6.6    android    3    startApp    msgsssssssss    page3    1542358058000    1542358058000    1


channel1    appkey1    2018-12-08 16:47:38:000    6.6.6    android    1    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-08 16:47:38:000    6.6.6    android    2    startApp    msgsssssssss    page3    1542358058000    1542358058000    1

channel1    appkey1    2018-12-15 16:47:38:000    6.6.6    android    8    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-15 16:47:38:000    6.6.6    android    9    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-15 16:47:38:000    6.6.6    android    10    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-15 16:47:38:000    6.6.6    android    11    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-15 16:47:38:000    6.6.6    android    12    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-15 16:47:38:000    6.6.6    android    13    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2018-12-15 16:47:38:000    6.6.6    android    14    startApp    msgsssssssss    page3    1542358058000    1542358058000    1


channel1    appkey1    2019-01-01 16:47:38:000    6.6.6    android    8    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2019-01-01 16:47:38:000    6.6.6    android    9    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2019-01-01 16:47:38:000    6.6.6    android    10    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2019-01-01 16:47:38:000    6.6.6    android    11    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2019-01-01 16:47:38:000    6.6.6    android    12    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2019-01-01 16:47:38:000    6.6.6    android    13    startApp    msgsssssssss    page3    1542358058000    1542358058000    1
channel1    appkey1    2019-01-01 16:47:38:000    6.6.6    android    14    startApp    msgsssssssss    page3    1542358058000    1542358058000    1

代码以及结果

import org.apache.spark.sql.SparkSession

object RemainTest {
  def remainDay (spark :SparkSession, eventPath : String): Unit ={
    val eventinfoDF = spark.read.format("parquet").load(eventPath)
    eventinfoDF.createOrReplaceTempView("eventinfo")

    /**
      * SQL思路:
      * 第一步:拿到用户的所有登录时间
      * 第二步:增加一列用户的首次登陆时间
      * 第三部:增加一列用户登录的时间差
      * 第四部:增加几列case when 获取业务留存天数
      *
      */
    val remainSql = "SELECT appkey, platform, appver, channel, first_day," +
      "sum(case when by_day = 0 then 1 else 0 end) day_0," +
      "sum(case when by_day = 1 then 1 else 0 end) day_1," +
      "sum(case when by_day = 2 then 1 else 0 end) day_2," +
      "sum(case when by_day = 3 then 1 else 0 end) day_3," +
      "sum(case when by_day = 4 then 1 else 0 end) day_4," +
      "sum(case when by_day = 5 then 1 else 0 end) day_5," +
      "sum(case when by_day = 6 then 1 else 0 end) day_6," +
      "sum(case when by_day = 7 then 1 else 0 end) day_7," +
      "sum(case when by_day = 14 then 1 else 0 end) day_14," +
      "sum(case when by_day = 30 then 1 else 0 end) day_30 "+
      " FROM " +
      "(" +
          "SELECT  appkey, platform, appver, channel, udid, first_day, datediff(login_time,first_day) as by_day " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,时间差,并按照这些字段分组
          "FROM " +
                  "(SELECT b.appkey, b.platform, b.appver, b.channel, b.udid, b.login_time,c.first_day " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,并按照这些字段分组
                   "FROM " +
                          "(SELECT appkey,platform,appver,channel,udid,date login_time FROM eventinfo GROUP BY 1,2,3,4,5,6) b " + //获取应用、平台、版本、渠道、用户、所有登录时间,并按照这些字段分组
                   "LEFT JOIN (" +
                          "SELECT appkey,platform,appver,channel,udid,min(login_time) first_day " + //获取应用、平台、版本、渠道、用户、首次登陆时间,并按照这些字段分组
                                                 "FROM (select appkey,platform,appver,channel,udid,date login_time FROM eventinfo group by 1,2,3,4,5,6) a group by 1,2,3,4,5 " +
                              ") c " +
                   "on b.udid = c.udid order by 1,2,3,4,5,6) e order by 1,2,3,4,5,6" +
      ") f " +
      "group by 1,2,3,4,5 order by 1,2,3,4,5"
    spark.sql(remainSql).show(1000,false)
  }

  /*
  *
+-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+
|appkey |platform|appver|channel |first_day |day_0|day_1|day_2|day_3|day_4|day_5|day_6|day_7|day_14|day_30|
+-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+
|appkey1|android |6.6.6 |channel1|2018-12-03|7    |6    |5    |4    |3    |2    |0    |0    |0     |0     |
|appkey1|android |6.6.6 |channel1|2018-12-04|7    |0    |0    |0    |0    |0    |0    |0    |0     |0     |
+-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+
  * */

  def remainWeek (spark :SparkSession, eventPath : String): Unit ={
    val eventinfoDF = spark.read.format("parquet").load(eventPath)
    eventinfoDF.createOrReplaceTempView("eventinfo")
    val remainSql = "SELECT appkey, platform, appver, channel, first_week," +
      "sum(case when by_day = 0 then 1 else 0 end) day_0," +
      "sum(case when by_day = 1 then 1 else 0 end) day_1," +
      "sum(case when by_day = 2 then 1 else 0 end) day_2," +
      "sum(case when by_day = 3 then 1 else 0 end) day_3," +
      "sum(case when by_day = 4 then 1 else 0 end) day_4," +
      "sum(case when by_day = 5 then 1 else 0 end) day_5," +
      "sum(case when by_day = 6 then 1 else 0 end) day_6," +
      "sum(case when by_day = 7 then 1 else 0 end) day_7," +
      "sum(case when by_day = 14 then 1 else 0 end) day_14," +
      "sum(case when by_day = 30 then 1 else 0 end) day_30 "+
      " FROM " +
      "(" +
      "SELECT  appkey, platform, appver, channel, udid, first_week, (weekofyear(login_time)-weekofyear(first_week)) as by_day " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,时间差,并按照这些字段分组
      "FROM " +
      "(SELECT b.appkey, b.platform, b.appver, b.channel, b.udid, b.login_time,c.first_week " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,并按照这些字段分组
      "FROM " +
      "(SELECT appkey,platform,appver,channel,udid,week login_time FROM eventinfo GROUP BY 1,2,3,4,5,6) b " + //获取应用、平台、版本、渠道、用户、所有登录时间,并按照这些字段分组
      "LEFT JOIN (" +
      "SELECT appkey,platform,appver,channel,udid,min(login_time) first_week " + //获取应用、平台、版本、渠道、用户、首次登陆时间,并按照这些字段分组
      "FROM (select appkey,platform,appver,channel,udid,week login_time FROM eventinfo group by 1,2,3,4,5,6) a group by 1,2,3,4,5 " +
      ") c " +
      "on b.udid = c.udid order by 1,2,3,4,5,6) e order by 1,2,3,4,5,6" +
      ") f " +
      "group by 1,2,3,4,5 order by 1,2,3,4,5"
    spark.sql(remainSql).show(1000,false)
  }
  /*
+-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+
|appkey |platform|appver|channel |first_week|day_0|day_1|day_2|day_3|day_4|day_5|day_6|day_7|day_14|day_30|
+-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+
|appkey1|android |6.6.6 |channel1|2018-12-02|14   |7    |0    |0    |7    |0    |0    |0    |0     |0     |
+-------+--------+------+--------+----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+
   */

  def remainMonth (spark :SparkSession, eventPath : String): Unit ={
    val eventinfoDF = spark.read.format("parquet").load(eventPath)
    eventinfoDF.createOrReplaceTempView("eventinfo")
    val remainSql = "SELECT appkey, platform, appver, channel, first_month," +
      "sum(case when by_day = 0 then 1 else 0 end) day_0," +
      "sum(case when by_day = 1 then 1 else 0 end) day_1," +
      "sum(case when by_day = 2 then 1 else 0 end) day_2," +
      "sum(case when by_day = 3 then 1 else 0 end) day_3," +
      "sum(case when by_day = 4 then 1 else 0 end) day_4," +
      "sum(case when by_day = 5 then 1 else 0 end) day_5," +
      "sum(case when by_day = 6 then 1 else 0 end) day_6," +
      "sum(case when by_day = 7 then 1 else 0 end) day_7," +
      "sum(case when by_day = 14 then 1 else 0 end) day_14," +
      "sum(case when by_day = 30 then 1 else 0 end) day_30 "+
      " FROM " +
      "(" +
      "SELECT  appkey, platform, appver, channel, udid, first_month, months_between(login_time,first_month) as by_day " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,时间差,并按照这些字段分组
      "FROM " +
      "(SELECT b.appkey, b.platform, b.appver, b.channel, b.udid, b.login_time,c.first_month " + //获取应用、平台、版本、渠道、用户、所有登录时间、首次登陆时间,并按照这些字段分组
      "FROM " +
      "(SELECT appkey,platform,appver,channel,udid,month login_time FROM eventinfo GROUP BY 1,2,3,4,5,6) b " + //获取应用、平台、版本、渠道、用户、所有登录时间,并按照这些字段分组
      "LEFT JOIN (" +
      "SELECT appkey,platform,appver,channel,udid,min(login_time) first_month " + //获取应用、平台、版本、渠道、用户、首次登陆时间,并按照这些字段分组
      "FROM (select appkey,platform,appver,channel,udid,month login_time FROM eventinfo group by 1,2,3,4,5,6) a group by 1,2,3,4,5 " +
      ") c " +
      "on b.udid = c.udid order by 1,2,3,4,5,6) e order by 1,2,3,4,5,6" +
      ") f " +
      "group by 1,2,3,4,5 order by 1,2,3,4,5"
    spark.sql(remainSql).show(1000,false)
  }
/*
+-------+--------+------+--------+-----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+
|appkey |platform|appver|channel |first_month|day_0|day_1|day_2|day_3|day_4|day_5|day_6|day_7|day_14|day_30|
+-------+--------+------+--------+-----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+
|appkey1|android |6.6.6 |channel1|2018-12    |14   |7    |0    |0    |0    |0    |0    |0    |0     |0     |
+-------+--------+------+--------+-----------+-----+-----+-----+-----+-----+-----+-----+-----+------+------+
 */

}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值