蚂蚁金服大数据面试题-蚂蚁森林

----题目

一.蚂蚁森林植物申领统计

问题:假设2017年1月1日开始记录低碳数据(user_low_carbon),假设2017年10月1日之前满足申领条件的用户都申领了一颗p004-胡杨,剩余的能量全部用来领取“p002-沙柳” 。统计在10月1日累计申领“p002-沙柳” 排名前10的用户信息;以及他比后一名多领了几颗沙柳。

得到的统计结果如下表样式:

user_id plant_count less_count(比后一名多领了几颗沙柳)

u_101 1000 100

u_088 900 400

u_103 500 …

1.创建表

create table user_low_carbon(user_id String,data_dt String,low_carbon int) row format delimited fields terminated by '\t';
​
create table plant_carbon(plant_id string,plant_name String,low_carbon int) row format delimited fields terminated by '\t';

2.加载数据

load data local inpath "/opt/module/hive/datas/user_low_carbon.txt" into table user_low_carbon;
​
load data local inpath "/opt/module/hive/datas/plant_carbon.txt" into table plant_carbon;

3.--转化时间字段

select
 user_id,
 regexp_replace(data_dt,'/','-'),
 low_carbon 
from user_low_carbon
 where regexp_replace(data_dt,'/','-') <'2017-10-01' t1

4.-- 求能量总和

select
 t1.user_id,
 sum(t1.low_carbon)-(select low_carbon from plant_carbon where plant_name='胡杨') low_carbon_sum
from (
     select
      user_id,
      regexp_replace(data_dt,'/','-'),
      low_carbon 
     from user_low_carbon
      where regexp_replace(data_dt,'/','-') <'2017-10-01'
)t1
group by t1.user_id   t2

5.--求出比后一名多领了几棵

select
  t2.user_id,
  t2.low_carbon_sum
  from(
select
  t1.userId,
  sum(t1.low_carbon)-(select low_carbon from plant_carbon where plant_name='胡杨')
  from(
  select
  user_id,
  regexp_replace(data_dt,'/','-')
  low_carbon
  from user_id_carbon
  where regexp_rplace(data_dt,'/','-')<'2017-10-1' 
  ) t1
  group by t1.user_id )
  t2
 )
  where t2.low_carbon_sum>0
  order by t2.low_carbon_sum desc
  limit 11
  ) t3

6.答案

select 
 t4.user_id,
 t4.plant_count,
 t4.plant_count-lead(t4.plant_count,1,0) over(order by t4.plant_count desc) less_count
 from (
    select
     t3.user_id,
     floor(t3.low_carbon_sum/(select low_carbon from plant_carbon where plant_name='沙柳')) plant_count
    from(
         select
         t2.user_id,
         t2.low_carbon_sum
         from (
              select
               t1.user_id,
               sum(t1.low_carbon)-(select low_carbon from plant_carbon where plant_name='胡杨') low_carbon_sum
              from (
                   select
                    user_id,
                    regexp_replace(data_dt,'/','-'),
                    low_carbon 
                   from user_low_carbon
                    where regexp_replace(data_dt,'/','-') <'2017-10-01'
              )t1
              group by t1.user_id
         )t2
         where t2.low_carbon_sum>0
         order by  t2.low_carbon_sum desc 
         limit 11
    ) t3
)t4

结果:

| t4.user_id  | t4.plant_count  | less_count  |
+-------------+-----------------+-------------+
| u_007       | 66              | 3           |
| u_013       | 63              | 10          |
| u_008       | 53              | 7           |
| u_005       | 46              | 1           |
| u_010       | 45              | 1           |
| u_014       | 44              | 5           |
| u_011       | 39              | 2           |
| u_009       | 37              | 5           |
| u_006       | 32              | 9           |
| u_002       | 23              | 1           |
| u_004       | 22              | 22          |
+-------------+-----------------+-------------+

二、蚂蚁森林低碳用户排名分析

问题:查询user_low_carbon表中每日流水记录,条件为:

用户在2017年,连续三天(或以上)的天数里,每天减少碳排放(low_carbon)都超过100g的用户低碳流水。

需要查询返回满足以上条件的user_low_carbon表中的记录流水。

例如用户u_002符合条件的记录如下,因为2017/1/2~2017/1/5连续四天的碳排放量之和都大于等于100g:

seq(key) user_id data_dt low_carbon

xxxxx10 u_002 2017/1/2 150

xxxxx11 u_002 2017/1/2 70

xxxxx12 u_002 2017/1/3 30

xxxxx13 u_002 2017/1/3 80

xxxxx14 u_002 2017/1/4 150

xxxxx14 u_002 2017/1/5 101

备注:统计方法不限于sql、procedure、python,java等

方法一:

--1.查找到在2017年的用户
select
 user_id,
 regexp_replace(data_dt,'/','-') data_dt,
 sum(low_carbon) low_carbon_sum 
from user_low_carbon
 where year(regexp_replace(data_dt,'/','-')) ='2017'
group by user_id,regexp_replace(data_dt,'/','-') t1
​
--2.筛选2017年的用户里碳排放超过100g的用户
select
 t1.user_id,
 t1.data_dt,
 t1.low_carbon_sum
from (
      select
       user_id,
       regexp_replace(data_dt,'/','-') data_dt,
       sum(low_carbon) low_carbon_sum 
      from user_low_carbon
       where year(regexp_replace(data_dt,'/','-')) ='2017'
      group by user_id,regexp_replace(data_dt,'/','-')
      )t1
where t1.low_carbon_sum>=100  t2
​
2021-2-1  2021-2-2  2021-2-3
2021-2-2  2021-2-3  2021-2-4
2021-2-3  2021-2-4  2021-2-5
​
--3.通过lag和lead开窗函数筛选第三天和第二天碳排放达标的用户,分为今天,昨天,前天,明天,后天
select
 t2.user_id,
 t2.low_carbon_sum,
 lag(t2.data_dt,2,'0000-00-00')over(partition by t2.user_id order by t2.data_dt) qt,
 lag(t2.data_dt,1,'0000-00-00')over(partition by t2.user_id order by t2.data_dt) zt,
 t2.data_dt,
 lead(t2.data_dt,1,'9999-99-99')over(partition by t2.user_id order by t2.data_dt) mt,
 lead(t2.data_dt,2,'9999-99-99')over(partition by t2.user_id order by t2.data_dt) ht
from (
     select
      t1.user_id,
      t1.data_dt,
      t1.low_carbon_sum
     from (
           select
            user_id,
            regexp_replace(data_dt,'/','-') data_dt,
            sum(low_carbon) low_carbon_sum 
           from user_low_carbon
            where year(regexp_replace(data_dt,'/','-')) ='2017'
           group by user_id,regexp_replace(data_dt,'/','-')
           )t1
     where t1.low_carbon_sum>=100 
)t2   t3
​
--4.判断 天数是否正确,今天-前天,今天-昨天,今天-明天,今天-后天这四个的低碳能量是否大于100g
select
 t3.user_id,
 t3.data_dt,
 t3.low_carbon_sum,
 datediff(t3.data_dt,t3.qt) jt_qt,
 datediff(t3.data_dt,t3.zt) jt_zt,
 datediff(t3.data_dt,t3.mt) jt_mt,
 datediff(t3.data_dt,t3.ht) jt_ht
from (
     select
      t2.user_id,
      t2.low_carbon_sum,
      lag(t2.data_dt,2,'0000-00-00')over(partition by t2.user_id order by t2.data_dt) qt,
      lag(t2.data_dt,1,'0000-00-00')over(partition by t2.user_id order by t2.data_dt) zt,
      t2.data_dt,
      lead(t2.data_dt,1,'9999-99-99')over(partition by t2.user_id order by t2.data_dt) mt,
      lead(t2.data_dt,2,'9999-99-99')over(partition by t2.user_id order by t2.data_dt) ht
     from (
          select
           t1.user_id,
           t1.data_dt,
           t1.low_carbon_sum
          from (
                select
                 user_id,
                 regexp_replace(data_dt,'/','-') data_dt,
                 sum(low_carbon) low_carbon_sum 
                from user_low_carbon
                 where year(regexp_replace(data_dt,'/','-')) ='2017'
                group by user_id,regexp_replace(data_dt,'/','-')
                )t1
          where t1.low_carbon_sum>=100 
     )t2 
)t3  t4
​
​
--5.查询减少碳排放大于等于两天的用户
select
 t4.user_id,
 t4.data_dt,
 t4.low_carbon_sum
from (
    select
     t3.user_id,
     t3.data_dt,
     t3.low_carbon_sum,
     datediff(t3.data_dt,t3.qt) jt_qt,
     datediff(t3.data_dt,t3.zt) jt_zt,
     datediff(t3.data_dt,t3.mt) jt_mt,
     datediff(t3.data_dt,t3.ht) jt_ht
    from (
         select
          t2.user_id,
          t2.low_carbon_sum,
          lag(t2.data_dt,2,'0000-00-00')over(partition by t2.user_id order by t2.data_dt) qt,
          lag(t2.data_dt,1,'0000-00-00')over(partition by t2.user_id order by t2.data_dt) zt,
          t2.data_dt,
          lead(t2.data_dt,1,'9999-99-99')over(partition by t2.user_id order by t2.data_dt) mt,
          lead(t2.data_dt,2,'9999-99-99')over(partition by t2.user_id order by t2.data_dt) ht
         from (
              select
               t1.user_id,
               t1.data_dt,
               t1.low_carbon_sum
              from (
                    select
                     user_id,
                     regexp_replace(data_dt,'/','-') data_dt,
                     sum(low_carbon) low_carbon_sum 
                    from user_low_carbon
                     where year(regexp_replace(data_dt,'/','-')) ='2017'
                    group by user_id,regexp_replace(data_dt,'/','-')
                    )t1
              where t1.low_carbon_sum>=100 
         )t2 
    )t3 
    )t4
where (t4.jt_qt =2 and t4.jt_zt=1)or(t4.jt_zt =1 and t4.jt_mt=-1)or(t4.jt_mt =-1 and t4.jt_ht=-2) t5
​
--6.与(user_low_carbon)相连接
select
 t6.user_id,
 t6.data_dt,
 t6.low_carbon
from(
    select
     t4.user_id,
     t4.data_dt,
     t4.low_carbon_sum
    from (
        select
         t3.user_id,
         t3.data_dt,
         t3.low_carbon_sum,
         datediff(t3.data_dt,t3.qt) jt_qt,
         datediff(t3.data_dt,t3.zt) jt_zt,
         datediff(t3.data_dt,t3.mt) jt_mt,
         datediff(t3.data_dt,t3.ht) jt_ht
        from (
             select
              t2.user_id,
              t2.low_carbon_sum,
              lag(t2.data_dt,2,'0000-00-00')over(partition by t2.user_id order by t2.data_dt) qt,
              lag(t2.data_dt,1,'0000-00-00')over(partition by t2.user_id order by t2.data_dt) zt,
              t2.data_dt,
              lead(t2.data_dt,1,'9999-99-99')over(partition by t2.user_id order by t2.data_dt) mt,
              lead(t2.data_dt,2,'9999-99-99')over(partition by t2.user_id order by t2.data_dt) ht
             from (
                  select
                   t1.user_id,
                   t1.data_dt,
                   t1.low_carbon_sum
                  from (
                        select
                         user_id,
                         regexp_replace(data_dt,'/','-') data_dt,
                         sum(low_carbon) low_carbon_sum 
                        from user_low_carbon
                         where year(regexp_replace(data_dt,'/','-')) ='2017'
                        group by user_id,regexp_replace(data_dt,'/','-')
                        )t1
                  where t1.low_carbon_sum>=100 
             )t2 
        )t3 
        )t4
    where (t4.jt_qt =2 and t4.jt_zt=1)or(t4.jt_zt =1 and t4.jt_mt=-1)or(t4.jt_mt =-1 and t4.jt_ht=-2)
) t5 join (
           select
            user_id,
            regexp_replace(data_dt,'/','-') data_dt,
            low_carbon
           from user_low_carbon
            where year(regexp_replace(data_dt,'/','-')) ='2017'
)t6
on t5.user_id=t6.user_id and t5.data_dt=t6.data_dt

结果:

+-------------+-------------+----------------+
| t6.user_id  | t6.data_dt  | t6.low_carbon  |
+-------------+-------------+----------------+
| u_002       | 2017-1-2    | 150            |
| u_002       | 2017-1-2    | 70             |
| u_002       | 2017-1-3    | 30             |
| u_002       | 2017-1-3    | 80             |
| u_002       | 2017-1-4    | 150            |
| u_002       | 2017-1-5    | 101            |
| u_005       | 2017-1-2    | 50             |
| u_005       | 2017-1-2    | 80             |
| u_005       | 2017-1-3    | 180            |
| u_005       | 2017-1-4    | 180            |
| u_005       | 2017-1-4    | 10             |
| u_008       | 2017-1-4    | 260            |
| u_008       | 2017-1-5    | 360            |
| u_008       | 2017-1-6    | 160            |
| u_008       | 2017-1-7    | 60             |
| u_008       | 2017-1-7    | 60             |
| u_009       | 2017-1-2    | 70             |
| u_009       | 2017-1-2    | 70             |
| u_009       | 2017-1-3    | 170            |
| u_009       | 2017-1-4    | 270            |
| u_010       | 2017-1-4    | 90             |
| u_010       | 2017-1-4    | 80             |
| u_010       | 2017-1-5    | 90             |
| u_010       | 2017-1-5    | 90             |
| u_010       | 2017-1-6    | 190            |
| u_010       | 2017-1-7    | 90             |
| u_010       | 2017-1-7    | 90             |
| u_011       | 2017-1-1    | 110            |
| u_011       | 2017-1-2    | 100            |
| u_011       | 2017-1-2    | 100            |
| u_011       | 2017-1-3    | 120            |
| u_011       | 2017-1-4    | 100            |
| u_011       | 2017-1-5    | 100            |
| u_011       | 2017-1-6    | 100            |
| u_011       | 2017-1-7    | 130            |
| u_011       | 2017-1-7    | 100            |
| u_013       | 2017-1-2    | 150            |
| u_013       | 2017-1-2    | 50             |
| u_013       | 2017-1-3    | 150            |
| u_013       | 2017-1-4    | 550            |
| u_013       | 2017-1-5    | 350            |
| u_014       | 2017-1-5    | 250            |
| u_014       | 2017-1-6    | 120            |
| u_014       | 2017-1-7    | 270            |
| u_014       | 2017-1-7    | 20             |
+-------------+-------------+----------------+

方法二:

select
 t1.user_id,
 t1.data_dt,
 t1.low_carbon_sum,
 rank()over(partition by t1.user_id order by t1.data_dt) rk
from (
      select
       user_id,
       regexp_replace(data_dt,'/','-') data_dt,
       sum(low_carbon) low_carbon_sum 
      from user_low_carbon
       where year(regexp_replace(data_dt,'/','-')) ='2017'
      group by user_id,regexp_replace(data_dt,'/','-')
      )t1
where t1.low_carbon_sum>=100  t2
​
--等差数列less_dt
select
 t2.user_id,
 t2.data_dt,
 t2.low_carbon_sum,
 date_sub(t2.data_dt,t2.rk) less_dt
from (
     select
      t1.user_id,
      t1.data_dt,
      t1.low_carbon_sum,
      rank()over(partition by t1.user_id order by t1.data_dt) rk
     from (
           select
            user_id,
            regexp_replace(data_dt,'/','-') data_dt,
            sum(low_carbon) low_carbon_sum 
           from user_low_carbon
            where year(regexp_replace(data_dt,'/','-')) ='2017'
           group by user_id,regexp_replace(data_dt,'/','-')
           )t1
     where t1.low_carbon_sum>=100
)t2  t3
​
​
select
 t3.user_id,
 t3.data_dt,
 t3.low_carbon_sum,
 count(*)over(partition by t3.user_id,t3.less_dt) c_dt
from (
    select
     t2.user_id,
     t2.data_dt,
     t2.low_carbon_sum,
     date_sub(t2.data_dt,t2.rk) less_dt
    from (
         select
          t1.user_id,
          t1.data_dt,
          t1.low_carbon_sum,
          rank()over(partition by t1.user_id order by t1.data_dt) rk
         from (
               select
                user_id,
                regexp_replace(data_dt,'/','-') data_dt,
                sum(low_carbon) low_carbon_sum 
               from user_low_carbon
                where year(regexp_replace(data_dt,'/','-')) ='2017'
               group by user_id,regexp_replace(data_dt,'/','-')
               )t1
         where t1.low_carbon_sum>=100
    )t2
)t3  t4
​
​
​
select
t4.user_id,
t4.data_dt,
t4.low_carbon_sum
from (
    select
     t3.user_id,
     t3.data_dt,
     t3.low_carbon_sum,
     count(*)over(partition by t3.user_id,t3.less_dt) c_dt
    from (
        select
         t2.user_id,
         t2.data_dt,
         t2.low_carbon_sum,
         date_sub(t2.data_dt,t2.rk) less_dt
        from (
             select
              t1.user_id,
              t1.data_dt,
              t1.low_carbon_sum,
              rank()over(partition by t1.user_id order by t1.data_dt) rk
             from (
                   select
                    user_id,
                    regexp_replace(data_dt,'/','-') data_dt,
                    sum(low_carbon) low_carbon_sum 
                   from user_low_carbon
                    where year(regexp_replace(data_dt,'/','-')) ='2017'
                   group by user_id,regexp_replace(data_dt,'/','-')
                   )t1
             where t1.low_carbon_sum>=100
        )t2
    )t3
)t4
where c_dt>=3

提供的数据说明:

user_low_carbon:

u_001 2017/1/1 10

u_001 2017/1/2 150

u_001 2017/1/2 110

u_001 2017/1/2 10

u_001 2017/1/4 50

u_001 2017/1/4 10

u_001 2017/1/6 45

u_001 2017/1/6 90

u_002 2017/1/1 10

u_002 2017/1/2 150

u_002 2017/1/2 70

u_002 2017/1/3 30

u_002 2017/1/3 80

u_002 2017/1/4 150

u_002 2017/1/5 101

u_002 2017/1/6 68

...

plant_carbon:

p001 梭梭树 17

p002 沙柳 19

p003 樟子树 146

p004 胡杨 215

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值