1 活跃用户
1.1 DWS层
目标:统计当日、当周、当月活跃用户
活跃用户指的是在统计周期内有过通话记录的用户
1.1.1 每日活跃用户明细
1)建表语句
hive (calllogs)>
-- Daily active-user detail: one row per msisdn with at least one call record
-- on the statistics day, partitioned by that day.
DROP TABLE IF EXISTS dws_au_detail_day;
CREATE TABLE dws_au_detail_day (
    sys_id              string,
    service_name        string,
    home_province_code  string,
    visit_province_code string,
    channel_code        string,
    service_code        string,
    imsi                string,
    msisdn              string
)
COMMENT '活跃用户按天明细'
PARTITIONED BY (`dt` string)
STORED AS PARQUET
LOCATION '/warehouse/calllogs/dws/dws_au_detail_day/';
2)数据导入
以用户手机号为key进行聚合
hive (calllogs)>
-- Load one day of active users into dws_au_detail_day (dynamic partition dt).
-- group by msisdn dedupes a user's multiple call records for the day;
-- collect_set(col)[0] keeps one arbitrary value per attribute -- assumes these
-- attributes are stable per msisdn within a day; TODO confirm upstream.
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_au_detail_day partition(dt)
select
collect_set(sys_id)[0] sys_id,
collect_set(service_name)[0] service_name,
collect_set(home_province_code)[0] home_province_code,
collect_set(visit_province_code)[0] visit_province_code,
collect_set(channel_code)[0] channel_code,
collect_set(service_code)[0] service_code,
collect_set(imsi)[0] imsi,
msisdn,
'2021-12-07'
from dwd_calllogs_cdr
where dt='2021-12-07'
group by msisdn;
3)查询导入结果
hive (calllogs)> select * from dws_au_detail_day limit 5;
hive (calllogs)> select count(*) from dws_au_detail_day;
1.1.2 每周活跃用户明细
1)建表语句
hive (calllogs)>
-- Weekly active-user detail: one row per msisdn active during the week.
-- The week_dt partition key is built by the loader as '<monday>_<sunday>'.
DROP TABLE IF EXISTS dws_au_detail_week;
CREATE TABLE dws_au_detail_week (
    sys_id              string,
    service_name        string,
    home_province_code  string,
    visit_province_code string,
    channel_code        string,
    service_code        string,
    imsi                string,
    msisdn              string,
    monday_date         string,
    sunday_date         string
)
COMMENT '活跃用户按周明细'
PARTITIONED BY (`week_dt` string)
STORED AS PARQUET
LOCATION '/warehouse/calllogs/dws/dws_au_detail_week/';
2)数据导入
以用户手机号为key按周进行聚合
hive (calllogs)>
-- Load one week of active users into dws_au_detail_week (dynamic partition).
-- next_day(d,'MO') is the first Monday strictly after d, so
-- date_add(next_day(d,'MO'),-7) is the Monday of d's week and -1 its Sunday.
-- collect_set(col)[0] keeps one arbitrary value per attribute per msisdn.
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_au_detail_week partition(week_dt)
select
collect_set(sys_id)[0] sys_id,
collect_set(service_name)[0] service_name,
collect_set(home_province_code)[0] home_province_code,
collect_set(visit_province_code)[0] visit_province_code,
collect_set(channel_code)[0] channel_code,
collect_set(service_code)[0] service_code,
collect_set(imsi)[0] imsi,
msisdn,
date_add(next_day('2021-12-07','MO'),-7),
date_add(next_day('2021-12-07','MO'),-1),
-- FIX: build week_dt with '_' (was '-'). The loader script and the ADS query
-- both use '_' as the monday/sunday separator, so partitions written with '-'
-- were never matched by the ADS week_dt filter.
concat(date_add(next_day('2021-12-07','MO'),-7),'_',date_add(next_day('2021-12-07','MO'),-1))
from dwd_calllogs_cdr
where dt >= date_add(next_day('2021-12-07','MO'),-7) and dt <= date_add(next_day('2021-12-07','MO'),-1)
group by msisdn;
3)查询导入结果
hive (calllogs)> select * from dws_au_detail_week limit 5;
hive (calllogs)> select count(*) from dws_au_detail_week;
1.1.3 每月活跃用户明细
1)建表语句
hive (calllogs)>
-- Monthly active-user detail: one row per msisdn active during the month.
-- The month_dt partition key is the month string 'yyyy-MM'.
DROP TABLE IF EXISTS dws_au_detail_month;
CREATE TABLE dws_au_detail_month (
    sys_id              string,
    service_name        string,
    home_province_code  string,
    visit_province_code string,
    channel_code        string,
    service_code        string,
    imsi                string,
    msisdn              string
)
COMMENT '活跃用户按月明细'
PARTITIONED BY (`month_dt` string)
STORED AS PARQUET
LOCATION '/warehouse/calllogs/dws/dws_au_detail_month/';
2)数据导入
以用户手机号为key按月进行聚合
hive (calllogs)>
-- Load one month of active users into dws_au_detail_month.
-- group by msisdn dedupes a user's call records across the whole month;
-- collect_set(col)[0] keeps one arbitrary value per attribute.
-- month_dt partition value is the month of the given date ('yyyy-MM').
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_au_detail_month partition(month_dt)
select
collect_set(sys_id)[0] sys_id,
collect_set(service_name)[0] service_name,
collect_set(home_province_code)[0] home_province_code,
collect_set(visit_province_code)[0] visit_province_code,
collect_set(channel_code)[0] channel_code,
collect_set(service_code)[0] service_code,
collect_set(imsi)[0] imsi,
msisdn,
date_format('2021-12-07','yyyy-MM')
from dwd_calllogs_cdr
where date_format(dt,'yyyy-MM') = date_format('2021-12-07','yyyy-MM')
group by msisdn;
3)查询导入结果
hive (calllogs)> select * from dws_au_detail_month limit 5;
hive (calllogs)> select count(*) from dws_au_detail_month;
1.1.4 DWS层活跃用户加载数据脚本
1)在hadoop101的/home/hadoop/bin目录下创建脚本
[hadoop@hadoop101 bin]$ vim calllogs_dws_au.sh
在脚本中编写如下内容
#!/bin/bash
# Load the DWS active-user detail tables (day / week / month) for one date.
# Usage: calllogs_dws_au.sh [yyyy-MM-dd]; defaults to yesterday when no arg.
DB=calllogs
hive=/opt/modules/hive/bin/hive
# Use the supplied date if given, otherwise yesterday.
# FIX: quote "$1" -- the unquoted test [ -n $1 ] expands to [ -n ], which is
# always true, so the else branch (default date) could never be reached.
if [ -n "$1" ] ;then
calllogs_date=$1
else
calllogs_date=`date -d "-1 day" +%F`
fi
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table "$DB".dws_au_detail_day partition(dt)
select
collect_set(sys_id)[0] sys_id,
collect_set(service_name)[0] service_name,
collect_set(home_province_code)[0] home_province_code,
collect_set(visit_province_code)[0] visit_province_code,
collect_set(channel_code)[0] channel_code,
collect_set(service_code)[0] service_code,
collect_set(imsi)[0] imsi,
msisdn,
'$calllogs_date'
from "$DB".dwd_calllogs_cdr
where dt='$calllogs_date'
group by msisdn;
insert overwrite table "$DB".dws_au_detail_week partition(week_dt)
select
collect_set(sys_id)[0] sys_id,
collect_set(service_name)[0] service_name,
collect_set(home_province_code)[0] home_province_code,
collect_set(visit_province_code)[0] visit_province_code,
collect_set(channel_code)[0] channel_code,
collect_set(service_code)[0] service_code,
collect_set(imsi)[0] imsi,
msisdn,
date_add(next_day('$calllogs_date','MO'),-7),
date_add(next_day('$calllogs_date','MO'),-1),
concat(date_add(next_day('$calllogs_date','MO'),-7),'_',date_add(next_day('$calllogs_date','MO'),-1))
from "$DB".dwd_calllogs_cdr
where dt >= date_add(next_day('$calllogs_date','MO'),-7) and dt <= date_add(next_day('$calllogs_date','MO'),-1)
group by msisdn;
insert overwrite table "$DB".dws_au_detail_month partition(month_dt)
select
collect_set(sys_id)[0] sys_id,
collect_set(service_name)[0] service_name,
collect_set(home_province_code)[0] home_province_code,
collect_set(visit_province_code)[0] visit_province_code,
collect_set(channel_code)[0] channel_code,
collect_set(service_code)[0] service_code,
collect_set(imsi)[0] imsi,
msisdn,
date_format('$calllogs_date','yyyy-MM')
from "$DB".dwd_calllogs_cdr
where date_format(dt,'yyyy-MM') = date_format('$calllogs_date','yyyy-MM')
group by msisdn;
"
$hive -e "$sql"
2)增加脚本执行权限
[hadoop@hadoop101 bin]$ chmod +x calllogs_dws_au.sh
3)脚本使用
[hadoop@hadoop101 modules]$ calllogs_dws_au.sh 2021-12-08
4)查询结果
hive (calllogs)> select count(*) from dws_au_detail_day;
hive (calllogs)> select count(*) from dws_au_detail_week;
hive (calllogs)> select count(*) from dws_au_detail_month ;
5)脚本执行时间
企业开发中一般在每日凌晨0点30分~1点执行
1.2 ADS层
1.2.1 活跃用户数
目标:统计当日、当周、当月活跃用户数
1)建表语句
hive (calllogs)>
-- ADS summary: daily / weekly / monthly active-user counts, one row per
-- statistics date, plus weekend/month-end flags for picking final values.
DROP TABLE IF EXISTS ads_au_count;
CREATE EXTERNAL TABLE ads_au_count (
    `dt`          string COMMENT '统计日期',
    `day_count`   bigint COMMENT '当日用户数量',
    `wk_count`    bigint COMMENT '当周用户数量',
    `mn_count`    bigint COMMENT '当月用户数量',
    `is_weekend`  string COMMENT 'Y,N是否是周末,用于得到本周最终结果',
    `is_monthend` string COMMENT 'Y,N是否是月末,用于得到本月最终结果'
)
COMMENT '每日活跃用户数量'
STORED AS PARQUET
LOCATION '/warehouse/calllogs/ads/ads_au_count/';
2)导入数据
hive (calllogs)>
-- Build one ads_au_count row for the stat date by joining three one-row
-- subqueries (day / week / month counts) on the shared constant dt key.
-- is_weekend: 'Y' when the stat date is the Sunday of its week;
-- is_monthend: 'Y' when the stat date is the last day of its month.
-- NOTE: week_dt is matched as '<monday>_<sunday>' with an underscore
-- separator -- the DWS week loader must build the partition key the same way.
insert overwrite table ads_au_count
select
'2021-12-07' dt,
daycount.ct,
wkcount.ct,
mncount.ct,
if(date_add(next_day('2021-12-07','MO'),-1)='2021-12-07','Y','N') ,
if(last_day('2021-12-07')='2021-12-07','Y','N')
from
(
select
'2021-12-07' dt,
count(*) ct
from dws_au_detail_day
where dt='2021-12-07'
)daycount join
(
select
'2021-12-07' dt,
count (*) ct
from dws_au_detail_week
where week_dt=concat(date_add(next_day('2021-12-07','MO'),-7),'_' ,date_add(next_day('2021-12-07','MO'),-1) )
) wkcount on daycount.dt=wkcount.dt
join
(
select
'2021-12-07' dt,
count (*) ct
from dws_au_detail_month
where month_dt=date_format('2021-12-07','yyyy-MM')
)mncount on daycount.dt=mncount.dt;
3)查询导入结果
hive (calllogs)> select * from ads_au_count;
1.2.2 活跃用户数ADS层加载数据脚本
1)在hadoop101的/home/hadoop/bin目录下创建脚本
[hadoop@hadoop101 bin]$ vim calllogs_ads_au.sh
在脚本中编写如下内容
#!/bin/bash
# Load the ADS active-user count table (ads_au_count) for one date.
# Usage: calllogs_ads_au.sh [yyyy-MM-dd]; defaults to yesterday when no arg.
DB=calllogs
hive=/opt/modules/hive/bin/hive
# Use the supplied date if given, otherwise yesterday.
# FIX: quote "$1" -- the unquoted test [ -n $1 ] expands to [ -n ], which is
# always true, so the else branch (default date) could never be reached.
if [ -n "$1" ] ;then
calllogs_date=$1
else
calllogs_date=`date -d "-1 day" +%F`
fi
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table "$DB".ads_au_count
select
'$calllogs_date' dt,
daycount.ct,
wkcount.ct,
mncount.ct,
if(date_add(next_day('$calllogs_date','MO'),-1)='$calllogs_date','Y','N') ,
if(last_day('$calllogs_date')='$calllogs_date','Y','N')
from
(
select
'$calllogs_date' dt,
count(*) ct
from "$DB".dws_au_detail_day
where dt='$calllogs_date'
)daycount join
(
select
'$calllogs_date' dt,
count (*) ct
from "$DB".dws_au_detail_week
where week_dt=concat(date_add(next_day('$calllogs_date','MO'),-7),'_' ,date_add(next_day('$calllogs_date','MO'),-1) )
) wkcount on daycount.dt=wkcount.dt
join
(
select
'$calllogs_date' dt,
count (*) ct
from "$DB".dws_au_detail_month
where month_dt=date_format('$calllogs_date','yyyy-MM')
)mncount on daycount.dt=mncount.dt;
"
$hive -e "$sql"
2)增加脚本执行权限
[hadoop@hadoop101 bin]$ chmod +x calllogs_ads_au.sh
3)脚本使用
[hadoop@hadoop101 modules]$ calllogs_ads_au.sh 2021-12-08
4)脚本执行时间
企业开发中一般在每日凌晨0点30分~1点执行
5)查询导入结果
hive (calllogs)> select * from ads_au_count ;
2 新增用户
新增用户包括日新增用户、周新增用户、月新增用户。
2.1 DWS层
2.1.1每日新增用户明细表
1)建表语句
hive (calllogs)>
-- Cumulative new-user table: one row per msisdn, stamped with the date it
-- was first seen (create_date). The loader appends, never overwrites.
DROP TABLE IF EXISTS `dws_new_user_day`;
CREATE TABLE `dws_new_user_day` (
    sys_id              string,
    service_name        string,
    home_province_code  string,
    visit_province_code string,
    channel_code        string,
    service_code        string,
    imsi                string,
    msisdn              string,
    create_date         string
)
COMMENT '每日新增用户信息'
STORED AS PARQUET
LOCATION '/warehouse/calllogs/dws/dws_new_user_day/';
2)导入数据
用每日活跃用户表 left join 每日新增用户表,关联的条件是msisdn相等。如果是当日新增的用户,则其在每日新增用户表中还没有记录,关联结果中nud.msisdn为null。
hive (calllogs)>
-- Append the day's newly-seen users: left join today's active users against
-- the cumulative new-user table itself; rows with no match (nud.msisdn is
-- null) are first-time users. "insert into" appends, preserving history.
-- NOTE(review): assumes one record per msisdn in dws_new_user_day -- a
-- duplicate there would fan out the join; confirm loader runs once per day.
insert into table dws_new_user_day
select
aud.sys_id,
aud.service_name,
aud.home_province_code,
aud.visit_province_code,
aud.channel_code,
aud.service_code,
aud.imsi,
aud.msisdn,
'2021-12-07'
from dws_au_detail_day aud left join dws_new_user_day nud on aud.msisdn=nud.msisdn
where aud.dt='2021-12-07' and nud.msisdn is null;
3)查询导入数据
hive (calllogs)> select count(*) from dws_new_user_day ;
2.1.2 DWS层每日新增用户数据加载脚本
1)在hadoop101的/home/hadoop/bin目录下创建脚本
[hadoop@hadoop101 bin]$ vim calllogs.dws_nu.sh
在脚本中编写如下内容
#!/bin/bash
# Append one date's newly-seen users into the cumulative dws_new_user_day.
# Usage: calllogs.dws_nu.sh [yyyy-MM-dd]; defaults to yesterday when no arg.
DB=calllogs
hive=/opt/modules/hive/bin/hive
# Use the supplied date if given, otherwise yesterday.
# FIX: quote "$1" -- the unquoted test [ -n $1 ] expands to [ -n ], which is
# always true, so the else branch (default date) could never be reached.
if [ -n "$1" ] ;then
calllogs_date=$1
else
calllogs_date=`date -d "-1 day" +%F`
fi
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table "$DB".dws_new_user_day
select
aud.sys_id,
aud.service_name,
aud.home_province_code,
aud.visit_province_code,
aud.channel_code,
aud.service_code,
aud.imsi,
aud.msisdn,
'$calllogs_date'
from "$DB".dws_au_detail_day aud left join "$DB".dws_new_user_day nud on aud.msisdn=nud.msisdn
where aud.dt='$calllogs_date' and nud.msisdn is null;
"
$hive -e "$sql"
2)增加脚本执行权限
[hadoop@hadoop101 bin]$ chmod +x calllogs.dws_nu.sh
3)脚本使用
[hadoop@hadoop101 modules]$ calllogs.dws_nu.sh 2021-12-08
4)脚本执行时间
企业开发中一般在每日凌晨0点30分~1点执行
5)查询导入结果
hive (calllogs)> select * from dws_new_user_day ;
2.2 ADS层
2.2.1每日新增用户表
1)建表语句
hive (calllogs)>
-- ADS daily new-user counts, stored as tab-separated text.
DROP TABLE IF EXISTS `ads_new_user_count`;
CREATE TABLE `ads_new_user_count` (
    `create_date`    string comment '创建时间',
    `new_user_count` BIGINT comment '新增用户数量'
)
COMMENT '每日新增用户数'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/calllogs/ads/ads_new_user_count/';
2)导入数据
hive (calllogs)>
-- Append one summary row: how many users were first seen on the stat date.
INSERT INTO TABLE ads_new_user_count
SELECT
    create_date,
    count(*)
FROM dws_new_user_day
WHERE create_date = '2021-12-07'
GROUP BY create_date;
3)查询导入数据
hive (calllogs)> select * from ads_new_user_count;
2.2.2每日新增用户表加载数据脚本
1)在hadoop101的/home/hadoop/bin目录下创建脚本
[hadoop@hadoop101 bin]$ vim calllogs.ads_nu.sh
在脚本中编写如下内容
#!/bin/bash
# Load the ADS daily new-user count (ads_new_user_count) for one date.
# Usage: calllogs.ads_nu.sh [yyyy-MM-dd]; defaults to yesterday when no arg.
DB=calllogs
hive=/opt/modules/hive/bin/hive
# Use the supplied date if given, otherwise yesterday.
# FIX: quote "$1" -- the unquoted test [ -n $1 ] expands to [ -n ], which is
# always true, so the else branch (default date) could never be reached.
if [ -n "$1" ] ;then
calllogs_date=$1
else
calllogs_date=`date -d "-1 day" +%F`
fi
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table "$DB".ads_new_user_count
select create_date, count(*) from "$DB".dws_new_user_day
where create_date='$calllogs_date'
group by create_date ;
"
$hive -e "$sql"
2)增加脚本执行权限
[hadoop@hadoop101 bin]$ chmod +x calllogs.ads_nu.sh
3)脚本使用
[hadoop@hadoop101 bin]$ calllogs.ads_nu.sh 2021-12-08
4)脚本执行时间
企业开发中一般在每日凌晨0点30分~1点执行
5)查询导入结果
hive (calllogs)> select * from ads_new_user_count;
3 留存用户
3.1 用户留存概念
留存用户:某段时间内的新增用户(活跃用户),经过一段时间后,又继续使用应用的被认作是留存用户;
留存率:留存用户占当时新增用户(活跃用户)的比例即是留存率。
例如,2月10日新增用户100,这100人在2月11日启动过应用的有30人,2月12日启动过应用的有25人,2月13日启动过应用的有32人;
则2月10日新增用户次日的留存率是30/100 = 30%,两日留存率是25/100=25%,三日留存率是32/100=32%。
3.2 用户留存分析
分析:假设今天是11日,要统计前1天也就是10日新增用户的留存率,公式如下:
10日新增用户的留存率= 10日的新增用户且11日活跃的 / 10日的新增用户
1)分母获取
(1)创建每日新增用户明细表
(2)通过每日新增用户明细表获取10日新增用户
2)分子获取
(1)10日新增的用户明细 join 11日活跃用户明细表=每日留存用户表
(2)对每日留存用户表count就得到了10日新增用户在11日的留存用户
3)留存率计算
(1) 10日的新增用户 且 11日活跃的用户表 与10日新增用户join,算出留存用户
3.3 DWS层
3.3.1 每日留存用户明细表
1)建表语句
hive (calllogs)>
-- Daily retention detail: users still active on the stat date (partition dt),
-- tagged with their creation date and how many days they have been retained.
DROP TABLE IF EXISTS `dws_user_retention_day`;
CREATE TABLE `dws_user_retention_day` (
    sys_id              string,
    service_name        string,
    home_province_code  string,
    visit_province_code string,
    channel_code        string,
    service_code        string,
    imsi                string,
    msisdn              string,
    `create_date`       string comment '设备新增时间',
    `retention_day`     int comment '截止当前日期留存天数'
)
COMMENT '每日用户留存情况'
PARTITIONED BY (`dt` string)
STORED AS PARQUET
LOCATION '/warehouse/calllogs/dws/dws_user_retention_day/';
2)导入数据(每天计算前1天的新用户访问留存明细)
hive (calllogs)>
-- 1-day retention for the stat date: users created exactly one day earlier
-- (nud.create_date = stat date - 1) who are active on the stat date.
-- Overwrites the stat date's partition.
insert overwrite table dws_user_retention_day partition(dt="2021-12-07")
select
nud.sys_id,
nud.service_name,
nud.home_province_code,
nud.visit_province_code,
nud.channel_code,
nud.service_code,
nud.imsi,
nud.msisdn,
nud.create_date,
1 retention_day
from dws_au_detail_day aud join dws_new_user_day nud on aud.msisdn =nud.msisdn
where aud.dt='2021-12-07' and nud.create_date=date_add('2021-12-07',-1);
3)查询导入数据(每天计算前1天的新用户访问留存明细)
hive (calllogs)> select count(*) from dws_user_retention_day;
3.3.2 1,2,3,n天留存用户明细表
1)导入数据(每天计算前1,2,3,n天的新用户访问留存明细)
hive (calllogs)>
-- 1-, 2-, and 3-day retention for the stat date in one statement: each
-- union-all branch keeps users created N days earlier who are active today,
-- tagging them with retention_day = N. Overwrites the stat date's partition
-- (replacing any earlier 1-day-only load for the same date).
insert overwrite table dws_user_retention_day partition(dt="2021-12-07")
-- Branch 1: created yesterday, active today.
select
nud.sys_id,
nud.service_name,
nud.home_province_code,
nud.visit_province_code,
nud.channel_code,
nud.service_code,
nud.imsi,
nud.msisdn,
nud.create_date,
1 retention_day
from dws_au_detail_day aud join dws_new_user_day nud on aud.msisdn =nud.msisdn
where aud.dt='2021-12-07' and nud.create_date=date_add('2021-12-07',-1)
union all
-- Branch 2: created two days ago, active today.
select
nud.sys_id,
nud.service_name,
nud.home_province_code,
nud.visit_province_code,
nud.channel_code,
nud.service_code,
nud.imsi,
nud.msisdn,
nud.create_date,
2 retention_day
from dws_au_detail_day aud join dws_new_user_day nud on aud.msisdn =nud.msisdn
where aud.dt='2021-12-07' and nud.create_date=date_add('2021-12-07',-2)
union all
-- Branch 3: created three days ago, active today.
select
nud.sys_id,
nud.service_name,
nud.home_province_code,
nud.visit_province_code,
nud.channel_code,
nud.service_code,
nud.imsi,
nud.msisdn,
nud.create_date,
3 retention_day
from dws_au_detail_day aud join dws_new_user_day nud on aud.msisdn =nud.msisdn
where aud.dt='2021-12-07' and nud.create_date=date_add('2021-12-07',-3);
2)查询导入数据(每天计算前1,2,3天的新用户访问留存明细)
hive (calllogs)> select retention_day, count(*) from dws_user_retention_day group by retention_day;
3.3.3 1,2,3,n天留存用户明细数据加载脚本
1)在hadoop101的/home/hadoop/bin目录下创建脚本
[hadoop@hadoop101 bin]$ vim calllogs_dws_user_retention.sh
在脚本中编写如下内容
#!/bin/bash
# Load 1/2/3-day retention detail into dws_user_retention_day for one date.
# Usage: calllogs_dws_user_retention.sh [yyyy-MM-dd]; defaults to yesterday.
DB=calllogs
hive=/opt/modules/hive/bin/hive
# Use the supplied date if given, otherwise yesterday.
# FIX: quote "$1" -- the unquoted test [ -n $1 ] expands to [ -n ], which is
# always true, so the else branch (default date) could never be reached.
if [ -n "$1" ] ;then
calllogs_date=$1
else
calllogs_date=`date -d "-1 day" +%F`
fi
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table "$DB".dws_user_retention_day partition(dt='$calllogs_date')
select
nud.sys_id,
nud.service_name,
nud.home_province_code,
nud.visit_province_code,
nud.channel_code,
nud.service_code,
nud.imsi,
nud.msisdn,
nud.create_date,
1 retention_day
from "$DB".dws_au_detail_day aud join "$DB".dws_new_user_day nud on aud.msisdn =nud.msisdn
where aud.dt='$calllogs_date' and nud.create_date=date_add('$calllogs_date',-1)
union all
select
nud.sys_id,
nud.service_name,
nud.home_province_code,
nud.visit_province_code,
nud.channel_code,
nud.service_code,
nud.imsi,
nud.msisdn,
nud.create_date,
2 retention_day
from "$DB".dws_au_detail_day aud join "$DB".dws_new_user_day nud on aud.msisdn =nud.msisdn
where aud.dt='$calllogs_date' and nud.create_date=date_add('$calllogs_date',-2)
union all
select
nud.sys_id,
nud.service_name,
nud.home_province_code,
nud.visit_province_code,
nud.channel_code,
nud.service_code,
nud.imsi,
nud.msisdn,
nud.create_date,
3 retention_day
from "$DB".dws_au_detail_day aud join "$DB".dws_new_user_day nud on aud.msisdn =nud.msisdn
where aud.dt='$calllogs_date' and nud.create_date=date_add('$calllogs_date',-3);
"
$hive -e "$sql"
2)增加脚本执行权限
[hadoop@hadoop101 bin]$ chmod +x calllogs_dws_user_retention.sh
3)脚本使用
[hadoop@hadoop101 bin]$ calllogs_dws_user_retention.sh 2021-12-08
4)脚本执行时间
企业开发中一般在每日凌晨0点30分~1点执行
5)查询导入结果
hive (calllogs)> select retention_day, count(*) from dws_user_retention_day group by retention_day;
3.4 ADS层
3.4.1留存用户数
1)建表语句
hive (calllogs)>
-- ADS retention counts: rows of (create_date, retention_day, count).
DROP TABLE IF EXISTS `ads_user_retention_day_count`;
CREATE TABLE `ads_user_retention_day_count` (
    `create_date`   string comment '用户新增日期',
    `retention_day` int comment '截止当前日期留存天数',
    retention_count bigint comment '留存数量'
)
COMMENT '每日用户留存情况'
STORED AS PARQUET
LOCATION '/warehouse/calllogs/ads/ads_user_retention_day_count/';
2)导入数据
hive (calllogs)>
-- Append one count row per (create_date, retention_day) found in the
-- stat date's retention-detail partition.
INSERT INTO TABLE ads_user_retention_day_count
SELECT
    create_date,
    retention_day,
    count(*) AS retention_count
FROM dws_user_retention_day
WHERE dt = '2021-12-07'
GROUP BY create_date, retention_day;
3)查询导入数据
hive (calllogs)> select * from ads_user_retention_day_count;
3.4.2 留存用户比率
1)建表语句
hive (calllogs)>
-- ADS retention rate per (create_date, retention_day).
-- NOTE(review): new_user_count is declared string here but BIGINT in
-- ads_new_user_count; string works for Hive's implicit casts, but aligning
-- the types would be cleaner -- confirm before changing downstream readers.
drop table if exists `ads_user_retention_day_rate`;
create table `ads_user_retention_day_rate`
(
`stat_date` string comment '统计日期',
`create_date` string comment '用户新增日期',
`retention_day` int comment '截止当前日期留存天数',
`retention_count` bigint comment '留存数量',
`new_user_count` string comment '当日用户新增数量',
`retention_ratio` decimal(10,2) comment '留存率'
) COMMENT '每日用户留存情况'
stored as parquet
-- FIX: location previously pointed at .../ads_user_retention_day_count/,
-- making this table share the count table's data directory -- each table
-- would read (and a drop would delete) the other's files.
location '/warehouse/calllogs/ads/ads_user_retention_day_rate/';
2)导入数据
hive (calllogs)>
-- Retention rate = retained users / users first seen on create_date * 100.
-- ur: retention counts from the stat date's partition of the detail table;
-- joined to ads_new_user_count on the creation date to get the denominator.
insert into table ads_user_retention_day_rate
select
'2021-12-07' ,
ur.create_date,
ur.retention_day,
ur.retention_count ,
nuc.new_user_count,
ur.retention_count/nuc.new_user_count*100
from
(
select
create_date,
retention_day,
count(*) retention_count
from `dws_user_retention_day`
where dt='2021-12-07'
group by create_date,retention_day
) ur join ads_new_user_count nuc on nuc.create_date=ur.create_date;
3)查询导入数据
hive (calllogs)>select * from ads_user_retention_day_rate;