select count( 0 )查询超时_项目实战从0到1之hive(29)企业级数据仓库构建(十):搭建 ADS 层...

本文介绍了如何在Hive中构建ADS层,包括活跃设备数(日、周、月)、每日新增设备、沉默用户、回流用户、流失用户和留存率的统计方法。通过建表、导入数据和查询来实现各项指标的计算,以应对`select count(0)`查询超时问题。
摘要由CSDN通过智能技术生成
45e49e5d312a5020125f8fe95bd70f88.png

                             点击上方 蓝字 关注我们           

a335fcacded13ccc5e0fd1a00cdbee10.png 42591abdd8d0c4fbafefd5971286adb9.png

一、数仓搭建 - ADS 层

1.1 设备主题

1.1.1 活跃设备数(日、周、月)

需求定义:

日活:当日活跃的设备数

周活:当周活跃的设备数

月活:当月活跃的设备数

1)建表语句

drop table if exists ads_uv_count;

create external table ads_uv_count(

`dt` string COMMENT '统计日期',

`day_count` bigint COMMENT '当日用户数量',

`wk_count` bigint COMMENT '当周用户数量',

`mn_count` bigint COMMENT '当月用户数量',

`is_weekend` string COMMENT 'Y,N 是否是周末,用于得到本周最终结果',

`is_monthend` string COMMENT 'Y,N 是否是月末,用于得到本月最终结果'

) COMMENT '活跃设备数'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_uv_count/';

2)导入数据

insert into table ads_uv_count

select

'2020-03-10' dt,

daycount.ct,

wkcount.ct,

mncount.ct,

if(date_add(next_day('2020-03-10','MO'),-1)='2020-03-10','Y','N') ,

if(last_day('2020-03-10')='2020-03-10','Y','N')

from

(

select

'2020-03-10' dt,

count(*) ct

from dwt_uv_topic

where login_date_last='2020-03-10'

)daycount join

(

select

'2020-03-10' dt,

count (*) ct

from dwt_uv_topic

where login_date_last>=date_add(next_day('2020-03-10','MO'),-7)

and login_date_last<= date_add(next_day('2020-03-10','MO'),-1)

) wkcount on daycount.dt=wkcount.dt

join

(

select

'2020-03-10' dt,

count (*) ct

from dwt_uv_topic

where

date_format(login_date_last,'yyyy-MM')=date_format('2020-03-10','yyyy-MM')

)mncount on daycount.dt=mncount.dt;

3)查询导入结果

1.1.2 每日新增设备

1)建表语句

drop table if exists ads_new_mid_count;

create external table ads_new_mid_count

(

`create_date` string comment '创建时间' ,

`new_mid_count` BIGINT comment '新增设备数量'

) COMMENT '每日新增设备信息数量'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_new_mid_count/';

2)导入数据

insert into table ads_new_mid_count

select

login_date_first,

count(*)

from dwt_uv_topic

where login_date_first='2020-03-10'

group by login_date_first;

3)查询导入数据

select * from ads_new_mid_count;

1.1.3 沉默用户数

需求定义:

沉默用户:只在安装当天启动过,且启动时间是在 7 天前

1)建表语句

drop table if exists ads_silent_count;

create external table ads_silent_count(

`dt` string COMMENT '统计日期',

`silent_count` bigint COMMENT '沉默设备数'

)

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_silent_count';

2)导入 2020-03-20 数据

insert into table ads_silent_count

select

'2020-03-15',

count(*)

from dwt_uv_topic

where login_date_first=login_date_last

and login_date_last<=date_add('2020-03-15',-7);

3)查询导入数据

select * from ads_silent_count;

1.1.4 本周回流用户数

需求定义:

本周回流用户:上周未活跃,本周活跃的设备,且不是本周新增设备

1)建表语句

drop table if exists ads_back_count;

create external table ads_back_count(

`dt` string COMMENT '统计日期',

`wk_dt` string COMMENT '统计日期所在周',

`wastage_count` bigint COMMENT '回流设备数'

)

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_back_count';

2)导入数据:

insert into table ads_back_count

select

'2020-03-15',

count(*)

from

(

select

mid_id

from dwt_uv_topic

where login_date_last>=date_add(next_day('2020-03-15','MO'),-7)

and login_date_last<= date_add(next_day('2020-03-15','MO'),-1)

and login_date_first

)current_wk

left join

(

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('2020-03-15','MO'),-7*2)

and dt<= date_add(next_day('2020-03-15','MO'),-7-1)

group by mid_id

)last_wk

on current_wk.mid_id=last_wk.mid_id

where last_wk.mid_id is null;

3)查询结果

select * from ads_back_count;

1.1.5 流失用户数

需求定义:

流失用户:最近 7 天未活跃的设备

1)建表语句

drop table if exists ads_wastage_count;

create external table ads_wastage_count(

`dt` string COMMENT '统计日期',

`wastage_count` bigint COMMENT '流失设备数'

)

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_wastage_count';

2)导入 2020-03-20 数据

insert into table ads_wastage_count

select

'2020-03-20',

count(*)

from

(

select

mid_id

from dwt_uv_topic

where login_date_last<=date_add('2020-03-20',-7)

group by mid_id

)t1;

3)查询结果

select * from ads_wastage_count;

1.1.6 留存率

c0216ec47f7271917ea0b9c3d8c380ba.png

1)建表语句

drop table if exists ads_user_retention_day_rate;

create external table ads_user_retention_day_rate

(

`stat_date` string comment '统计日期',

`create_date` string comment '设备新增日期',

`retention_day` int comment '截止当前日期留存天数',

`retention_count` bigint comment '留存数量',

`new_mid_count` bigint comment '设备新增数量',

`retention_ratio` decimal(10,2) comment '留存率'

) COMMENT '每日用户留存情况'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_user_retention_day_rate/';

2)导入数据

insert into table ads_user_retention_day_rate

select

'2020-03-10',--统计日期

date_add('2020-03-10',-1),--新增日期

1,--留存天数

sum(if(login_date_first=date_add('2020-03-10',-1) and

login_date_last='2020-03-10',1,0)),--2020-03-09 的 1 日留存数

sum(if(login_date_first=date_add('2020-03-10',-1),1,0)),--2020-03-09 新增

sum(if(login_date_first=date_add('2020-03-10',-1) and

login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-

1),1,0))*100

from dwt_uv_topic

union all

select

'2020-03-10',--统计日期

date_add('2020-03-10',-2),--新增日期

2,--留存天数

sum(if(login_date_first=date_add('2020-03-10',-2) and

login_date_last='2020-03-10',1,0)),--2020-03-08 的 2 日留存数

sum(if(login_date_first=date_add('2020-03-10',-2),1,0)),--2020-03-08 新增

sum(if(login_date_first=date_add('2020-03-10',-2) and

login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-

2),1,0))*100

from dwt_uv_topic

union all

select

'2020-03-10',--统计日期

date_add('2020-03-10',-3),--新增日期

3,--留存天数

sum(if(login_date_first=date_add('2020-03-10',-3) and

login_date_last='2020-03-10',1,0)),--2020-03-07 的 3 日留存数

sum(if(login_date_first=date_add('2020-03-10',-3),1,0)),--2020-03-07 新增

sum(if(login_date_first=date_add('2020-03-10',-3) and

login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-

3),1,0))*100

from dwt_uv_topic;

3)查询导入数据

select * from ads_user_retention_day_rate;drop table if exists ads_continuity_wk_count;

create external table ads_continuity_wk_count(

`dt` string COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日

期',

`wk_dt` string COMMENT '持续时间',

`continuity_count` bigint COMMENT '活跃次数'

)

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_continuity_wk_count';

2)导入 2020-03-20 所在周的数据

insert into table ads_continuity_wk_count

select

'2020-03-15',

concat(date_add(next_day('2020-03-15','MO'),-7*3),'_',date_add(next_day('

2020-03-15','MO'),-1)),

count(*)

from

(

select

mid_id

from

(

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('2020-03-10','monday'),-7)

and dt<=date_add(next_day('2020-03-10','monday'),-1)

group by mid_id

union all

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('2020-03-10','monday'),-7*2)

and dt<=date_add(next_day('2020-03-10','monday'),-7-1)

group by mid_id

union all

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('2020-03-10','monday'),-7*3)

and dt<=date_add(next_day('2020-03-10','monday'),-7*2-1)

group by mid_id

)t1

group by mid_id

having count(*)=3

)t2

3)查询

select * from ads_continuity_wk_count;

1.1.8 最近七天内连续三天活跃用户数

1)建表语句

drop table if exists ads_continuity_uv_count;

create external table ads_continuity_uv_count(

`dt` string COMMENT '统计日期',

`wk_dt` string COMMENT '最近 7 天日期',

`continuity_count` bigint

) COMMENT '连续活跃设备数'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_continuity_uv_count';

2)写出导入数据的 SQL 语句

insert into table ads_continuity_uv_count

select

'2020-03-12',

concat(date_add('2020-03-12',-6),'_','2020-03-12'),

count(*)

from

(

select mid_id

from

(

select mid_id

from

(

select

mid_id,

date_sub(dt,rank) date_dif

from

(

select

mid_id,

dt,

rank() over(partition by mid_id order by dt) rank

from dws_uv_detail_daycount

where dt>=date_add('2020-03-12',-6) and

dt<='2020-03-12'

)t1

)t2

group by mid_id,date_dif

having count(*)>=3

)t3

group by mid_id

)t4;drop table if exists ads_user_topic;

create external table ads_user_topic(

`dt` string COMMENT '统计日期',

`day_users` string COMMENT '活跃会员数',

`day_new_users` string COMMENT '新增会员数',

`day_new_payment_users` string COMMENT '新增消费会员数',

`payment_users` string COMMENT '总付费会员数',

`users` string COMMENT '总会员数',

`day_users2users` decimal(10,2) COMMENT '会员活跃率',

`payment_users2users` decimal(10,2) COMMENT '会员付费率',

`day_new_users2users` decimal(10,2) COMMENT '会员新鲜度'

) COMMENT '会员主题信息表'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_user_topic';

2)导入数据

insert into table ads_user_topic

select

'2020-03-10',

sum(if(login_date_last='2020-03-10',1,0)),

sum(if(login_date_first='2020-03-10',1,0)),

sum(if(payment_date_first='2020-03-10',1,0)),

sum(if(payment_count>0,1,0)),

count(*),

sum(if(login_date_last='2020-03-10',1,0))/count(*),

sum(if(payment_count>0,1,0))/count(*),

sum(if(login_date_first='2020-03-10',1,0))/sum(if(login_date_last='2020-03-10',1,0))

from dwt_user_topic

3)查询数据

hive (gmall)> select * from ads_user_topic;

4)vim ads_user_topic.sh

添加如下内容:

#!/bin/bash

APP=gmall

hive=/opt/modules/hive/bin/hive

# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天

if [ -n "$1" ] ;then

do_date=$1

else

do_date=`date -d "-1 day" +%F`

fi

sql="

with

tmp_day_users as

(

select

'$do_date' dt,

count(*) day_users

from

${APP}.dwt_user_topic

where

login_date_last='$do_date'

),

tmp_day_new_users as

(

select

'$do_date' dt,

count(*) day_new_users

from

${APP}.dwt_user_topic

where

login_date_last='$do_date' and login_date_first='$do_date'

),

tmp_day_new_payment_users as

(

select

'$do_date' dt,

count(*) day_new_payment_users

from

${APP}.dwt_user_topic

where

payment_date_first='$do_date'

),

tmp_payment_users as

(

select

'$do_date' dt,

count(*) payment_users

from

${APP}.dwt_user_topic

where

payment_date_first is not null

),

tmp_users as

(

select

'$do_date' dt,

count(*) users

from

${APP}.dwt_user_topic

tmp_users

)

insert into table ${APP}.ads_user_topic

select

'$do_date' dt,

day_users,

day_new_users,

day_new_payment_users,

payment_users,

users,

day_users/users,

payment_users/users,

day_new_users/users

from

tmp_day_users

join

tmp_day_new_users

on

tmp_day_users.dt=tmp_day_new_users.dt

join

tmp_day_new_payment_users

on

tmp_day_users.dt=tmp_day_new_payment_users.dt

join

tmp_payment_users

on

tmp_day_users.dt=tmp_payment_users.dt

join

tmp_users

on

tmp_day_users.dt=tmp_users.dt;

"

$hive -e "$sql"

5)增加脚本执行权限

chmod 770 ads_user_topic.sh

6)执行脚本导入数据

ads_user_topic.sh 2020-03-11

7)查看导入数据

select * from ads_user_topic;

7.2.2 漏斗分析

统计“浏览->购物车->下单->支付”的转化率

思路:统计各个行为的人数,然后计算比值

1)建表语句

drop table if exists ads_user_action_convert_day;

create external table ads_user_action_convert_day(

`dt` string COMMENT '统计日期',

`total_visitor_m_count` bigint COMMENT '总访问人数',

`cart_u_count` bigint COMMENT '加入购物车的人数',

`visitor2cart_convert_ratio` decimal(10,2) COMMENT '访问到加入购物车转化率',

`order_u_count` bigint COMMENT '下单人数',

`cart2order_convert_ratio` decimal(10,2) COMMENT '加入购物车到下单转化率',

`payment_u_count` bigint COMMENT '支付人数',

`order2payment_convert_ratio` decimal(10,2) COMMENT '下单到支付的转化率'

) COMMENT '用户行为漏斗分析'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_user_action_convert_day/';

2)数据装载

insert into table ads_user_action_convert_day

select

'2020-03-10',

uv.day_count,

ua.cart_count,

cast(ua.cart_count/uv.day_count as decimal(10,2)) visitor2cart_convert_ratio,

ua.order_count,

cast(ua.order_count/ua.cart_count as decimal(10,2)) visitor2order_convert_ratio,

ua.payment_count,

cast(ua.payment_count/ua.order_count as decimal(10,2)) order2payment_convert_ratio

from

(

select

dt,

sum(if(cart_count>0,1,0)) cart_count,

sum(if(order_count>0,1,0)) order_count,

sum(if(payment_count>0,1,0)) payment_count

from dws_user_action_daycount

where dt='2020-03-10'

group by dt

)ua join ads_uv_count uv on uv.dt=ua.dt;

3)查询加载数据

select * from ads_user_action_convert_day;

7.3 商品主题

7.3.1 商品个数信息

1)建表语句

drop table if exists ads_product_info;

create external table ads_product_info(

`dt` string COMMENT '统计日期',

`sku_num` string COMMENT 'sku 个数',

`spu_num` string COMMENT 'spu 个数'

) COMMENT '商品个数信息'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_product_info';

2)导入数据

insert into table ads_product_info

select

'2020-03-10' dt,

sku_num,

spu_num

from

(

select

'2020-03-10' dt,

count(*) sku_num

from

dwt_sku_topic

) tmp_sku_num

join

(

select

'2020-03-10' dt,

count(*) spu_num

from

(

select

spu_id

from

dwt_sku_topic

group by

spu_id

) tmp_spu_id

) tmp_spu_num

on

tmp_sku_num.dt=tmp_spu_num.dt;

3)查询结果数据

select * from ads_product_info;drop table if exists ads_product_sale_topN;

create external table ads_product_sale_topN(

`dt` string COMMENT '统计日期',

`sku_id` string COMMENT '商品 ID',

`payment_amount` bigint COMMENT '销量'

) COMMENT '商品个数信息'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_product_sale_topN';

2)导入数据

insert into table ads_product_sale_topN

select

'2020-03-10' dt,

sku_id,

payment_amount

from

dws_sku_action_daycount

where

dt='2020-03-10'

order by payment_amount desc

limit 10;

3)查询结果数据

select * from ads_product_sale_topN;drop table if exists ads_product_favor_topN;

create external table ads_product_favor_topN(

`dt` string COMMENT '统计日期',

`sku_id` string COMMENT '商品 ID',

`favor_count` bigint COMMENT '收藏量'

) COMMENT '商品收藏 TopN'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_product_favor_topN';

2)导入数据

insert into table ads_product_favor_topN

select

'2020-03-10' dt,

sku_id,

favor_count

from

dws_sku_action_daycount

where

dt='2020-03-10'

order by favor_count desc

limit 10;

3)查询数据

select * from ads_product_favor_topN;drop table if exists ads_product_cart_topN;

create external table ads_product_cart_topN(

`dt` string COMMENT '统计日期',

`sku_id` string COMMENT '商品 ID',

`cart_num` bigint COMMENT '加入购物车数量'

) COMMENT '商品加入购物车 TopN'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_product_cart_topN';

2)导入数据

insert into table ads_product_cart_topN

select

'2020-03-10' dt,

sku_id,

cart_num

from

dws_sku_action_daycount

where

dt='2020-03-10'

order by cart_num desc

limit 10;

3)查询数据

select * from ads_product_cart_topN;

7.3.5 商品退款率排名(最近 30 天)

1)建表语句

drop table if exists ads_product_refund_topN;

create external table ads_product_refund_topN(

`dt` string COMMENT '统计日期',

`sku_id` string COMMENT '商品 ID',

`refund_ratio` decimal(10,2) COMMENT '退款率'

) COMMENT '商品退款率 TopN'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_product_refund_topN';

2)导入数据

insert into table ads_product_refund_topN

select

'2020-03-10',

sku_id,

refund_last_30d_count/payment_last_30d_count*100 refund_ratio

from dwt_sku_topic

order by refund_ratio desc

limit 10;

3)查询数据

select * from ads_product_refund_topN;

7.3.6 商品差评率

1)建表语句

drop table if exists ads_appraise_bad_topN;

create external table ads_appraise_bad_topN(

`dt` string COMMENT '统计日期',

`sku_id` string COMMENT '商品 ID',

`appraise_bad_ratio` decimal(10,2) COMMENT '差评率'

) COMMENT '商品差评率 TopN'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_appraise_bad_topN';

2)导入数据

insert into table ads_appraise_bad_topN

select

'2020-03-10' dt,

sku_id,

appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_coun

t+appraise_default_count) appraise_bad_ratio

from

dws_sku_action_daycount

where

dt='2020-03-10'

order by appraise_bad_ratio desc

limit 10;

3)查询数

select * from ads_appraise_bad_topN;

7.4 营销主题(用户+商品+购买行为)

7.4.1 下单数目统计

需求分析:统计每日下单数,下单金额及下单用户数

1)建表语句

drop table if exists ads_order_daycount;

create external table ads_order_daycount(

dt string comment '统计日期',

order_count bigint comment '单日下单笔数',

order_amount decimal(10,2) comment '单日下单金额',

order_users bigint comment '单日下单用户数'

) comment '每日订单总计表'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_order_daycount';

2)导入数据

insert into table ads_order_daycount

select

'2020-03-10',

sum(order_count),

sum(order_amount),

sum(if(order_count>0,1,0))

from dws_user_action_daycount

where dt='2020-03-10';

3)查询数据

select * from ads_order_daycount;

7.4.2 支付信息统计

每日支付金额、支付人数、支付商品数、支付笔数以及下单到支付的平均时长(取自 DWD)

1)建表

drop table if exists ads_payment_daycount;

create external table ads_payment_daycount(

dt string comment '统计日期',

payment_count bigint comment '单日支付笔数',

payment_amount decimal(10,2) comment '单日支付金额',

payment_user_count bigint comment '单日支付人数',

payment_sku_count bigint comment '单日支付商品数',

payment_avg_time double comment '下单到支付的平均时长,取分钟数'

) comment '每日订单总计表'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_payment_daycount';

2)导入数据

insert into table ads_payment_daycount

select

tmp_payment.dt,

tmp_payment.payment_count,

tmp_payment.payment_amount,

tmp_payment.payment_user_count,

tmp_skucount.payment_sku_count,

tmp_time.payment_avg_time

from

(

select

'2020-03-15' dt,

sum(payment_count) payment_count,

sum(payment_amount) payment_amount,

sum(if(payment_count>0,1,0)) payment_user_count

from dws_user_action_daycount

where dt='2020-03-15'

)tmp_payment

join

(

select

'2020-03-15' dt,

sum(if(payment_count>0,1,0)) payment_sku_count

from dws_sku_action_daycount

where dt='2020-03-15'

)tmp_skucount on tmp_payment.dt=tmp_skucount.dt

join

(

select

'2020-03-15' dt,

sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60

payment_avg_time

from dwd_fact_order_info

where dt='2020-03-15'

and payment_time is not null

)tmp_time on tmp_payment.dt=tmp_time.dt

3)查询数据

select * from ads_payment_daycount;

7.4.3 复购率

1)建表语句

drop table ads_sale_tm_category1_stat_mn;

create external table ads_sale_tm_category1_stat_mn

(

tm_id string comment '品牌 id',

category1_id string comment '1 级品类 id ',

category1_name string comment '1 级品类名称 ',

buycount bigint comment '购买人数',

buy_twice_last bigint comment '两次以上购买人数',

buy_twice_last_ratio decimal(10,2) comment '单次复购率',

buy_3times_last bigint comment '三次以上购买人数',

buy_3times_last_ratio decimal(10,2) comment '多次复购率',

stat_mn string comment '统计月份',

stat_date string comment '统计日期'

) COMMENT '复购率统计'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/';

2)数据导入

insert into table ads_sale_tm_category1_stat_mn

select

mn.sku_tm_id,

mn.sku_category1_id,

mn.sku_category1_name,

sum(if(mn.order_count>=1,1,0)) buycount,

sum(if(mn.order_count>=2,1,0)) buyTwiceLast,

sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0))

buyTwiceLastRatio,

sum(if(mn.order_count>=3,1,0)) buy3timeLast ,

sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0))

buy3timeLastRatio ,

date_format('2020-03-10' ,'yyyy-MM') stat_mn,

'2020-03-10' stat_date

from

(

select

user_id,

sd.sku_tm_id,

sd.sku_category1_id,

sd.sku_category1_name,

sum(order_count) order_count

from dws_sale_detail_daycount sd

where date_format(dt,'yyyy-MM')=date_format('2020-03-10' ,'yyyy-MM')

group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name

) mn

group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;

7.5 ADS 层导入脚本

1)vim dwt_to_ads.sh

在脚本中填写如下内容

#!/bin/bash

hive=/opt/modules/hive/bin/hive

# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天

if [ -n "$1" ] ;then

do_date=$1

else

do_date=`date -d "-1 day" +%F`

fi

sql="use gmall;

insert into table ads_uv_count

select

'$do_date',

sum(if(login_date_last='$do_date',1,0)),

sum(if(login_date_last>=date_add(next_day('$do_date','monday'),-7) and

login_date_last<=date_add(next_day('$do_date','monday'),-1) ,1,0)),

sum(if(date_format(login_date_last,'yyyy-MM')=date_format('$do_date','yyyy-M

M'),1,0)),

if('$do_date'=date_add(next_day('$do_date','monday'),-1),'Y','N'),

if('$do_date'=last_day('$do_date'),'Y','N')

from dwt_uv_topic;

insert into table ads_new_mid_count

select

'$do_date',

count(*)

from dwt_uv_topic

where login_date_first='$do_date';

insert into table ads_silent_count

select

'$do_date',

count(*)

from dwt_uv_topic

where login_date_first=login_date_last

and login_date_last<=date_add('$do_date',-7);

insert into table ads_back_count

select

'$do_date',

concat(date_add(next_day('2020-03-10','MO'),-7),'_',date_add(next_day('2020-

03-10','MO'),-1)),

count(*)

from

(

select

mid_id

from dwt_uv_topic

where login_date_last>=date_add(next_day('$do_date','MO'),-7)

and login_date_last<= date_add(next_day('$do_date','MO'),-1)

and login_date_first

)current_wk

left join

(

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('$do_date','MO'),-7*2)

and dt<= date_add(next_day('$do_date','MO'),-7-1)

group by mid_id

)last_wk

on current_wk.mid_id=last_wk.mid_id

where last_wk.mid_id is null;

insert into table ads_wastage_count

select

'$do_date',

count(*)

from dwt_uv_topic

where login_date_last<=date_add('$do_date',-7);

insert into table ads_user_retention_day_rate

select

'$do_date',

date_add('$do_date',-3),

3,

sum(if(login_date_first=date_add('$do_date',-3) and

login_date_last='$do_date',1,0)),

sum(if(login_date_first=date_add('$do_date',-3),1,0)),

sum(if(login_date_first=date_add('$do_date',-3) and

login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',

-3),1,0))*100

from dwt_uv_topic

union all

select

'$do_date',

date_add('$do_date',-2),

2,

sum(if(login_date_first=date_add('$do_date',-2) and

login_date_last='$do_date',1,0)),

sum(if(login_date_first=date_add('$do_date',-2),1,0)),

sum(if(login_date_first=date_add('$do_date',-2) and

login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',

-2),1,0))*100

from dwt_uv_topic

union all

select

'$do_date',

date_add('$do_date',-1),

1,

sum(if(login_date_first=date_add('$do_date',-1) and

login_date_last='$do_date',1,0)),

sum(if(login_date_first=date_add('$do_date',-1),1,0)),

sum(if(login_date_first=date_add('$do_date',-1) and

login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',

-1),1,0))*100

from dwt_uv_topic;

insert into table ads_continuity_wk_count

select

'$do_date',

concat(date_add(next_day('$do_date','MO'),-7*3),'_',date_add(next_day('$do_d

ate','MO'),-1)),

count(*)

from

(

select

mid_id

from

(

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('$do_date','monday'),-7)

and dt<=date_add(next_day('$do_date','monday'),-1)

group by mid_id

union all

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('$do_date','monday'),-7*2)

and dt<=date_add(next_day('$do_date','monday'),-7-1)

group by mid_id

union all

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('$do_date','monday'),-7*3)

and dt<=date_add(next_day('$do_date','monday'),-7*2-1)

group by mid_id

)t1

group by mid_id

having count(*)=3

)t2;

insert into table ads_continuity_uv_count

select

'$do_date',

concat(date_add('$do_date',-6),'_','$do_date'),

count(*)

from

(

select mid_id

from

(

select mid_id

from

(

select

mid_id,

date_sub(dt,rank) date_dif

from

(

select

mid_id,

dt,

rank() over(partition by mid_id order by dt) rank

from dws_uv_detail_daycount

where dt>=date_add('$do_date',-6) and dt<='$do_date'

)t1

)t2

group by mid_id,date_dif

having count(*)>=3

)t3

group by mid_id

)t4;

insert into table ads_user_topic

select

'$do_date',

sum(if(login_date_last='$do_date',1,0)),

sum(if(login_date_first='$do_date',1,0)),

sum(if(payment_date_first='$do_date',1,0)),

sum(if(payment_count>0,1,0)),

count(*),

sum(if(login_date_last='$do_date',1,0))/count(*),

sum(if(payment_count>0,1,0))/count(*),

sum(if(login_date_first='$do_date',1,0))/sum(if(login_date_last='$do_date',1,

0))

from dwt_user_topic;

insert into table ads_user_action_convert_day

select

'$do_date',

uv.day_count,

ua.cart_count,

ua.cart_count/uv.day_count*100 visitor2cart_convert_ratio,

ua.order_count,

ua.order_count/ua.cart_count*100 visitor2order_convert_ratio,

ua.payment_count,

ua.payment_count/ua.order_count*100 order2payment_convert_ratio

from

(

select

'$do_date' dt,

sum(if(cart_count>0,1,0)) cart_count,

sum(if(order_count>0,1,0)) order_count,

sum(if(payment_count>0,1,0)) payment_count

from dws_user_action_daycount

where dt='$do_date'

)ua join ads_uv_count uv on uv.dt=ua.dt;

insert into table ads_product_info

select

'$do_date' dt,

sku_num,

spu_num

from

(

select

'$do_date' dt,

count(*) sku_num

from

dwt_sku_topic

) tmp_sku_num

join

(

select

'$do_date' dt,

count(*) spu_num

from

(

select

spu_id

from

dwt_sku_topic

group by

spu_id

) tmp_spu_id

) tmp_spu_num

on tmp_sku_num.dt=tmp_spu_num.dt;

insert into table ads_product_sale_topN

select

'$do_date',

sku_id,

payment_amount

from dws_sku_action_daycount

where dt='$do_date'

order by payment_amount desc

limit 10;

insert into table ads_product_favor_topN

select

'$do_date',

sku_id,

favor_count

from dws_sku_action_daycount

where dt='$do_date'

order by favor_count

limit 10;

insert into table ads_product_cart_topN

select

'$do_date' dt,

sku_id,

cart_num

from dws_sku_action_daycount

where dt='$do_date'

order by cart_num

limit 10;

insert into table ads_product_refund_topN

select

'$do_date',

sku_id,

refund_last_30d_count/payment_last_30d_count*100 refund_ratio

from dwt_sku_topic

order by refund_ratio desc

limit 10;

insert into table ads_appraise_bad_topN

select

'$do_date' dt,

sku_id,

appraise_bad_count/(appraise_bad_count+appraise_good_count+appraise_mid_coun

t+appraise_default_count)*100 appraise_bad_ratio

from dws_sku_action_daycount

where dt='$do_date'

order by appraise_bad_ratio desc

limit 10;

insert into table ads_order_daycount

select

'$do_date',

sum(order_count),

sum(order_amount),

sum(if(order_count>0,1,0))

from dws_user_action_daycount

where dt='$do_date';

insert into table ads_payment_daycount

select

tmp_payment.dt,

tmp_payment.payment_count,

tmp_payment.payment_amount,

tmp_payment.payment_user_count,

tmp_skucount.payment_sku_count,

tmp_time.payment_avg_time

from

(

select

'$do_date' dt,

sum(payment_count) payment_count,

sum(payment_amount) payment_amount,

sum(if(payment_count>0,1,0)) payment_user_count

from dws_user_action_daycount

where dt='$do_date'

)tmp_payment

join

(

select

'$do_date' dt,

sum(if(payment_count>0,1,0)) payment_sku_count

from dws_sku_action_daycount

where dt='$do_date'

)tmp_skucount on tmp_payment.dt=tmp_skucount.dt

join

(

select

'$do_date' dt,

sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60

payment_avg_time

from dwd_fact_order_info

where dt='$do_date'

and payment_time is not null

)tmp_time on tmp_payment.dt=tmp_time.dt;

insert into table ads_sale_tm_category1_stat_mn

select

mn.sku_tm_id,

mn.sku_category1_id,

mn.sku_category1_name,

sum(if(mn.order_count>=1,1,0)) buycount,

sum(if(mn.order_count>=2,1,0)) buyTwiceLast,

sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0))

buyTwiceLastRatio,

sum(if(mn.order_count>=3,1,0)) buy3timeLast ,

sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0))

buy3timeLastRatio ,

date_format('$do_date' ,'yyyy-MM') stat_mn,

'$do_date' stat_date

from

(

select

user_id,

sd.sku_tm_id,

sd.sku_category1_id,

sd.sku_category1_name,

sum(order_count) order_count

from dws_sale_detail_daycount sd

where date_format(dt,'yyyy-MM')=date_format('$do_date' ,'yyyy-MM')

group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name

) mn

group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;

"

$hive -e "$sql"

2)增加脚本执行权限

3)执行脚本导入数据

5e57d0384fd72ac6c1c6a633f67c0e41.png

ca241e541e326502800faed7592451f0.png
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值