在Mysql中生成原始数据
- 在mysql中生成数据
source /root/snbap_ods.sql;
Hive分层
这里的项目Hive分层有:ODS->DWD->DWS->DM(将DW层拆成了DWD,DWDS)
DWD(data warehouse detail): 数据明细层
DWS(data warehouse service): 数据服务层(轻度的汇总,DWS层一般是跨表的)
DM(data market):数据集市层(高度汇总)
将Mysql中的数据导入Hive的ODS层
这里需要注意有些表需要全量导入,有的需要增量导入
这里还可以将sqoop写成脚本,设置每日自动执行
全量导入: snbap_ods.ods_user
全量导入: snbap_ods.ods_user_extend
全量导入: snbap_ods.ods_user_addr
全量导入: snbap_ods.ods_biz_trade
全量导入: snbap_ods.ods_biz_trade
封装2个全量脚本, 初始化脚本(只有第一次执行,后面就不用再执行了) 和 每天定时脚本(从第二天开始用)
############脚本一: ods_full_sqoop_load.sh (初始化脚本)###############
#!/bin/bash
sqoop job --delete bap_user
sqoop job --create bap_user \
-- import \
--connect jdbc:mysql://localhost:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table user \
--delete-target-dir \
--target-dir /snbap/ods/ods_user \
--fields-terminated-by '\001'
sqoop job --delete bap_user_extend
sqoop job --create bap_user_extend \
-- import \
--connect jdbc:mysql://localhost:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table user_extend \
--delete-target-dir \
--target-dir /snbap/ods/ods_user_extend \
--fields-terminated-by '\001'
sqoop job --delete bap_user_addr
sqoop job --create bap_user_addr \
-- import \
--connect jdbc:mysql://localhost:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table user_addr \
--delete-target-dir \
--target-dir /snbap/ods/ods_user_addr \
--fields-terminated-by '\001'
sqoop job --delete bap_biz_trade
sqoop job --create bap_biz_trade \
-- import \
--connect jdbc:mysql://localhost:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table biz_trade \
--delete-target-dir \
--target-dir /snbap/ods/ods_biz_trade \
--fields-terminated-by '\001'
sqoop job --exec bap_user
sqoop job --exec bap_user_extend
sqoop job --exec bap_user_addr
sqoop job --exec bap_biz_trade
#######################结束#####################################
############脚本二: ods_full_load.sh (第二天起,每日执行一次) ###############
#!/bin/bash
sqoop job --exec bap_user
sqoop job --exec bap_user_extend
sqoop job --exec bap_user_addr
sqoop job --exec bap_biz_trade
#######################结束#####################################
# 定时执行
crontab -e
0 2 * * * /root/ods_full_load.sh
分,时,日,月,周
增量导入: snbap_ods.ods_user_pc_click_log
增量导入: snbap_ods.ods_user_app_click_log
增量导入: snbap_ods.ods_user_order
增量导入: snbap_ods.ods_order_item
增量导入: snbap_ods.ods_order_delivery
增量导入: snbap_ods.ods_order_cart**
封装2个增量脚本, 初始化脚本(只有第一次执行,后面就不用再执行了) 和 每天定时脚本(从第二天开始用)
############脚本一: ods_incr_sqoop_load.sh (初始化脚本)###############
#!/bin/bash
sqoop job --delete bap_ods_user_pc_click
sqoop job --create bap_ods_user_pc_click \
-- import \
--connect jdbc:mysql://localhost:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table user_pc_click_log \
--target-dir /snbap/ods/ods_user_pc_click_log \
--fields-terminated-by '\001' \
--check-column log_id \
--incremental append \
--last-value 0
sqoop job --delete bap_ods_user_app_click_log
sqoop job --create bap_ods_user_app_click_log \
-- import \
--connect jdbc:mysql://localhost:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table user_app_click_log \
--target-dir /snbap/ods/ods_user_app_click_log \
--fields-terminated-by '\001' \
--check-column log_id \
--incremental append \
--last-value 0
sqoop job --delete bap_ods_user_order
sqoop job --create bap_ods_user_order \
-- import \
--connect jdbc:mysql://localhost:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table us_order \
--target-dir /snbap/ods/ods_us_order \
--fields-terminated-by '\001' \
--check-column order_id \
--incremental append \
--last-value 0
sqoop job --delete bap_ods_order_item
sqoop job --create bap_ods_order_item \
-- import \
--connect jdbc:mysql://localhost:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table order_item \
--target-dir /snbap/ods/ods_order_item \
--fields-terminated-by '\001' \
--check-column order_id \
--incremental append \
--last-value 0
sqoop job --delete bap_ods_order_delivery
sqoop job --create bap_ods_order_delivery \
-- import \
--connect jdbc:mysql://localhost:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table order_delivery \
--target-dir /snbap/ods/ods_order_delivery \
--fields-terminated-by '\001' \
--check-column order_id \
--incremental append \
--last-value 0
sqoop job --delete bap_ods_order_cart
sqoop job --create bap_ods_order_cart \
-- import \
--connect jdbc:mysql://localhost:3306/snbap_ods \
--driver com.mysql.jdbc.Driver \
--username root \
--password-file /sqoop/pwd/sqoopPWD.pwd \
--table cart \
--target-dir /snbap/ods/ods_cart \
--fields-terminated-by '\001' \
--check-column cart_id \
--incremental append \
--last-value 0
sqoop job --exec bap_ods_user_pc_click
sqoop job --exec bap_ods_user_app_click_log
sqoop job --exec bap_ods_user_order
sqoop job --exec bap_ods_order_item
sqoop job --exec bap_ods_order_delivery
sqoop job --exec bap_ods_order_cart
#######################结束#####################################
############增量导入每日定时脚本 #########################
#!/bin/bash
dt=
if [ $# == 0 ]
then
dt=`date -d "1 day ago" "+%Y%m%d"`
else
dt=$1
fi
sqoop job --exec bap_ods_user_pc_click
sqoop job --exec bap_ods_user_app_click_log
sqoop job --exec bap_ods_user_order
sqoop job --exec bap_ods_order_item
sqoop job --exec bap_ods_order_delivery
sqoop job --exec bap_ods_order_cart
hive --database snbap_ods -e "load data inpath '/snbap/ods/ods_user_pc_click_log/*' into table snbap_ods.ods_user_pc_click_log partition(dt=$dt)"
hive --database snbap_ods -e "load data inpath '/snbap/ods/ods_user_app_click_log/*' into table snbap_ods.ods_user_app_click_log partition(dt=$dt)"
hive --database snbap_ods -e "load data inpath '/snbap/ods/ods_us_order/*' into table snbap_ods.ods_us_order partition(dt=$dt)"
hive --database snbap_ods -e "load data inpath '/snbap/ods/ods_order_item/*' into table snbap_ods.ods_order_item partition(dt=$dt)"
hive --database snbap_ods -e "load data inpath '/snbap/ods/ods_order_delivery/*' into table snbap_ods.ods_order_delivery partition(dt=$dt)"
hive --database snbap_ods -e "load data inpath '/snbap/ods/ods_cart/*' into table snbap_ods.ods_cart partition(dt=$dt)"
#######################结束#####################################
# 方法二: 直接使用sqoop将数据导入到分区表的分区目录下, 然后手动添加分区
alter table snbap_ods.ods_user_pc_click_log add partition(dt=$dt) location '/snbap/ods/ods_user_pc_click_log/dt=$dt'
DWD层数据加载
-- 加载dwd_user表数据
insert overwrite table snbap_dwd.dwd_user
select
user_id ,
user_name ,
user_gender ,
user_birthday ,
user_age ,
constellation ,
province ,
city ,
city_level ,
e_mail ,
op_mail ,
mobile ,
num_seg_mobile,
op_mobile ,
register_time ,
login_ip ,
login_source ,
request_user ,
total_score ,
used_score ,
is_blacklist ,
is_married ,
education ,
monthly_income,
profession ,
create_date
from
snbap_ods.ods_user;
-- 加载dwd_user_extend 表数据
insert overwrite table snbap_dwd.dwd_user_extend
select
user_id ,
user_gender ,
is_pregnant_woman ,
is_have_children ,
is_have_car ,
phone_brand ,
phone_brand_level ,
phone_cnt ,
change_phone_cnt ,
is_maja ,
majia_account_cnt ,
loyal_model ,
shopping_type_model ,
weight ,
height ,
current_timestamp() dw_date
from snbap_ods.ods_user_extend
--加载PC访问表
--没有当聚合函数和非聚合函数在一起的视乎非聚合函数的需要group by
insert overwrite table snbap_dwd.dwd_user_pc_pv partition(dt='20200904')
select
max(log_id),
user_id,
session_id,
cookie_id,
min(visit_time) in_time ,
max(visit_time) out_time,
unix_timestamp(max(visit_time))-unix_timestamp(min(visit_time)) stay_time,
count(1) pv ,
visit_os ,
browser_name ,
visit_ip,
province ,
city ,
current_timestamp() dw_date
from snbap_ods.ods_user_pc_click_log
where dt='20200904'
group by
user_id ,
session_id ,
cookie_id ,
visit_os ,
browser_name ,
visit_ip,
province ,
city
;
--加载dwd_user_addr
insert overwrite table snbap_dwd.dwd_user_addr
select
user_id,
order_addr,
user_order_flag,
addr_id,
arear_id,
current_timestamp() dw_date
from snbap_ods.ods_user_addr;
--
导入DWS层表数据
# 加载dws层的用户宽表
insert overwrite table snbap_dws.dws_user_basic
select
u.user_id ,
u.user_name ,
u.user_gender ,
u.user_birthday ,
u.user_age ,
u.constellation ,
u.province ,
u.city ,
u.city_level ,
u.e_mail ,
u.op_mail ,
u.mobile ,
u.num_seg_mobile ,
u.op_mobile ,
u.register_time ,
u.login_ip ,
u.login_source ,
u.request_user ,
u.total_score ,
u.used_score ,
u.is_blacklist ,
u.is_married ,
u.education ,
u.monthly_income ,
u.profession ,
ue.is_pregnant_woman,
ue.is_have_children ,
ue.is_have_car ,
ue.phone_brand ,
ue.phone_brand_level,
ue.phone_cnt ,
ue.change_phone_cnt ,
ue.is_maja ,
ue.majia_account_cnt,
ue.loyal_model ,
ue.shopping_type_model,
ue.weight ,
ue.height
from snbap_dwd.dwd_user u
join snbap_dwd.dwd_user_extend ue on u.user_id=ue.user_id
;
# dws_user_visit_month1
用户id, ip, cookie, 浏览器, os, ....
10001, 192.168..., xxx , chrome, windows
10001, 192.168..., xxx , chrome, windows
10002, 192.168..., yyy , chrome, mac
10001, 192.167..., zzz , chrome, ubuntu
用户id, 类型, 内容, 次数 排名
10001, ip , 192.168..., 2 1
10001, cookie, xxx, 2 1
10001, 浏览器, chrome, 3 1
10001, os, windows, 2 1
10001, visit_ip , 192.167..., 1 2
10001, cookie, zzz, 1 2
10001, os, ubuntu, 1 2
10002, ip , 192.168..., 1 1
10002, cookie, yyy, 1 1
10002, 浏览器 , chrome, 1 1
10002, os , mac, 1 1
insert overwrite table snbap_dws.dws_user_visit_month1 partition(dt='20200904')
select
tmp.user_id,
tmp.`type`,
tmp.content,
tmp.cnt ,
row_number() over(distribute by tmp.user_id,tmp.`type` sort by tmp.cnt desc ) rn,
current_timestamp() dw_date
from
(
select
user_id,
'visit_ip' `type` ,
visit_ip content,
sum(pv) cnt
from snbap_dwd.dwd_user_pc_pv
-- where in_time<='2020-09-04' and datediff('2020-09-04',date(in_time))<30
group by
user_id,
visit_ip
union all
-- cookie_id
select
user_id,
'cookie_id' `type`,
cookie_id content,
sum(pv) cnt
from snbap_dwd.dwd_user_pc_pv
-- where in_time<='2020-09-04' and datediff('2020-09-04',date(in_time))<30
group by
user_id,
cookie_id
union all
-- browser_name
select
user_id,
'browser_name' `type` ,
browser_name content,
sum(pv) cnt
from snbap_dwd.dwd_user_pc_pv
-- where in_time<='2020-09-04' and datediff('2020-09-04',date(in_time))<30
group by
user_id,
browser_name
union all
-- visit_os
select
user_id,
'visit_os' `type` ,
visit_os content,
sum(pv) cnt
from snbap_dwd.dwd_user_pc_pv
-- where in_time<='2020-09-04' and datediff('2020-09-04',date(in_time))<30
group by
user_id,
visit_os
) tmp
;
导入DM层数据
# APP端的指标
select
app.user_id,
max(case when drn=1 then app.log_time end) latest_app_visit_time,-- 最近一次访问的时间
max(case when drn=1 then app.app_name end) latest_app_name,-- 最近一次访问的APP名称
max(case when drn=1 then app.visit_os end) latest_app_visit_os,-- 最近一次访问的APP操作系统
max(case when rn=1 then app.log_time end) first_app_visit_time,-- 第一次访问的时间
max(case when rn=1 then app.app_name end) first_app_name,-- 第一次访问的APP名称
max(case when rn=1 then app.visit_os end) first_app_visit_os,-- 第一次访问的APP操作系统
max(case when rn=1 then app.visit_ip end) first_app_visit_ip,-- 第一次访问的ip
sum(app_dt7) day7_app_cnt,
sum(app_dt15) day15_app_cnt,
sum(app_dt30) day30_app_cnt,
sum(app_dt60) day60_app_cnt,
sum(app_dt90) day90_app_cnt,
sum(case when app_dt30=1 and app_hr_025=1 then 1 end) month1_app_hour025_cnt,
sum(case when app_dt30=1 and app_hr_627=1 then 1 end) month1_app_hour627_cnt,
sum(case when app_dt30=1 and app_hr_829=1 then 1 end) month1_app_hour829_cnt,
sum(case when app_dt30=1 and app_hr_10211=1 then 1 end) month1_app_hour10211_cnt,
sum(case when app_dt30=1 and app_hr_12213=1 then 1 end) month1_app_hour12213_cnt,
sum(case when app_dt30=1 and app_hr_14215=1 then 1 end) month1_app_hour14215_cnt,
sum(case when app_dt30=1 and app_hr_16217=1 then 1 end) month1_app_hour16217_cnt,
sum(case when app_dt30=1 and app_hr_18219=1 then 1 end) month1_app_hour18219_cnt,
sum(case when app_dt30=1 and app_hr_20221=1 then 1 end) month1_app_hour20221_cnt,
sum(case when app_dt30=1 and app_hr_22223=1 then 1 end) month1_app_hour22223_cnt
from
(
select
user_id,
imei,
log_time,
log_hour,
visit_os,
os_version,
app_name,
app_version,
device_token,
visit_ip,
province,
city,
row_number() over(distribute by user_id sort by log_time asc) rn,
row_number() over(distribute by user_id sort by log_time desc) drn,
case when log_time>=date_sub('2020-08-10',6) then 1 end app_dt7,
case when log_time>=date_sub('2020-08-10',14) then 1 end app_dt15,
case when log_time>=date_sub('2020-08-10',29) then 1 end app_dt30,
case when log_time>=date_sub('2020-08-10',59) then 1 end app_dt60,
case when log_time>=date_sub('2020-08-10',89) then 1 end app_dt90,
case when hour(log_time) between 0 and 5 then 1 end app_hr_025,
case when hour(log_time) between 6 and 7 then 1 end app_hr_627 ,
case when hour(log_time) between 8 and 9 then 1 end app_hr_829 ,
case when hour(log_time) between 10 and 11 then 1 end app_hr_10211,
case when hour(log_time) between 12 and 13 then 1 end app_hr_12213,
case when hour(log_time) between 14 and 15 then 1 end app_hr_14215,
case when hour(log_time) between 16 and 17 then 1 end app_hr_16217,
case when hour(log_time) between 18 and 19 then 1 end app_hr_18219,
case when hour(log_time) between 20 and 21 then 1 end app_hr_20221,
case when hour(log_time) between 22 and 23 then 1 end app_hr_22223
from snbap_dwd.dwd_user_app_pv
) app
group by app.user_id
;
# 需求一: 用户访问所有指标
-- 创建dm数据库
drop database if exists snbap_dm cascade;
create database snbap_dm;
-- 创建数据表
create table snbap_dm.dm_user_visit as
select
ub.user_id,
ub.user_name,
ub.user_gender,
ub.user_birthday,
ub.user_age,
ub.constellation,
ub.province,
ub.city,
ub.city_level,
ub.e_mail,
ub.op_mail,
ub.mobile,
ub.num_seg_mobile,
ub.op_mobile,
ub.register_time,
ub.login_ip,
ub.login_source,
ub.request_user,
ub.total_score,
ub.used_score,
ub.is_blacklist,
ub.is_married,
ub.education,
ub.monthly_income,
ub.profession,
ub.is_pregnant_woman,
ub.is_have_children,
ub.is_have_car,
ub.phone_brand,
ub.phone_brand_level,
ub.phone_cnt,
ub.change_phone_cnt,
ub.is_maja,
ub.majia_account_cnt,
ub.loyal_model,
ub.shopping_type_model,
ub.weight,
ub.height,
t_pc.latest_pc_visit_date,-- pc最近一次访问日期
t_pc.latest_pc_visit_session,-- pc最近一次访问session_id
t_pc.latest_pc_cookies,-- pc最近一次访问cookie_id
t_pc.latest_pc_pv,-- pc最近一次访问pv数量
t_pc.latest_pc_visit_browser_name,-- pc最近一次访问browser_name
t_pc.latest_pc_visit_os,-- pc最近一次访问visit_os
t_pc.first_pc_visit_date,-- pc第一次一次访问日期
t_pc.first_pc_visit_session,-- pc第一次一次访问session_id
t_pc.first_pc_cookies,-- pc第一次访问cookie_id
t_pc.first_pc_pv, -- pc第一次访问pv数量
t_pc.first_pc_visit_browser_name,-- pc第一次访问browser_name
t_pc.first_pc_visit_os,-- 第一次访问visit_os
t_pc.day7_pc_cnt,-- 近7天pc访问次数
t_pc.day15_pc_cnt, -- 近15天pc访问次数
t_pc.month1_pc_cnt, -- 近30天pc访问次数
t_pc.month2_pc_cnt, -- 近60天pc访问次数
t_pc.month3_pc_cnt, -- 近90天pc访问次数
t_pc.month1_pc_pv, -- 近30天pc端pv
t_pc.month1_pc_avg_pv, -- 近30天pc端平均pv
t_pc.month1_pc_hour025_cnt,-- 近30天pc端访问0到5点pv数
t_pc.month1_pc_hour627_cnt,-- 近30天pc端访问6 到7 点pv数
t_pc.month1_pc_hour829_cnt,-- 近30天pc端访问8 到9 点pv数
t_pc.month1_pc_hour10211_cnt,-- 近30天pc端访问10到11点pv数
t_pc.month1_pc_hour12213_cnt,-- 近30天pc端访问12到13点pv数
t_pc.month1_pc_hour14215_cnt,-- 近30天pc端访问14到15点pv数
t_pc.month1_pc_hour16217_cnt,-- 近30天pc端访问16到17点pv数
t_pc.month1_pc_hour18219_cnt,-- 近30天pc端访问18到19点pv数
t_pc.month1_pc_hour20221_cnt,-- 近30天pc端访问20到21点pv数
t_pc.month1_pc_hour22223_cnt, -- 近30天pc端访问22到23点pv数
t_month1.month1_pc_diff_ip_cnt, -- 30天内使用的不同ip数量
t_month1.month1_pc_common_ip_cnt, -- 30天内常用ip
t_month1.month1_pc_diff_cookie_cnt, -- 30天内使用的不同cookie数量
t_month1.month1_pc_common_cookie_cnt, -- 30天内常用cookie
t_month1.month1_pc_common_browser_name, -- 30天内常用browser
t_month1.month1_pc_common_os, -- 30天内常用操作系统
t_app.latest_app_visit_time,-- 最近一次访问app的时间
t_app.latest_app_name,-- 最近一次访问的APP名称
t_app.latest_app_visit_os,-- 最近一次访问的APP操作系统
t_app.first_app_visit_time,-- 第一次访问app的时间
t_app.first_app_name,-- 第一次访问的APP名称
t_app.first_app_visit_os,-- 第一次访问的APP操作系统
t_app.first_app_visit_ip,-- 第一次访问app的ip
t_app.day7_app_cnt, -- app端近7天访问次数
t_app.day15_app_cnt, -- app端近115天访问次数
t_app.day30_app_cnt, -- app端近30天访问次数
t_app.day60_app_cnt, -- app端近60天访问次数
t_app.day90_app_cnt, -- app端近90天访问次数
t_app.month1_app_hour025_cnt, -- 近30天app端访问0到5点访问数
t_app.month1_app_hour627_cnt, -- 近30天app端访问6到7点访问数
t_app.month1_app_hour829_cnt, -- 近30天app端访问8到9点访问数
t_app.month1_app_hour10211_cnt, -- 近30天app端访问10到11点访问数
t_app.month1_app_hour12213_cnt, -- 近30天app端访问12到13点访问数
t_app.month1_app_hour14215_cnt, -- 近30天app端访问14到15点访问数
t_app.month1_app_hour16217_cnt, -- 近30天app端访问16到17点访问数
t_app.month1_app_hour18219_cnt, -- 近30天app端访问18到19点访问数
t_app.month1_app_hour20221_cnt, -- 近30天app端访问20到21点访问数
t_app.month1_app_hour22223_cnt, -- 近30天app端访问22到23点访问数
case when t_pc.latest_pc_visit_date>t_app.latest_app_visit_time then t_pc.latest_pc_visit_ip else t_app.latest_app_visit_ip end latest_visit_ip, -- 最近一次访问的ip
case when t_pc.latest_pc_visit_date>t_app.latest_app_visit_time then t_pc.province else t_app.province end latest_visit_province, -- 最近一次访问的省
case when t_pc.latest_pc_visit_date>t_app.latest_app_visit_time then t_pc.city else t_app.city end latest_visit_city, -- 最近一次访问的城市
case when t_pc.first_pc_visit_date<t_app.first_app_visit_time then t_pc.first_pc_visit_ip else t_app.first_app_visit_ip end first_visit_ip, -- 第一次访问的ip
case when t_pc.first_pc_visit_date<t_app.first_app_visit_time then t_pc.province else t_app.province end first_visit_province, -- 第一次访问的省
case when t_pc.first_pc_visit_date<t_app.first_app_visit_time then t_pc.city else t_app.city end first_visit_city -- 第一次访问的城市
from
snbap_dws.dws_user_basic ub
join
(
-- pc端指标
select
pc.user_id,
pc.province,
pc.city,
max(case when pc.drn=1 then pc.in_time end) latest_pc_visit_date,-- 最近一次访问日期
max(case when pc.drn=1 then pc.session_id end) latest_pc_visit_session,-- 最近一次访问session_id
max(case when pc.drn=1 then pc.cookie_id end) latest_pc_cookies,-- 最近一次访问cookie_id
max(case when pc.drn=1 then pc.pv end) latest_pc_pv,-- 最近一次访问pv数量
max(case when pc.drn=1 then pc.browser_name end) latest_pc_visit_browser_name,-- 最近一次访问browser_name
max(case when pc.drn=1 then pc.visit_os end) latest_pc_visit_os,-- 最近一次访问visit_os
max(case when pc.drn=1 then pc.visit_ip end) latest_pc_visit_ip,-- 最近一次访问visit_ip
max(case when pc.rn=1 then pc.in_time end) first_pc_visit_date,-- 第一次一次访问日期
max(case when pc.rn=1 then pc.session_id end) first_pc_visit_session,-- 第一次一次访问session_id
max(case when pc.rn=1 then pc.cookie_id end) first_pc_cookies,-- 第一次一次访问cookie_id
max(case when pc.rn=1 then pc.pv end) first_pc_pv, -- 第一次一次访问pv数量
max(case when pc.rn=1 then pc.browser_name end) first_pc_visit_browser_name,-- 第一次一次访问browser_name
max(case when pc.rn=1 then pc.visit_os end) first_pc_visit_os,-- 第一次一次访问visit_os
max(case when pc.rn=1 then pc.visit_ip end) first_pc_visit_ip,-- 第一次一次访问visit_ip
sum(pc.dt7) day7_pc_cnt,-- 近7天访问次数
sum(pc.dt15) day15_pc_cnt, -- 近15天访问次数
sum(pc.dt30) month1_pc_cnt, -- 近30天访问次数
sum(pc.dt60) month2_pc_cnt, -- 近60天访问次数
sum(pc.dt90) month3_pc_cnt, -- 近90天访问次数
sum(case when pc.dt30=1 then pc.pv end ) month1_pc_pv, -- 近30天pc端pv
sum(case when pc.dt30=1 then pc.pv end )/sum(pc.dt30) month1_pc_avg_pv, -- 近30天pc端平均pv
sum(case when pc.dt30=1 and pc.hr025=1 then pc.pv end) month1_pc_hour025_cnt, -- 近30天pc端访问0到5点pv数
sum(case when pc.dt30=1 and pc.hr627=1 then pc.pv end) month1_pc_hour627_cnt, -- 近30天pc端访问6 到7 点pv数
sum(case when pc.dt30=1 and pc.hr829 =1 then pc.pv end) month1_pc_hour829_cnt, -- 近30天pc端访问8 到9 点pv数
sum(case when pc.dt30=1 and pc.hr10211=1 then pc.pv end) month1_pc_hour10211_cnt, -- 近30天pc端访问10到11点pv数
sum(case when pc.dt30=1 and pc.hr12213=1 then pc.pv end) month1_pc_hour12213_cnt, -- 近30天pc端访问12到13点pv数
sum(case when pc.dt30=1 and pc.hr14215=1 then pc.pv end) month1_pc_hour14215_cnt, -- 近30天pc端访问14到15点pv数
sum(case when pc.dt30=1 and pc.hr16217=1 then pc.pv end) month1_pc_hour16217_cnt, -- 近30天pc端访问16到17点pv数
sum(case when pc.dt30=1 and pc.hr18219=1 then pc.pv end) month1_pc_hour18219_cnt, -- 近30天pc端访问18到19点pv数
sum(case when pc.dt30=1 and pc.hr20221=1 then pc.pv end) month1_pc_hour20221_cnt, -- 近30天pc端访问20到21点pv数
sum(case when pc.dt30=1 and pc.hr22223=1 then pc.pv end) month1_pc_hour22223_cnt -- 近30天pc端访问22到23点pv数
from
(
select
user_id,
session_id,
cookie_id,
in_time,
out_time,
stay_time,
pv,
visit_os,
browser_name,
visit_ip,
province,
city,
row_number() over(distribute by user_id sort by in_time asc) rn,
row_number() over(distribute by user_id sort by in_time desc) drn,
case when in_time>=date_sub('2020-08-10',6) then 1 end dt7,
case when in_time>=date_sub('2020-08-10',14) then 1 end dt15,
case when in_time>=date_sub('2020-08-10',29) then 1 end dt30,
case when in_time>=date_sub('2020-08-10',59) then 1 end dt60,
case when in_time>=date_sub('2020-08-10',89) then 1 end dt90,
case when hour(in_time) between 0 and 5 then 1 end hr025,
case when hour(in_time) between 6 and 7 then 1 end hr627 ,
case when hour(in_time) between 8 and 9 then 1 end hr829 ,
case when hour(in_time) between 10 and 11 then 1 end hr10211,
case when hour(in_time) between 12 and 13 then 1 end hr12213,
case when hour(in_time) between 14 and 15 then 1 end hr14215,
case when hour(in_time) between 16 and 17 then 1 end hr16217,
case when hour(in_time) between 18 and 19 then 1 end hr18219,
case when hour(in_time) between 20 and 21 then 1 end hr20221,
case when hour(in_time) between 22 and 23 then 1 end hr22223
from snbap_dwd.dwd_user_pc_pv
) pc
group by
pc.user_id,
pc.province,
pc.city
) t_pc on ub.user_id=t_pc.user_id
join
(
select
user_id,
count(case when type='visit_ip' then content end ) month1_pc_diff_ip_cnt,
max(case when rn=1 and type='visit_ip' then content end) month1_pc_common_ip_cnt,
count(case when type='cookie_id' then content end ) month1_pc_diff_cookie_cnt,
max(case when rn=1 and type='cookie_id' then content end) month1_pc_common_cookie_cnt,
max(case when type='browser_name' then content end ) month1_pc_common_browser_name,
max(case when rn=1 and type='visit_os' then content end) month1_pc_common_os
from snbap_dws.dws_user_visit_month1
group by user_id
) t_month1 on t_month1.user_id=ub.user_id
join
(
-- APP端指标
select
app.user_id,
app.province,
app.city,
max(case when drn=1 then app.log_time end) latest_app_visit_time,-- 最近一次访问的时间
max(case when drn=1 then app.app_name end) latest_app_name,-- 最近一次访问的APP名称
max(case when drn=1 then app.visit_os end) latest_app_visit_os,-- 最近一次访问的APP操作系统
max(case when drn=1 then app.visit_ip end) latest_app_visit_ip,-- 最近一次访问的APP的ip
max(case when rn=1 then app.log_time end) first_app_visit_time,-- 第一次访问的时间
max(case when rn=1 then app.app_name end) first_app_name,-- 第一次访问的APP名称
max(case when rn=1 then app.visit_os end) first_app_visit_os,-- 第一次访问的APP操作系统
max(case when rn=1 then app.visit_ip end) first_app_visit_ip,-- 第一次访问的ip
sum(app_dt7) day7_app_cnt,
sum(app_dt15) day15_app_cnt,
sum(app_dt30) day30_app_cnt,
sum(app_dt60) day60_app_cnt,
sum(app_dt90) day90_app_cnt,
sum(case when app_dt30=1 and app_hr_025=1 then 1 end) month1_app_hour025_cnt,
sum(case when app_dt30=1 and app_hr_627=1 then 1 end) month1_app_hour627_cnt,
sum(case when app_dt30=1 and app_hr_829=1 then 1 end) month1_app_hour829_cnt,
sum(case when app_dt30=1 and app_hr_10211=1 then 1 end) month1_app_hour10211_cnt,
sum(case when app_dt30=1 and app_hr_12213=1 then 1 end) month1_app_hour12213_cnt,
sum(case when app_dt30=1 and app_hr_14215=1 then 1 end) month1_app_hour14215_cnt,
sum(case when app_dt30=1 and app_hr_16217=1 then 1 end) month1_app_hour16217_cnt,
sum(case when app_dt30=1 and app_hr_18219=1 then 1 end) month1_app_hour18219_cnt,
sum(case when app_dt30=1 and app_hr_20221=1 then 1 end) month1_app_hour20221_cnt,
sum(case when app_dt30=1 and app_hr_22223=1 then 1 end) month1_app_hour22223_cnt
from
(
select
user_id,
imei,
log_time,
log_hour,
visit_os,
os_version,
app_name,
app_version,
device_token,
visit_ip,
province,
city,
row_number() over(distribute by user_id sort by log_time asc) rn,
row_number() over(distribute by user_id sort by log_time desc) drn,
case when log_time>=date_sub('2020-08-10',6) then 1 end app_dt7,
case when log_time>=date_sub('2020-08-10',14) then 1 end app_dt15,
case when log_time>=date_sub('2020-08-10',29) then 1 end app_dt30,
case when log_time>=date_sub('2020-08-10',59) then 1 end app_dt60,
case when log_time>=date_sub('2020-08-10',89) then 1 end app_dt90,
case when hour(log_time) between 0 and 5 then 1 end app_hr_025,
case when hour(log_time) between 6 and 7 then 1 end app_hr_627 ,
case when hour(log_time) between 8 and 9 then 1 end app_hr_829 ,
case when hour(log_time) between 10 and 11 then 1 end app_hr_10211,
case when hour(log_time) between 12 and 13 then 1 end app_hr_12213,
case when hour(log_time) between 14 and 15 then 1 end app_hr_14215,
case when hour(log_time) between 16 and 17 then 1 end app_hr_16217,
case when hour(log_time) between 18 and 19 then 1 end app_hr_18219,
case when hour(log_time) between 20 and 21 then 1 end app_hr_20221,
case when hour(log_time) between 22 and 23 then 1 end app_hr_22223
from snbap_dwd.dwd_user_app_pv
) app
group by
app.user_id,
app.province,
app.city
) t_app on t_app.user_id=ub.user_id
;