#!/bin/bash
day_str=`date -d '-1 day' +'%Y-%m-%d'`
inpath=/app-log-data/data/$day_str
outpath=/app-log-data/clean/${day_str}-clean
hive_exec=/root/apps/hive-1.2.1/bin/hive
echo "准备清洗$day_str数据......"
/root/apps/hadoop-2.8.3/bin/hadoop jar /root/data-clean.jar cn.edu360.app.log.mr.AppLogDataClean $inpath $outpath
echo "正在改变ods_app_log数据表分区的结果...."
HQL1_ALTER_TABLE_org_app_log_android="
ALTER TABLE user_use.ods_app_log ADD PARTITION (day = \"$day_str\",os = 'android') location '/app-log-data/clean/$day_str/android';
"
HQL2_ALTER_TABLE_org_app_log_ios="
ALTER TABLE user_use.ods_app_log ADD PARTITION (day =\"$day_str\",os = 'ios') location '/app-log-data/clean/$day_str/ios';
"
$hive_exec -e "$HQL1_ALTER_TABLE_org_app_log_android"
$hive_exec -e "$HQL2_ALTER_TABLE_org_app_log_ios"
echo "数据正在导入ods_app_log分区表中..."
HQL1_LOAD_DATA_org_app_log_android="
load data inpath '/app-log-data/clean/${day_str}-clean/android' into table user_use.ods_app_log partition(day=\"$day_str\",os = 'android');
"
HQL2_LOAD_DATA_org_app_log_ios="
load data inpath '/app-log-data/clean/${day_str}-clean/ios' into table user_use.ods_app_log partition(day=\"$day_str\",os = 'ios');
"
$hive_exec -e "$HQL1_LOAD_DATA_org_app_log_android"
$hive_exec -e "$HQL2_LOAD_DATA_org_app_log_ios"
echo "正在向日活数据表插入数据...."
HQL1_INSERT_TABLE_etl_user_active_day="
INSERT INTO TABLE user_use.etl_user_active_day PARTITION (day=\"$day_str\")
SELECT sdk_ver
,time_zone
,commit_id
,commit_time
,pid
,app_token
,app_id
,device_id
,device_id_type
,release_channel
,app_ver_name
,app_ver_code
,os_name
,os_ver
,LANGUAGE
,country
,manufacture
,device_model
,resolution
,net_type
,account
,app_device_id
,mac
,android_id
,imei
,cid_sn
,build_num
,mobile_data_type
,promotion_channel
,carrier
,city
,user_id
FROM (
SELECT *
,row_number() OVER (
PARTITION BY user_id ORDER BY commit_time
) AS rn
FROM user_use.ods_app_log
WHERE day=\"$day_str\"
) tmp
WHERE rn = 1;
"
$hive_exec -e "$HQL1_INSERT_TABLE_etl_user_active_day"
echo "正在向维度日活数据表插入数据....."
HQL1_INSERT_TABLE_dim_user_active_day="
from user_use.etl_user_active_day
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0000')
select 'all','all','all','all',count(1)
where day =\"$day_str\"
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1000')
select os_name,'all','all','all',count(1)
where day =\"$day_str\"
group by os_name
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0100')
select 'all',city,'all','all',count(1)
where day =\"$day_str\"
group by city
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0010')
select 'all','all',release_channel,'all',count(1)
where day =\"$day_str\"
group by release_channel
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0001')
select 'all','all','all',app_ver_name,count(1)
where day =\"$day_str\"
group by app_ver_name
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1100')
select os_name,city,'all','all',count(1)
where day = \"$day_str\"
group by os_name,city
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1010')
select os_name,'all',release_channel,'all',count(1)
where day = \"$day_str\"
group by os_name,release_channel
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1001')
select os_name,'all','all',app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,app_ver_name
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0110')
select 'all',city,release_channel,'all',count(1)
where day = \"$day_str\"
group by city,release_channel
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0101')
select 'all',city,'all',app_ver_name,count(1)
where day = \"$day_str\"
group by city,app_ver_name
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0011')
select 'all','all',release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by release_channel,app_ver_name
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1110')
select os_name,city,release_channel,'all',count(1)
where day = \"$day_str\"
group by os_name,city,release_channel
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0111')
select 'all',city,release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by city,release_channel,app_ver_name
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1101')
select os_name,city,'all',app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,city,app_ver_name
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1011')
select os_name,'all',release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,release_channel,app_ver_name
insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1111')
select os_name,city,release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,city,release_channel,app_ver_name;
"
$hive_exec -e "$HQL1_INSERT_TABLE_dim_user_active_day"
echo "正在统计当日活跃-历史用户表,向当日新增用户表分区表插入数据......"
HQL1_INSERT_TABLE_etl_user_new_day="
insert into user_use.etl_user_new_day partition(day=\"$day_str\")
SELECT sdk_ver
,time_zone
,commit_id
,commit_time
,pid
,app_token
,app_id
,device_id
,device_id_type
,release_channel
,app_ver_name
,app_ver_code
,os_name
,os_ver
,LANGUAGE
,country
,manufacture
,device_model
,resolution
,net_type
,account
,app_device_id
,mac
,android_id
,imei
,cid_sn
,build_num
,mobile_data_type
,promotion_channel
,carrier
,city
,a.user_id
from user_use.etl_user_active_day a left join user_use.etl_user_history b on a.user_id = b.user_id
where a.day=\"$day_str\" and b.user_id is null;
"
$hive_exec -e "$HQL1_INSERT_TABLE_etl_user_new_day"
echo "正在将日新增用户的user_id追加到历史表...."
HQL1_INSERT_TABLE_etl_user_history="
insert into table user_use.etl_user_history
select user_id from user_use.etl_user_new_day where day=\"$day_str\";
"
$hive_exec -e "$HQL1_INSERT_TABLE_etl_user_history"
echo "正在更新日新维度数据表......"
HQL1_INSERT_TABLE_dim_user_new_day="
from user_use.etl_user_new_day
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0000')
select 'all','all','all','all',count(1)
where day =\"$day_str\"
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1000')
select os_name,'all','all','all',count(1)
where day =\"$day_str\"
group by os_name
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0100')
select 'all',city,'all','all',count(1)
where day =\"$day_str\"
group by city
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0010')
select 'all','all',release_channel,'all',count(1)
where day =\"$day_str\"
group by release_channel
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0001')
select 'all','all','all',app_ver_name,count(1)
where day =\"$day_str\"
group by app_ver_name
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1100')
select os_name,city,'all','all',count(1)
where day = \"$day_str\"
group by os_name,city
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1010')
select os_name,'all',release_channel,'all',count(1)
where day = \"$day_str\"
group by os_name,release_channel
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1001')
select os_name,'all','all',app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,app_ver_name
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0110')
select 'all',city,release_channel,'all',count(1)
where day = \"$day_str\"
group by city,release_channel
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0101')
select 'all',city,'all',app_ver_name,count(1)
where day = \"$day_str\"
group by city,app_ver_name
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0011')
select 'all','all',release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by release_channel,app_ver_name
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1110')
select os_name,city,release_channel,'all',count(1)
where day = \"$day_str\"
group by os_name,city,release_channel
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0111')
select 'all',city,release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by city,release_channel,app_ver_name
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1101')
select os_name,city,'all',app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,city,app_ver_name
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1011')
select os_name,'all',release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,release_channel,app_ver_name
insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1111')
select os_name,city,release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,city,release_channel,app_ver_name;
"
$hive_exec -e "$HQL1_INSERT_TABLE_dim_user_new_day"
echo "初级数据建模完成......"
APP数据模拟处理流程—[shell脚本]
最新推荐文章于 2022-07-13 11:22:24 发布