APP数据模拟处理流程—[shell脚本]

#!/bin/bash
day_str=`date -d '-1 day' +'%Y-%m-%d'`


inpath=/app-log-data/data/$day_str
outpath=/app-log-data/clean/${day_str}-clean


hive_exec=/root/apps/hive-1.2.1/bin/hive


echo "准备清洗$day_str数据......"


/root/apps/hadoop-2.8.3/bin/hadoop jar /root/data-clean.jar cn.edu360.app.log.mr.AppLogDataClean $inpath $outpath


echo "正在改变ods_app_log数据表分区的结果...."


HQL1_ALTER_TABLE_org_app_log_android="
ALTER TABLE user_use.ods_app_log ADD PARTITION (day = \"$day_str\",os = 'android') location '/app-log-data/clean/$day_str/android';
"


HQL2_ALTER_TABLE_org_app_log_ios="
ALTER TABLE user_use.ods_app_log ADD PARTITION (day =\"$day_str\",os = 'ios') location '/app-log-data/clean/$day_str/ios';
"


$hive_exec -e "$HQL1_ALTER_TABLE_org_app_log_android"
$hive_exec -e "$HQL2_ALTER_TABLE_org_app_log_ios"






echo "数据正在导入ods_app_log分区表中..."


HQL1_LOAD_DATA_org_app_log_android="
load data inpath '/app-log-data/clean/${day_str}-clean/android' into table user_use.ods_app_log partition(day=\"$day_str\",os = 'android');
"
HQL2_LOAD_DATA_org_app_log_ios="
load data inpath '/app-log-data/clean/${day_str}-clean/ios' into table user_use.ods_app_log partition(day=\"$day_str\",os = 'ios');
"


$hive_exec -e "$HQL1_LOAD_DATA_org_app_log_android"
$hive_exec -e "$HQL2_LOAD_DATA_org_app_log_ios"


echo "正在向日活数据表插入数据...."


HQL1_INSERT_TABLE_etl_user_active_day="
INSERT INTO TABLE user_use.etl_user_active_day PARTITION (day=\"$day_str\")
SELECT sdk_ver
    ,time_zone
    ,commit_id
    ,commit_time
    ,pid
    ,app_token
    ,app_id
    ,device_id
    ,device_id_type
    ,release_channel
    ,app_ver_name
    ,app_ver_code
    ,os_name
    ,os_ver
    ,LANGUAGE
    ,country
    ,manufacture
    ,device_model
    ,resolution
    ,net_type
    ,account
    ,app_device_id
    ,mac
    ,android_id
    ,imei
    ,cid_sn
    ,build_num
    ,mobile_data_type
    ,promotion_channel
    ,carrier
    ,city
    ,user_id
FROM (
    SELECT *
        ,row_number() OVER (
            PARTITION BY user_id ORDER BY commit_time
            ) AS rn
    FROM user_use.ods_app_log
    WHERE day=\"$day_str\"
    ) tmp
WHERE rn = 1;
"


$hive_exec -e "$HQL1_INSERT_TABLE_etl_user_active_day"


echo "正在向维度日活数据表插入数据....."


HQL1_INSERT_TABLE_dim_user_active_day="
from user_use.etl_user_active_day


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0000')
select 'all','all','all','all',count(1)
where day =\"$day_str\"


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1000')
select os_name,'all','all','all',count(1)
where day =\"$day_str\"
group by os_name


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0100')
select 'all',city,'all','all',count(1)
where day =\"$day_str\"
group by city


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0010')
select 'all','all',release_channel,'all',count(1)
where day =\"$day_str\"
group by release_channel


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0001')
select 'all','all','all',app_ver_name,count(1)
where day =\"$day_str\"
group by app_ver_name


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1100')
select os_name,city,'all','all',count(1)
where day = \"$day_str\"
group by os_name,city


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1010')
select os_name,'all',release_channel,'all',count(1)
where day = \"$day_str\"
group by os_name,release_channel


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1001')
select os_name,'all','all',app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,app_ver_name


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0110')
select 'all',city,release_channel,'all',count(1)
where day = \"$day_str\"
group by city,release_channel


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0101')
select 'all',city,'all',app_ver_name,count(1)
where day = \"$day_str\"
group by city,app_ver_name


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0011')
select 'all','all',release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by release_channel,app_ver_name


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1110')
select os_name,city,release_channel,'all',count(1)
where day = \"$day_str\"
group by os_name,city,release_channel


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '0111')
select 'all',city,release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by city,release_channel,app_ver_name


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1101')
select os_name,city,'all',app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,city,app_ver_name


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1011')
select os_name,'all',release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,release_channel,app_ver_name


insert into table user_use.dim_user_active_day partition(day = \"$day_str\",dim = '1111')
select os_name,city,release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,city,release_channel,app_ver_name;
"


$hive_exec -e "$HQL1_INSERT_TABLE_dim_user_active_day"


echo "正在统计当日活跃-历史用户表,向当日新增用户表分区表插入数据......"


HQL1_INSERT_TABLE_etl_user_new_day="
insert  into user_use.etl_user_new_day partition(day=\"$day_str\")
SELECT sdk_ver
    ,time_zone
    ,commit_id
    ,commit_time
    ,pid
    ,app_token
    ,app_id
    ,device_id
    ,device_id_type
    ,release_channel
    ,app_ver_name
    ,app_ver_code
    ,os_name
    ,os_ver
    ,LANGUAGE
    ,country
    ,manufacture
    ,device_model
    ,resolution
    ,net_type
    ,account
    ,app_device_id
    ,mac
    ,android_id
    ,imei
    ,cid_sn
    ,build_num
    ,mobile_data_type
    ,promotion_channel
    ,carrier
    ,city
    ,a.user_id
from  user_use.etl_user_active_day a left join  user_use.etl_user_history b on a.user_id = b.user_id
where a.day=\"$day_str\" and b.user_id is null;
"


$hive_exec -e "$HQL1_INSERT_TABLE_etl_user_new_day"




echo "正在将日新增用户的user_id追加到历史表...."


HQL1_INSERT_TABLE_etl_user_history="
insert into table user_use.etl_user_history
select user_id from user_use.etl_user_new_day where day=\"$day_str\";
"


$hive_exec -e "$HQL1_INSERT_TABLE_etl_user_history"


echo "正在更新日新维度数据表......"


HQL1_INSERT_TABLE_dim_user_new_day="
from user_use.etl_user_new_day


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0000')
select 'all','all','all','all',count(1)
where day =\"$day_str\"


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1000')
select os_name,'all','all','all',count(1)
where day =\"$day_str\"
group by os_name


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0100')
select 'all',city,'all','all',count(1)
where day =\"$day_str\"
group by city


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0010')
select 'all','all',release_channel,'all',count(1)
where day =\"$day_str\"
group by release_channel


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0001')
select 'all','all','all',app_ver_name,count(1)
where day =\"$day_str\"
group by app_ver_name


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1100')
select os_name,city,'all','all',count(1)
where day = \"$day_str\"
group by os_name,city


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1010')
select os_name,'all',release_channel,'all',count(1)
where day = \"$day_str\"
group by os_name,release_channel


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1001')
select os_name,'all','all',app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,app_ver_name


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0110')
select 'all',city,release_channel,'all',count(1)
where day = \"$day_str\"
group by city,release_channel


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0101')
select 'all',city,'all',app_ver_name,count(1)
where day = \"$day_str\"
group by city,app_ver_name


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0011')
select 'all','all',release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by release_channel,app_ver_name


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1110')
select os_name,city,release_channel,'all',count(1)
where day = \"$day_str\"
group by os_name,city,release_channel


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '0111')
select 'all',city,release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by city,release_channel,app_ver_name


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1101')
select os_name,city,'all',app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,city,app_ver_name


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1011')
select os_name,'all',release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,release_channel,app_ver_name


insert into table user_use.dim_user_new_day partition(day = \"$day_str\",dim = '1111')
select os_name,city,release_channel,app_ver_name,count(1)
where day = \"$day_str\"
group by os_name,city,release_channel,app_ver_name;
"
$hive_exec -e "$HQL1_INSERT_TABLE_dim_user_new_day"




echo "初级数据建模完成......"

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值