创建tmp表
#!/bin/bash
begin_date=$1
end_date=$2
target_table=default_catalog.default_database.sink_batch_feature_result
v_dt_year=${begin_date:0:4}
echo $v_dt_year,$begin_date,$end_date
query_sql="
with tmp as
( select
topic ,
key ,
send_time ,
_key_suffix ,
_expire ,
_ignore_value ,
feature ,
_feature_suffix ,
val
from(
select
'user' as topic,
key,
now_timestemp(key) as send_time,
substr('${end_date}',7,2) as _key_suffix,
cast(3600*24*2 as bigint) as _expire,
'' as _ignore_value,
'user_c_neg_feedback_jd_mismatch_jobtype3_30d' as feature,
'' as _feature_suffix,
val
from(
select
user_id as key,
concat('{', concat_ws(',', LISTAGG(concat('"jtype3":"',cast(jd_subtype_id as char), ',"fbcnt:"',cast(cnt as char), ',"fbdate:"',cast(intervaldate as char)))), '}') as val
from(
select
bb.jd_no,
bb.user_id,
bb.jd_subtype_id,
count(bb.jd_no) as cnt,
datediff(substr('${end_date}',7,2),substring (bb.c_inappropriate_time,1,8)) as intervaldate,
row_number() over(partition by user_id order by count(jd_no) desc) as rank_no
from
(select
a.user_id,
a.jd_no,
a.c_inappropriate_time,
b.jd_subtype_id
from(select
distinct
user_id,
jd_no,
c_inappropriate_time
from
myhive.dwb.dwb_pulsar_c_inappropriate_hour
where
dt >= ${begin_date}
and dt <= ${end_date}
and refer_type ='9'
and c_inappropriate_reason = '1'
)a
left join
(select
distinct jd_no,
jd_subtype_id
from
myhive.dwd.dwd_cnt_jd_all
where
dt = ${v_dt_year}
)b
on a.jd_no = b.jd_no)bb
group by bb.jd_no,bb.user_id,bb.jd_subtype_id,datediff(substr('${end_date}',7,2),substring (bb.c_inappropriate_time,1,8))
)c where rank_no <= 10 group by user_id
)d
)e where val <>'{}' and val is not null and val <>'')
insert overwrite table ${target_table} partition(dt_year='${v_dt_year}')
SELECT date_format(dwt.send_time, 'yyyyMMdd') AS send_time,
df.provider_no,
df.supplier_no,
dwt.driver_no,
df.driver_name,
df.telephone,
dwt.business_no,
vf.vehicle_num,
vf.vehicle_no,
dwt.topic,
dwt.work_end_time,
dwt.key,
dwt.feature,
dwt.val,
current_timestamp() as etl_date
FROM tmp dwt
LEFT JOIN dwd.dwd_campus_service_info df
ON dwt.topic= df.user_id
"
echo 'start execute hive sql !'
hive_sql="
use dwd;
set hive.exec.parallel=true;
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
set hive.merge.mapfiles = true;
set hive.merge.mapredfiles=true;
set hive.merge.size.per.task = 256000000;
set hive.merge.smallfiles.avgsize=16000000;
alter table ${target_table} drop partition(dt_year='${v_dt_year}');
alter table ${target_table} add partition(dt_year='${v_dt_year}');
${query_sql};
"
echo $hive_sql
hive -e "$hive_sql"
echo 'hive sql is execute OK !'