说明:使用shell脚本增量调度至hive表。
操作步骤
1、使用sqoop蒋mysql数据调度到hive临时表tmp_bst_bas_dy_orders
2、将临时表中的数据覆盖写如hive对应分区内
临时表数据未压缩,以传统txt方式存取,hive表采用orc压缩,均为外部表,按年月日进行分区。数据库连接信息配置在$HOME/shell_code/config/env_config文件中
使用 sh bst_bas_dy_orders.sh 20190101 20190105 进行数据调度
bst_bas_dy_orders.sh
#!/bin/bash
config_path=$HOME/shell_code/config
source ${config_path}/env_config
begin_date=$1
end_date=$2
v_dt_year=${begin_date:0:4}
v_dt_month=${begin_date:4:2}
v_dt_date=${begin_date}
zip_code=org.apache.hadoop.io.compress.SnappyCodec
tab_rank=tmp
tab_source_sys=bst_dy_wyc/ics_sale_order
tmp_target_tab=tmp_bst_bas_dy_orders
hive_target_tab=bst_bas_dy_orders
tmp_hdfs_file_path=/user/hive/external/tables/${tab_rank}/${tab_source_sys}/${tmp_target_tab}
db_ip=${mysql_wyc_ip}
db_port=${mysql_wyc_port}
db_user=${mysql_wyc_user}
db_passwd=${mysql_wyc_password}
db_database=ics_sale_order
while [ ${begin_date} -lt ${end_date} ]
do
month_len=${#v_dt_month}
if [ ${month_len} -lt 2 ]
then
v_dt_month="0"${v_dt_month}
else
v_dt_month=${v_dt_month}
fi
echo $v_dt_year,$v_dt_month,$v_dt_date
begin_date_s=$v_dt_date
mid_date=`date -d "-1 day ago $begin_date_s" +%Y%m%d`
sqoop_sql="SELECT o.order_no,
o.centre_order_no,
o.user_no,
o.channel_no,
o.product_no,
o.contact_phone,
o.contact_name,
o.state,
o.amount,
o.centre_amount,
o.dis_amount,
o.real_amount,
o.pay_amount,
o.depart_date,
o.depart_time,
o.pay_order_no,
o.pay_platform_no,
o.pay_trade_type,
o.pay_succeed_time,
o.refund_amount,
o.refund_dis_amount,
o.refund_fee,
o.centre_refund_amount,
o.centre_refund_fee,
o.biz_no,
o.expire_time,
o.place_time,
o.make_time,
o.complete_time,
o.revoke_time,
o.revoke_reason,
o.confirm_time,
o.refund_channel_no,
o.refund_admin_no,
o.refund_time,
o.refund_reason,
o.deleted,
o.coupon_id,
o.coupon_dis_amount,
o.real_refund_amount,
o.invoice_fee,
o.insuranceNum,
o.provider_no,
o.supplier_no,
o.remark,
o.create_time,
o.insuranceAmount,
o.insuranced,
o.update_time
FROM ics_sale_order.orders o INNER
JOIN (SELECT o.order_no
FROM ics_sale_order.orders o
WHERE o.make_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.make_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no
FROM ics_sale_order.orders o
WHERE o.pay_succeed_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.pay_succeed_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no
FROM ics_sale_order.orders o
WHERE o.create_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.create_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no
FROM ics_sale_order.orders o
WHERE o.update_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.update_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no
FROM ics_sale_order.orders o
WHERE o.confirm_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.confirm_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no
FROM ics_sale_order.orders o
WHERE o.revoke_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.revoke_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no
FROM ics_sale_order.orders o
WHERE o.place_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.place_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no
FROM ics_sale_order.orders o
WHERE o.refund_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.refund_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
) tmp
ON o.order_no = tmp.order_no
where \$CONDITIONS
"
hadoop fs -rm -r ${tmp_hdfs_file_path}
sqoop import --connect jdbc:mysql://${db_ip}:${db_port}/${db_database}?serverTimezone=Asia/Shanghai --username ${db_user} --password ${db_passwd} \
--query "${sqoop_sql}" \
--target-dir "${tmp_hdfs_file_path}/dt_year=${v_dt_year}/dt_month=${v_dt_month}/dt_date=${v_dt_date}" \
--fields-terminated-by "," \
--delete-target-dir --fetch-size 5000 \
--num-mappers 1 --compress --compression-codec ${zip_code}
hql_str="select order_no,centre_order_no,user_no,channel_no,product_no,contact_phone,contact_name,state,amount,centre_amount,dis_amount,real_amount,pay_amount,depart_date,depart_time,pay_order_no,pay_platform_no,pay_trade_type,pay_succeed_time,refund_amount,refund_dis_amount,refund_fee,centre_refund_amount,centre_refund_fee,biz_no,expire_time,place_time,make_time,complete_time,revoke_time,revoke_reason,confirm_time,refund_channel_no,refund_admin_no,refund_time,refund_reason,deleted,coupon_id,coupon_dis_amount,real_refund_amount,invoice_fee,insurancenum,provider_no,supplier_no,remark,create_time,insuranceamount,insuranced,update_time,current_timestamp() as etl_date from ${tmp_target_tab} where dt_date='${v_dt_date}'"
hive_e_str="
use dw_dy_wyc;
alter table ${tmp_target_tab} drop partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}');
alter table ${hive_target_tab} drop partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}');
alter table ${tmp_target_tab} add partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}');
alter table ${hive_target_tab} add partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}');
insert overwrite table ${hive_target_tab} partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}') ${hql_str};
alter table ${tmp_target_tab} drop partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}');
"
echo "Doing HQL!!!!"
hive -e "$hive_e_str"
echo $hive_e_str
begin_date=${mid_date}
v_dt_year=${begin_date:0:4}
v_dt_month=${begin_date:4:2}
v_dt_date=${begin_date}
echo "Done HQL OVER !!! IS OK !!!! "
done
echo "ALL ALL ALL ${hive_target_tab} !!! IS OK !!!! "