Offline MySQL Data Import: Loading MySQL Data into Hive in Practice

Overview: a shell script is used to schedule incremental loads into a Hive table.

Steps

1. Use Sqoop to load the MySQL data into the Hive staging table tmp_bst_bas_dy_orders.

2. Overwrite the corresponding partition of the target Hive table with the data from the staging table.

The staging table stores uncompressed data as plain text files, while the target Hive table uses compressed ORC. Both are external tables, partitioned by year, month, and day. The database connection settings are configured in $HOME/shell_code/config/env_config.
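
The original post does not include the table DDL. The sketch below is only a guess at what the two external tables could look like, inferred from the script (database dw_dy_wyc, the staging LOCATION built from tmp_hdfs_file_path, a comma-delimited text staging table, an ORC target). The column lists are abbreviated and the ORC compression codec is an assumption.

# Hypothetical DDL sketch -- not from the original article; column lists are
# abbreviated and only illustrate the storage/partition layout described above.
hive -e "
use dw_dy_wyc;

-- Staging table: uncompressed, comma-delimited text, partitioned by year/month/day
create external table if not exists tmp_bst_bas_dy_orders (
  order_no        string,
  centre_order_no string,
  user_no         string
  -- ... the remaining business columns selected by the script ...
)
partitioned by (dt_year string, dt_month string, dt_date string)
row format delimited fields terminated by ','
stored as textfile
location '/user/hive/external/tables/tmp/bst_dy_wyc/ics_sale_order/tmp_bst_bas_dy_orders';

-- Target table: ORC with compression (codec here is an assumption), same partitioning
create external table if not exists bst_bas_dy_orders (
  order_no        string,
  centre_order_no string,
  user_no         string,
  -- ... the remaining business columns ...
  etl_date        timestamp
)
partitioned by (dt_year string, dt_month string, dt_date string)
stored as orc
tblproperties ('orc.compress'='SNAPPY');
"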

Run the load for a date range with sh bst_bas_dy_orders.sh 20190101 20190105 (start and end dates in YYYYMMDD; the end date is exclusive).
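
The script sources ${config_path}/env_config and reads the MySQL connection details from the mysql_wyc_* variables. A minimal sketch of that file, with placeholder values rather than real credentials, might be:

# env_config -- sketch only; the values below are placeholders, not the real environment.
# Variable names match what bst_bas_dy_orders.sh expects.
mysql_wyc_ip=192.168.1.100
mysql_wyc_port=3306
mysql_wyc_user=etl_user
mysql_wyc_password='your_password_here'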

bst_bas_dy_orders.sh

#!/bin/bash

# Load environment / DB connection settings
config_path=$HOME/shell_code/config
source ${config_path}/env_config

# Date range: start date (inclusive) and end date (exclusive), both YYYYMMDD
begin_date=$1
end_date=$2

v_dt_year=${begin_date:0:4}
v_dt_month=${begin_date:4:2}
v_dt_date=${begin_date}

# Sqoop output compression codec
zip_code=org.apache.hadoop.io.compress.SnappyCodec

# Staging ("tmp") and target table names, plus the staging table's HDFS location
tab_rank=tmp
tab_source_sys=bst_dy_wyc/ics_sale_order
tmp_target_tab=tmp_bst_bas_dy_orders
hive_target_tab=bst_bas_dy_orders
tmp_hdfs_file_path=/user/hive/external/tables/${tab_rank}/${tab_source_sys}/${tmp_target_tab}

# MySQL connection info, read from env_config
db_ip=${mysql_wyc_ip}
db_port=${mysql_wyc_port}
db_user=${mysql_wyc_user}
db_passwd=${mysql_wyc_password}
db_database=ics_sale_order

# Iterate day by day over [begin_date, end_date)
while [ ${begin_date} -lt ${end_date} ]
do

# Zero-pad the month (the substring above already yields two digits, so this is only a safeguard)
month_len=${#v_dt_month}
if [ ${month_len} -lt 2 ]
then
v_dt_month="0"${v_dt_month}
fi

echo $v_dt_year,$v_dt_month,$v_dt_date

begin_date_s=$v_dt_date
# "-1 day ago" is equivalent to "+1 day": mid_date is the day after begin_date,
# used as the exclusive upper bound of the one-day extraction window
mid_date=`date -d "-1 day ago $begin_date_s" +%Y%m%d`

# Extraction query: pick up orders whose relevant timestamp falls within
# [begin_date, mid_date); the UNION of order_no sets covers every timestamp
# column that can change (make/pay/create/update/confirm/revoke/place/refund)
sqoop_sql="SELECT o.order_no, o.centre_order_no, o.user_no, o.channel_no, o.product_no,
o.contact_phone, o.contact_name, o.state, o.amount, o.centre_amount, o.dis_amount,
o.real_amount, o.pay_amount, o.depart_date, o.depart_time, o.pay_order_no,
o.pay_platform_no, o.pay_trade_type, o.pay_succeed_time, o.refund_amount,
o.refund_dis_amount, o.refund_fee, o.centre_refund_amount, o.centre_refund_fee,
o.biz_no, o.expire_time, o.place_time, o.make_time, o.complete_time, o.revoke_time,
o.revoke_reason, o.confirm_time, o.refund_channel_no, o.refund_admin_no, o.refund_time,
o.refund_reason, o.deleted, o.coupon_id, o.coupon_dis_amount, o.real_refund_amount,
o.invoice_fee, o.insuranceNum, o.provider_no, o.supplier_no, o.remark, o.create_time,
o.insuranceAmount, o.insuranced, o.update_time
FROM ics_sale_order.orders o
INNER JOIN (SELECT o.order_no FROM ics_sale_order.orders o
WHERE o.make_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.make_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no FROM ics_sale_order.orders o
WHERE o.pay_succeed_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.pay_succeed_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no FROM ics_sale_order.orders o
WHERE o.create_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.create_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no FROM ics_sale_order.orders o
WHERE o.update_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.update_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no FROM ics_sale_order.orders o
WHERE o.confirm_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.confirm_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no FROM ics_sale_order.orders o
WHERE o.revoke_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.revoke_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no FROM ics_sale_order.orders o
WHERE o.place_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.place_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
UNION
SELECT o.order_no FROM ics_sale_order.orders o
WHERE o.refund_time >= STR_TO_DATE('${begin_date}', '%Y%m%d')
AND o.refund_time < STR_TO_DATE('${mid_date}', '%Y%m%d')
) tmp
ON o.order_no = tmp.order_no
WHERE \$CONDITIONS"

# Clean the staging table's HDFS directory, then import the day's data with Sqoop
hadoop fs -rm -r ${tmp_hdfs_file_path}

sqoop import --connect "jdbc:mysql://${db_ip}:${db_port}/${db_database}?serverTimezone=Asia/Shanghai" \
--username ${db_user} --password ${db_passwd} \
--query "${sqoop_sql}" \
--target-dir "${tmp_hdfs_file_path}/dt_year=${v_dt_year}/dt_month=${v_dt_month}/dt_date=${v_dt_date}" \
--fields-terminated-by "," \
--delete-target-dir --fetch-size 5000 \
--num-mappers 1 --compress --compression-codec ${zip_code}

# Columns to copy from the staging table into the target partition (plus an ETL load timestamp)
hql_str="select order_no,centre_order_no,user_no,channel_no,product_no,contact_phone,contact_name,state,amount,
centre_amount,dis_amount,real_amount,pay_amount,depart_date,depart_time,pay_order_no,pay_platform_no,
pay_trade_type,pay_succeed_time,refund_amount,refund_dis_amount,refund_fee,centre_refund_amount,
centre_refund_fee,biz_no,expire_time,place_time,make_time,complete_time,revoke_time,revoke_reason,
confirm_time,refund_channel_no,refund_admin_no,refund_time,refund_reason,deleted,coupon_id,
coupon_dis_amount,real_refund_amount,invoice_fee,insurancenum,provider_no,supplier_no,remark,
create_time,insuranceamount,insuranced,update_time,current_timestamp() as etl_date
from ${tmp_target_tab} where dt_date='${v_dt_date}'"

# Re-register the staging partition (its directory was just rewritten by Sqoop),
# rebuild the target partition, copy the data across, then drop the staging partition
hive_e_str="
use dw_dy_wyc;
alter table ${tmp_target_tab} drop partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}');
alter table ${hive_target_tab} drop partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}');
alter table ${tmp_target_tab} add partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}');
alter table ${hive_target_tab} add partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}');
insert overwrite table ${hive_target_tab} partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}') ${hql_str};
alter table ${tmp_target_tab} drop partition(dt_year='${v_dt_year}',dt_month='${v_dt_month}',dt_date='${v_dt_date}');
"

echo "Doing HQL!!!!"

hive -e "$hive_e_str"

echo $hive_e_str

begin_date=${mid_date}

v_dt_year=${begin_date:0:4}

v_dt_month=${begin_date:4:2}

v_dt_date=${begin_date}

echo "Done HQL OVER !!! IS OK !!!! "

done

echo "ALL ALL ALL ${hive_target_tab} !!! IS OK !!!! "
