#博学谷IT学习技术支持#
1.从mysql导入到hive的ods层的shell脚本
传参表示要导入的数据分期日期,sqoop导入,实例:
#!/bin/bash
export SQOOP_HOME=/usr/bin/sqoop
if [ $# == 1 ]
then
dateStr=$1
else
dateStr=`date -d '-1 day' +'%Y-%m-%d'`
fi
dateNowStr=`date +'%Y-%m-%d'`
yearStr=`date -d ${dateStr} +'%Y'`
monthStr=`date -d ${dateStr} +'%m'`
jdbcUrl='jdbc:mysql://192.168.88.80:3306/nev'
username='root'
password='123456'
m='1'
${SQOOP_HOME} import \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query "SELECT
id,create_date_time,session_id,sid,create_time,seo_source,seo_keywords,ip,
AREA,country,province,city,origin_channel,USER AS user_match,manual_time,begin_time,end_time,
last_customer_msg_time_stamp,last_agent_msg_time_stamp,reply_msg_count,
msg_count,browser_name,os_info, '${dateNowStr}' AS starts_time
FROM web_chat_ems_${yearStr}_${monthStr} WHERE create_time BETWEEN '${dateStr} 00:00:00' AND '${dateStr}
23:59:59' and \$CONDITIONS" \
--hcatalog-database itcast_ods \
--hcatalog-table web_chat_ems \
-m ${m}
${SQOOP_HOME} import \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query "SELECT
temp2.*, '${dateNowStr}' AS start_time
FROM (SELECT id FROM web_chat_ems_${yearStr}_${monthStr} WHERE create_time BETWEEN '${dateStr} 00:00:00'
AND '${dateStr} 23:59:59') temp1
JOIN web_chat_text_ems_${yearStr}_${monthStr} temp2 ON temp1.id = temp2.id where 1=1 and \$CONDIT
IONS" \
--hcatalog-database itcast_ods \
--hcatalog-table web_chat_text_ems \
-m ${m}
2.执行从ods层到dwd层或者从dwd层到dwb层的shell脚本
语法:./hive -e|-f ‘sql语句|SQL脚本’ -S
例子:
#!/bin/bash
export HIVE_HOME=/usr/bin/hive
if [ $# == 1 ]
then
dateStr=$1
else
dateStr=`date +'%Y-%m-%d'`
fi
sqlStr="
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.compress.intermediate=true;
set hive.exec.compress.output=true;
set hive.exec.orc.compression.strategy=COMPRESSION;
insert into table itcast_dwd.visit_consult_dwd partition(yearinfo,quarterinfo,monthinfo,dayinfo)
select
wce.session_id,
wce.sid,
unix_timestamp(wce.create_time) as create_time,
wce.seo_source,
wce.ip,
wce.area,
wce.msg_count,
wce.origin_channel,
wcte.referrer,
wcte.from_url,
wcte.landing_page_url,
wcte.url_title,
wcte.platform_description,
wcte.other_params,
wcte.history,
substr(wce.create_time,12,2) as hourinfo,
substr(wce.create_time,1,4) as yearinfo,
quarter(wce.create_time) as quarterinfo,
substr(wce.create_time,6,2) as monthinfo,
substr(wce.create_time,9,2) as dayinfo
from (select * from itcast_ods.web_chat_ems where starts_time='${dateStr}') wce join (select * from itcast_ods.web_chat_text_ems where start_time='${dateStr}') wcte
on wce.id = wcte.id;
"
${HIVE_HOME} -e "${sqlStr}" -S
3.执行从dws层到mysql层的shell脚本
增量数据导出操作说明:
在执行导出的时候, 也需要将mysql中之前的统计的当年当季度和当月的结果数据删除, 然后重新导入操作
此时我们处理的方案, 要进行简化一些, 受影响最大范围当年的数据, 可以直接将当年的统计结果数据全部都删除, 然后重新到DWS层当年的所有数据
#!/bin/bash
export SQOOP_HOME=/usr/bin/sqoop
if [ $# == 1 ]
then
TD_DATE=$1
else
TD_DATE=`date -d '-1 day' +'%Y-%m-%d'`
fi
TD_YEAR=`date -d ${TD_DATE} +%Y`
mysql -uroot -p123456 -h192.168.52.150 -P3306 -e "delete from scrm_bi.visit_dws where yearinfo='$TD_YEAR'; delete from scrm_bi.consult_dws where yearinfo='$TD_YEAR';"
jdbcUrl='jdbc:mysql://192.168.88.80:3306/scrm_bi?useUnicode=true&characterEncoding=utf-8'
username='root'
password='123456'
${SQOOP_HOME} export \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--table visit_dws \
--hcatalog-database itcast_dws \
--hcatalog-table visit_dws \
--hcatalog-partition-keys yearinfo \
--hcatalog-partition-values $TD_YEAR \
-m 1
${SQOOP_HOME} export \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--table consult_dws \
--hcatalog-database itcast_dws \
--hcatalog-table consult_dws \
--hcatalog-partition-keys yearinfo \
--hcatalog-partition-values $TD_YEAR \
-m 1