sqoop导入和导出数据脚本

sqoop导入和导出数据脚本

一、将mysql数据导入到Hive

sqoop import \
--connect jdbc:mysql://:3306/bd?characterEncoding=utf-8 \
--username root \
--password '' \
--table tongji_1 \
--delete-target-dir  \
--hive-import \
--hive-overwrite \
--null-string '\\N' \
--null-non-string '\\N' \
--hive-table tongji \
--hive-drop-import-delims \
--fields-terminated-by '|' \
-m 1

二、Hive导出数据到mysql

sqoop export \
--connect jdbc:mysql://:3306/bd \
--username root \
--password ''\
--table tongji_1 \
--export-dir hdfs://:8020/user/hive/warehouse/tongji/000000_0 \
--input-fields-terminated-by '|' \
-m 1 \

三、hdfs数据加入到Hive中

#!/bin/bash
datenow=`date --date='0 days ago' "+%Y-%m-%d"`
echo ${datenow}
#将数据从hdfs导入到hive
hive -e "ALTER TABLE flux ADD  PARTITION(reportTime='${datenow}')location '/flow/reportTime=${datenow}';"
#将数据导入到数据清洗表
hive -e "insert into table dataclear
        select reportTime,url,urlname,stat_uv,split(stat_ss,'_')[0],split(stat_ss,'_')[1],split(stat_ss,'_')[2],cip 
from flux 
        WHERE reportTime='${datenow}';"
#对数据做统计
hive -e "insert into table tongji  
	select '${datenow}',tab1.pv,tab2.uv,tab3.vv,tab4.br,tab5.newip,tab6.newcust,tab7.avgtime,tab8.avgdeep 
from 
	(select count(*) as pv from dataclear where reportTime = '${datenow}') as tab1,
	(select count(distinct uvid) as uv from dataclear where reportTime='${datenow}') as tab2,
	(select count(distinct ssid) as vv from dataclear where reportTime='${datenow}') as tab3,
	(select round(br_a_tab.br_count/br_b_tab.vv_count,4) as br from (select count(*) as br_count from (select ssid from dataclear where reportTime = '${datenow}' group by ssid having count(*) = 1) as br_tab) as br_a_tab,(select count(distinct ssid) as vv_count from dataclear where reportTime='${datenow}') as br_b_tab) as tab4,
	(select count(distinct dataclear.cip) as newip from dataclear where reportTime='${datenow}' and dataclear.cip not in (select inner_dataclear.cip from dataclear as inner_dataclear where datediff(inner_dataclear.reportTime,'${datenow}')<0)) as tab5,
	(select count(distinct dataclear.uvid) as newcust from dataclear where reportTime='${datenow}' and dataclear.uvid not in (select inner_dataclear.uvid from dataclear as inner_dataclear where datediff(inner_dataclear.reportTime,'${datenow}')<0)) as tab6,
	(select round(avg(usetime),4) as avgtime from (select max(sstime) - min(sstime) as usetime from dataclear where reportTime='${datenow}' group by ssid) as avgtime_tab) as tab7,
	(select round(avg(deep),2) as avgdeep from (select count(distinct urlname) as deep from dataclear where reportTime='${datenow}' group by ssid) as avgdeep_tab) as tab8;"

  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值