sqoop导入hdfs
官方文档
http://sqoop.apache.org/docs/1.4.6/SqoopUserGuide.html
修改 sqoop-env.sh
# sqoop-env.sh: point Sqoop at the local Hadoop/HBase/Hive/ZooKeeper installs.
# NOTE(review): all paths assume the /root/app layout used in these notes —
# adjust per host.
#Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=/root/app/hadoop
#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=/root/app/hadoop
#set the path to where bin/hbase is available
export HBASE_HOME=/root/app/hbase
#Set the path to where bin/hive is available
export HIVE_HOME=/root/app/hive
#Set the path for where zookeper config dir is
export ZOOCFGDIR=/root/app/zookeeper/conf
添加所需 jar 包到 sqoop/lib 目录:
mysql-connector-java-5.1.38.jar
java-json.jar
测试运行
# Smoke-test the JDBC connection by listing databases on the MySQL server.
# NOTE(review): --password on the command line is visible in `ps` and shell
# history; prefer -P (interactive prompt) or --password-file in real use.
bin/sqoop list-databases \
--connect 'jdbc:mysql://192.168.230.1:3306/sqoop?useUnicode=true&characterEncoding=utf-8' \
--username root \
--password 123
将MySQL数据导入HDFS
# Import MySQL table stu2 into HDFS. With no --target-dir, output goes to the
# user's default HDFS home: /user/root/stu2 (see the `hdfs dfs -cat` below).
# -m 1 runs a single map task, so no --split-by column is needed.
bin/sqoop import \
--connect 'jdbc:mysql://192.168.230.1:3306/sqoop?useUnicode=true&characterEncoding=utf-8' \
--username root \
--password 123 \
--table stu2 \
--fields-terminated-by '\t' \
-m 1
bin/hdfs dfs -cat /user/root/stu2/*
1 张三 男 30
2 赵四 女 28
Hdfs删除目录
# Remove an HDFS output directory (needed before re-running an import).
# NOTE(review): the import above wrote to /user/root/stu2 — confirm whether
# this cleanup should target /user/root/stu2 rather than /user/root/stu.
bin/hdfs dfs -rm -r /user/root/stu
将HDFS数据导入MySQL
# Export the HDFS data under /user/root/stu2 into MySQL table stu3.
# --update-mode updateonly: rows whose uid matches an existing MySQL row are
# UPDATEd; rows with no matching uid are skipped (no inserts happen).
bin/sqoop export \
--connect 'jdbc:mysql://192.168.230.1:3306/sqoop?useUnicode=true&characterEncoding=utf-8' \
--username root \
--password 123 \
--table stu3 \
--columns uid,name,sex,age \
--export-dir '/user/root/stu2' \
--input-fields-terminated-by "\t" \
-m 1 \
--update-key uid \
--update-mode updateonly
# Same export, but --update-mode allowinsert: rows matching on uid are
# updated and non-matching rows are inserted (an upsert).
bin/sqoop export \
--connect 'jdbc:mysql://192.168.230.1:3306/sqoop?useUnicode=true&characterEncoding=utf-8' \
--username root \
--password 123 \
--table stu3 \
--columns uid,name,sex,age \
--export-dir '/user/root/stu2' \
--input-fields-terminated-by "\t" \
-m 1 \
--update-key uid \
--update-mode allowinsert
sqoop导入hive
所需jar包
cp /root/app/hive/lib/hive-common-1.1.0-cdh5.10.0.jar /root/app/sqoop/lib/
cp /root/app/hive/lib/hive-shims-* /root/app/sqoop/lib/
将mysql导入hive
# Import MySQL table stu3 into Hive: data is staged under --target-dir, then
# loaded into a Hive table of the same name by --hive-import.
# Fix: the original was missing the line-continuation '\' after the
# --connect line, which cut the command in two and made it fail.
bin/sqoop import \
--connect 'jdbc:mysql://192.168.230.1:3306/sqoop?useUnicode=true&characterEncoding=utf-8' \
--username root \
--password 123 \
--table stu3 \
--target-dir "/sqoop/job" \
--fields-terminated-by '\t' \
-m 1 \
--hive-import
hive> select * from stu3 ;
OK
1 张三 男 30
2 赵四 女 28
Shell
创建shell目录
[root@cdh1 ~]# mkdir shell
[root@cdh1 ~]# cd shell/
[root@cdh1 shell]# mkdir bin
[root@cdh1 shell]# mkdir conf
[root@cdh1 shell]# mkdir logs
[root@cdh1 shell]# mkdir data
[root@cdh1 shell]# mkdir lib
conf
vi sq_db.config
# Connection settings sourced by the scripts in bin/.
CONNECTURL='jdbc:mysql://192.168.230.1/test?useUnicode=true&characterEncoding=utf-8'
# NOTE(review): plaintext DB credentials — restrict this file's permissions
# or switch the scripts to sqoop's --password-file mechanism.
USERNAME=root
PASSWORD=123
# Number of parallel map tasks passed to sqoop (-m).
MAPNUM=3
vi common.sh
#!/bin/sh
# common.sh: shared variables for the scripts in bin/. Resolves the project
# home as the parent of this script's directory, then derives the standard
# sub-directory paths.
# Fix: replaced unquoted backticks around `dirname $0` with quoted $(...),
# so paths containing spaces cannot break the resolution.
home=$(cd "$(dirname "$0")" && cd .. && pwd)
bin_home=$home/bin
conf_home=$home/conf
logs_home=$home/logs
data_home=$home/data
lib_home=$home/lib
# Root of the local Sqoop installation.
sqoop_home=/root/app/sqoop
bin 目录:sqoop_import 脚本(注:原笔记此处内容与上文 conf 部分重复,sqoop_import 脚本正文缺失,需对照原始资料补全)
# NOTE(review): this section repeats the conf/ files shown earlier verbatim
# (sq_db.config and common.sh). The sqoop_import script that the preceding
# heading announces is not present in these notes — confirm against the
# original source material.
vi sq_db.config
CONNECTURL='jdbc:mysql://192.168.230.1/test?useUnicode=true&characterEncoding=utf-8'
USERNAME=root
PASSWORD=123
MAPNUM=3
vi common.sh
#!/bin/sh
# Switch to the parent directory of this script's directory.
home=$(cd `dirname $0`; cd ..; pwd)
bin_home=$home/bin
conf_home=$home/conf
logs_home=$home/logs
data_home=$home/data
lib_home=$home/lib
# Root of the local Sqoop installation.
sqoop_home=/root/app/sqoop
sqoop执行及验证
# Import every source table from MySQL into Hive, then spot-check one table.
# Fix: use 755 instead of 777 — the script only needs to be executable;
# making it world-writable lets any local user alter what root executes.
chmod 755 sqoop_import
./sqoop_import user_info
./sqoop_import bank_detail
./sqoop_import browse_history
./sqoop_import bill_detail
./sqoop_import loan_time
./sqoop_import overdue
# Verify the import landed in Hive.
bin/hive -e "select * from user_info limit 10;"
bin hive_create
#!/bin/sh
# hive_create <sql-file>: run the named Hive SQL file from the data/ directory.
source /etc/profile
# Fix: resolve common.sh relative to this script instead of the caller's cwd,
# so the script works no matter where it is invoked from.
source "$(cd "$(dirname "$0")" && cd .. && pwd)/conf/common.sh"
# Guard against a missing argument instead of running hive -f on a bad path.
if [ -z "$1" ]; then
  echo "usage: $0 <sql-file-name-under-data/>" >&2
  exit 1
fi
echo "hive数据处理开始......"
# NOTE(review): $hive_home is not defined in the common.sh shown in these
# notes (only sqoop_home is) — confirm it is set elsewhere, e.g. /etc/profile.
$hive_home/bin/hive -f "$data_home/$1"
hive sql
-- result1: number of users per birth-decade bucket ('90 后', '80 后', ...),
-- derived from middle_bill_user.birthday by lexicographic year comparison.
-- NOTE(review): assumes birthday is a 4-character year string — confirm the
-- middle_bill_user schema.
create table result1
row format delimited
fields terminated by ','
STORED AS TEXTFile
as select mbu.period,count(mbu.uid) as num from (select u.uid,case when
u.birthday>='1990' and u.birthday<='1999' then '90 后' when u.birthday>='1980' and
u.birthday<='1989' then '80 后' when u.birthday>='1970' and u.birthday<='1979'
then '70 后' when u.birthday>='1960' and u.birthday<='1969' then '60 后' when
u.birthday>='1950' and u.birthday<='1959' then '50 后' else '其他' end as period
from middle_bill_user u) mbu group by mbu.period;
-- result2: user count per gender from middle_bill_user.
CREATE TABLE result2
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
AS
SELECT sex,
       COUNT(uid) AS num
FROM middle_bill_user
GROUP BY sex;
-- result3: user count per province from middle_bill_user.
CREATE TABLE result3
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
AS
SELECT province,
       COUNT(uid) AS num
FROM middle_bill_user
GROUP BY province;
-- result4: user count per income bucket, from summed bank_detail income.
-- Fix: the original placed b.tradetype=0 in the ON clause of a LEFT JOIN
-- *from* bank_detail, which never filters left-side rows — so every trade
-- type (income and expenditure) was summed. The joined middle_bill_user
-- columns were also unused, making the join a no-op (or a row-duplicator if
-- uid is not unique there). Filter income rows in WHERE instead.
create table result4
row format delimited
fields terminated by ','
STORED AS TEXTFile
as select bds.salarylevel,count(bds.uid) as num from
(select bd.uid,
case when bd.amount>=30000 then '30000 以上'
when bd.amount>=20000 and bd.amount<30000 then '20000-30000'
when bd.amount>=10000 and bd.amount<20000 then '10000-20000'
when bd.amount>=5000 and bd.amount<10000 then '5000-10000'
when bd.amount>=1000 and bd.amount<5000 then '1000-5000'
else '1000 以下' end as salarylevel
from
(select b.uid,sum(b.tradeacount) as amount from bank_detail b
where b.tradetype=0 group by b.uid) bd) bds
group by bds.salarylevel;
-- result5: user count per number-of-cards bucket. Inner query counts the
-- distinct bankid values per uid in bill_detail; outer query buckets that
-- count into '持卡 1 张' .. '持卡 5 张及以上' and counts users per bucket.
create table result5
row format delimited
fields terminated by ','
STORED AS TEXTFile
as select bdm.banknum,count(bdm.uid) as cardnum from
(select bd.uid,
case when bd.num>=5 then '持卡 5 张及以上'
when bd.num=4 then '持卡 4 张'
when bd.num=3 then '持卡 3 张'
when bd.num=2 then '持卡 2 张'
else '持卡 1 张' end as banknum
from (select uid,count(distinct bankid) as num from bill_detail group by uid) bd) bdm
group by bdm.banknum;
-- result6: number of distinct users per bank (de-duplicate uid/bankid pairs
-- first, then count users for each bankid).
CREATE TABLE result6
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
AS
SELECT t.bankid,
       COUNT(t.uid) AS usernum
FROM (SELECT DISTINCT uid, bankid FROM bill_detail) t
GROUP BY t.bankid;
-- result7: user count per expenditure/income ratio bucket.
-- Innermost query sums tradeacount per (uid, tradetype); after that grouping
-- each uid has at most one row per tradetype, so max(case ...) simply picks
-- the per-type sum: tradetype=1 total divided by tradetype=0 total.
-- NOTE(review): a uid with no tradetype=0 rows divides by 0 — in Hive this
-- yields NULL, which falls into the '50%以下' bucket; confirm that is intended.
create table result7
row format delimited
fields terminated by ','
STORED AS TEXTFile
as select bdtt.consumeproportion,count(bdtt.uid) as num from
(select bdt.uid,
case when bdt.proportion>=3.0 then '300%以上'
when bdt.proportion>=2.0 and bdt.proportion<3.0 then '200%-300%'
when bdt.proportion>=1.0 and bdt.proportion<2.0 then '100%-200%'
when bdt.proportion>=0.5 and bdt.proportion<1.0 then '50%-100%'
else '50%以下' end as consumeproportion from
(select bd.uid,max(case when bd.tradetype=1 then bd.amount else 0 end)/max(case
when bd.tradetype=0 then bd.amount else 0 end) as proportion from (select
uid,tradetype,sum(tradeacount) as amount from bank_detail group by uid,tradetype)
bd group by bd.uid) bdt) bdtt group by bdtt.consumeproportion;
-- result9: user count per total-credit-limit bucket (sum of creditlimit per
-- uid from bill_detail, bucketed into '1000 以下' .. '10000 以上').
-- NOTE(review): the notes jump from result7 to result9 — the result8 (and
-- result10) Hive SQL is missing here even though the export commands below
-- reference both; confirm against the original source.
create table result9
row format delimited
fields terminated by ','
STORED AS TEXTFile
as select bdh.amountlevel,count(bdh.uid) as num
from
(select bdt.uid,
case when bdt.amount>=10000 then '10000 以上'
when bdt.amount>=5000 and bdt.amount<10000 then '5000-10000'
when bdt.amount>=1000 and bdt.amount<5000 then '1000-5000'
else '1000 以下' end as amountlevel
from
(select bd.uid,sum(bd.creditlimit) as amount from bill_detail bd group by uid) bdt)
bdh group by bdh.amountlevel;
hive shell执行及验证
./hive_create middle_bill_user
./hive_create result1
...
./hive_create result10
#如果执行中间遇到错误,比如result1,可以
bin/hive -e "drop table result1;"
然后重新执行
#验证
[root@cdh1 hadoop]# bin/hdfs dfs -cat /user/hive/warehouse/result1/*
50 后,11032
60 后,10926
70 后,10588
80 后,10937
90 后,9691
mysql 脚本
-- MySQL target tables for the Hive result exports.
-- Fix: each statement now ends with ';' so the whole block can be executed
-- as a batch (the originals had no statement terminators and would fail
-- with a syntax error after the first CREATE TABLE).
CREATE TABLE `result1` (
`rid` int(11) NOT NULL AUTO_INCREMENT,
`period` varchar(10) CHARACTER SET utf8 NOT NULL,
`num` double DEFAULT NULL,
PRIMARY KEY (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `result2` (
`rid` int(10) NOT NULL AUTO_INCREMENT,
`sex` varchar(3) CHARACTER SET utf8 NOT NULL,
`num` double DEFAULT NULL,
PRIMARY KEY (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `result3` (
`rid` int(11) NOT NULL AUTO_INCREMENT,
`province` varchar(10) CHARACTER SET utf8 NOT NULL,
`num` double DEFAULT NULL,
PRIMARY KEY (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `result4` (
`rid` int(11) NOT NULL AUTO_INCREMENT,
`salarylevel` varchar(10) CHARACTER SET utf8 NOT NULL,
`num` double DEFAULT NULL,
PRIMARY KEY (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `result5` (
`rid` int(11) NOT NULL AUTO_INCREMENT,
`banknum` varchar(20) CHARACTER SET utf8 NOT NULL,
`cardnum` double DEFAULT NULL,
PRIMARY KEY (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `result6` (
`rid` int(11) NOT NULL AUTO_INCREMENT,
`bankid` varchar(10) CHARACTER SET utf8 NOT NULL,
`usernum` double DEFAULT NULL,
PRIMARY KEY (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `result7` (
`rid` int(11) NOT NULL AUTO_INCREMENT,
`consumeproportion` varchar(10) CHARACTER SET utf8 NOT NULL,
`num` double DEFAULT NULL,
PRIMARY KEY (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `result8` (
`rid` int(11) NOT NULL AUTO_INCREMENT,
`amountlevel` varchar(20) CHARACTER SET utf8 NOT NULL,
`num` double DEFAULT NULL,
PRIMARY KEY (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `result9` (
`rid` int(11) NOT NULL AUTO_INCREMENT,
`amountlevel` varchar(20) CHARACTER SET utf8 NOT NULL,
`num` double DEFAULT NULL,
PRIMARY KEY (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `result10` (
`rid` int(11) NOT NULL AUTO_INCREMENT,
`sampleLabel` varchar(3) CHARACTER SET utf8 NOT NULL,
`num` double DEFAULT NULL,
PRIMARY KEY (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
hive导入mysql
# Export each Hive result table from its warehouse directory into the
# matching MySQL table. The ten original commands differed only in table
# name and column list, so they are deduplicated into a table -> columns
# map plus a loop; all other options (-m 1, ',' separator, connection,
# credentials) are identical to the originals. Requires bash 4+ for the
# associative array.
declare -A export_cols=(
  [result1]=period,num
  [result2]=sex,num
  [result3]=province,num
  [result4]=salarylevel,num
  [result5]=banknum,cardnum
  [result6]=bankid,usernum
  [result7]=consumeproportion,num
  [result8]=amountlevel,num
  [result9]=amountlevel,num
  [result10]=sampleLabel,num
)
# Explicit list preserves the original execution order (result1..result10).
for tbl in result1 result2 result3 result4 result5 \
           result6 result7 result8 result9 result10; do
  bin/sqoop export \
    --connect 'jdbc:mysql://192.168.230.1/test?useUnicode=true&characterEncoding=utf-8' \
    --username root \
    --password 123 \
    --table "$tbl" \
    --columns "${export_cols[$tbl]}" \
    --export-dir "/user/hive/warehouse/$tbl" \
    --fields-terminated-by ',' \
    -m 1
done
执行过程中间报错
发现错误日志在cdh2
在cdh2中/root/app/hadoop/logs/userlogs
cd container_1584960241688_0030_01_000001
cat syslog