Sqoop installation, Sqoop commands, data transfer between MySQL, HBase and Hive, and sqoop job commands

#0. Unpack, rename, set environment variables and activate them
export LOGDIR=$SQOOP_HOME/logs
#1. Copy dependency jars

cd /opt/software/sqoop146/lib/
find /opt/software/hive312/ -name 'mysql*.jar'
cp /opt/software/hive312/lib/mysql-connector-java-5.1.48.jar ./
cp /opt/software/hadoop313/share/hadoop/common/hadoop-common-3.1.3.jar ./
cp /opt/software/hadoop313/share/hadoop/hdfs/hadoop-hdfs-3.1.3.jar ./
cp /opt/software/hadoop313/share/hadoop/mapreduce/hadoop-mapreduce-client-core-3.1.3.jar ./
#2. Configuration

vim /opt/software/sqoop146/conf/sqoop-env.sh
#should not be executable directly
# also should not be passed any arguments, since we need original $*
# Set Hadoop-specific environment variables here.
#Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=/opt/software/hadoop313
#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=/opt/software/hadoop313
#set the path to where bin/hbase is available
export HBASE_HOME=/opt/software/hbase235
#Set the path to where bin/hive is available
export HIVE_HOME=/opt/software/hive312
#Set the path for where zookeeper config dir is
export ZOOCFGDIR=/opt/software/zookeeper357

# 3. Test
grant all on *.* to root@'%' identified by '#kb12@kbc.ABC';
sqoop list-databases --connect jdbc:mysql://singlehenry:3306 --username root --password '#kb12@kbc.ABC'
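#If list-databases fails, a quick sanity check is to confirm the installation
#itself first (output details vary by build):
sqoop version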


#sqoop
#1. Supported versions
1.4.6|7 => hadoop 2.x + hive 1.x + hbase 1.x
		=> hadoop 3.1.3 + hive 3.1.2 + hbase 2.3.5 (not officially supported)
#2. Operation types; the direction is relative to the RDBMS:
	RDBMS --(import)--> hadoop(hdfs|hive|hbase) --(export)--> RDBMS(mysql|oracle|...)

---- hdfs -> mysql ----

	#sample row (tab-separated) expected under /kb12/hive/orderinfo:
	2021-06-15	henry	528.20

	create table sqp_order(
	create_date date,
	user_name varchar(20),
	total_volume decimal(10,2)
	);

	sqoop export \
	--connect jdbc:mysql://singlerenfeifan:3306/test \
	--username root \
	--password RENZHIJIE@1997913 \
	--table sqp_order \
	-m 1 \
	--export-dir /kb12/hive/orderinfo \
	--fields-terminated-by '\t'
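
#A minimal sketch to stage the sample row on HDFS before the export above
#(the local file name orderinfo.txt is an assumption; the HDFS path matches --export-dir):
echo -e '2021-06-15\thenry\t528.20' > orderinfo.txt
hdfs dfs -mkdir -p /kb12/hive/orderinfo
hdfs dfs -put -f orderinfo.txt /kb12/hive/orderinfo/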



---- mysql -> hdfs ----
#Full import
sqoop import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table sqp_order \
-m 1 \
--delete-target-dir \
--target-dir /kb12/sqoop/m2h_all \
--fields-terminated-by '\t' \
--lines-terminated-by '\n'
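
#To verify the import, list and print the generated part files (the path comes
#from --target-dir above; file names are whatever Sqoop generates, e.g. part-m-00000):
hdfs dfs -ls /kb12/sqoop/m2h_all
hdfs dfs -cat /kb12/sqoop/m2h_all/part-m-*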

#Column pruning
sqoop import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table sqp_order \
--columns user_name,total_volume \
-m 1 \
--delete-target-dir \
--target-dir /kb12/sqoop/m2h_colcut \
--fields-terminated-by '\t' \
--lines-terminated-by '\n'

#Row filtering + multiple mappers (-m 2; sqoop import is a map-only job)
sqoop import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table sqp_order \
--columns user_name,total_volume \
--where "total_volume>=200" \
-m 2 \
--split-by user_name \
--delete-target-dir \
--target-dir /kb12/sqoop/m2h_rowcut \
--fields-terminated-by ',' \
--lines-terminated-by '\n'
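
#Note: --split-by on a string column such as user_name is rejected by default
#in some Sqoop releases (1.4.7 enforces this); if the job above aborts with a
#text-splitter error, rerun it with the property placed before the tool options:
sqoop import \
-Dorg.apache.sqoop.splitter.allow_text_splitter=true \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table sqp_order \
--columns user_name,total_volume \
--where "total_volume>=200" \
-m 2 \
--split-by user_name \
--delete-target-dir \
--target-dir /kb12/sqoop/m2h_rowcut \
--fields-terminated-by ',' \
--lines-terminated-by '\n'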


sqoop import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--query "select user_name,total_volume from sqp_order where total_volume>=300 and \$CONDITIONS" \
-m 2 \
--split-by user_name \
--delete-target-dir \
--target-dir /kb12/sqoop/m2h_mgt2 \
--fields-terminated-by ',' \
--lines-terminated-by '\n'

#Incremental import: append | merge
sqoop import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--query "select * from studentinfo where \$CONDITIONS" \
-m 1 \
--target-dir /kb12/sqoop/m2h_incr_append \
--fields-terminated-by ',' \
--lines-terminated-by '\n' \
--check-column stuId \
--incremental append \
--last-value 0

insert into studentinfo values
(49,'蔡星威1',32,'男','14568758132',25201,6),
(50,'焦强1',28,'男','15314381033',23489,7),
(51,'庞泽1',23,'男','13892411574',25578,2),
(52,'吴嘉伟1',27,'男','13063638045',22617,4),
(53,'孟浩东1',32,'男','13483741056',26284,2);

sqoop import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--query "select * from studentinfo where \$CONDITIONS" \
-m 1 \
--target-dir /kb12/sqoop/m2h_incr_append \
--fields-terminated-by ',' \
--lines-terminated-by '\n' \
--check-column stuId \
--incremental append \
--last-value 48
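
#Only rows with stuId > 48 should arrive in this run; check the files appended
#to the target directory:
hdfs dfs -ls /kb12/sqoop/m2h_incr_append
hdfs dfs -cat /kb12/sqoop/m2h_incr_append/part-m-*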

#Incremental import: lastmodified
create table sqp_incr_time(
incrName varchar(20),
incrTime timestamp
);

insert into sqp_incr_time(incrName) values
('henry'),('pola'),('miki'),('jack'),('rose'),('jason'),('mike'),('anmi');

insert into sqp_incr_time(incrName,incrTime) values
('andi1','2021-06-29 13:12:13.0'),
('angela2','2021-06-29 13:14:13.0'),
('wangcai3','2021-06-29 13:15:13.0');

sqoop import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--query "select * from sqp_incr_time where \$CONDITIONS" \
-m 1 \
--target-dir /kb12/sqoop/m2h_incr_lastmodified \
--fields-terminated-by ',' \
--lines-terminated-by '\n' \
--check-column incrTime \
--incremental lastmodified \
--last-value '0000-00-00 00:00:00'

#lastmodified imports rows stamped between the previous run's capture time and the current run's time

sqoop import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--query "select * from sqp_incr_time where \$CONDITIONS" \
-m 1 \
--target-dir /kb12/sqoop/m2h_incr_lastmodified \
--fields-terminated-by ',' \
--lines-terminated-by '\n' \
--check-column incrTime \
--incremental lastmodified \
--append \
--last-value '2021-06-29 12:16:27'

#Importing into a single partition of a partitioned table
#Enable dynamic partitioning to write one table's rows into multiple partitions of a partitioned table in a single pass (a Hive sketch follows)
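
#A minimal Hive sketch of such a dynamic-partition load (the staging table
#sqp_partition_stage is an assumed name; the target is the partitioned Hive
#table created below):
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table sqp_partition partition(dodate)
select id, name, dotime, cast(dotime as date) as dodate
from sqp_partition_stage;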

#Create the source table and data in MySQL
create table sqp_partition(
id int,
name varchar(20),
dotime datetime
);

insert into sqp_partition(id,name,dotime) values
(1,'henry','2021-06-01 12:12:14'),
(2,'pola','2021-06-01 12:24:14'),
(3,'ariel','2021-06-01 12:45:14'),
(4,'rose','2021-06-01 13:15:14'),
(5,'jack','2021-06-01 14:12:34');

insert into sqp_partition(id,name,dotime) values
(6,'henry','2021-06-29 12:12:14'),
(7,'pola','2021-06-29 12:24:14'),
(8,'ariel','2021-06-29 12:45:14'),
(9,'rose','2021-06-29 13:15:14'),
(10,'jack','2021-06-29 14:12:34');


#Create the target table in Hive
create table sqp_partition(
id int,
name string,
dotime timestamp
)
partitioned by (dodate date)
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as textfile;

#Import the MySQL table data into Hive with sqoop
sqoop import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table sqp_partition \
--where "cast(dotime as date)='2021-06-01'" \
-m 1 \
--delete-target-dir \
--target-dir /user/hive/warehouse/test.db/sqp_partition/dodate=2021-06-01 \
--fields-terminated-by ',' \
--lines-terminated-by '\n'

#Register the new partition in Hive
alter table sqp_partition add partition(dodate='2021-06-01');

#List partitions in Hive
show partitions sqp_partition;


#Shell script: import yesterday's data as a new partition

#!/bin/bash

source /etc/profile
DATE=`date -d '-1 day' +%F`

sqoop import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table sqp_partition \
--where "cast(dotime as date)='$DATE'" \
-m 1 \
--delete-target-dir \
--target-dir /user/hive/warehouse/test.db/sqp_partition/dodate=$DATE \
--fields-terminated-by ',' \
--lines-terminated-by '\n'

hive -e "alter table test.sqp_partition add partition(dodate='$DATE')"
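
#To run this daily, the script could be scheduled with cron (the script and
#log paths below are assumptions), e.g. via crontab -e:
0 1 * * * /opt/scripts/sqp_partition_daily.sh >> /opt/scripts/sqp_partition_daily.log 2>&1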


#sqoop job
#To let a saved job store its password, uncomment the following property in sqoop-site.xml
<property>
	<name>sqoop.metastore.client.record.password</name>
	<value>true</value>
	<description>If true, allow saved passwords in the metastore.
	</description>
</property>
#List jobs
sqoop job --list
#Delete a job
sqoop job --delete job_m2hv_par
#Show a job's definition
sqoop job --show job_m2hv_par
#Create a job
sqoop job \
--create job_m2hv_par \
-- import \
--connect jdbc:mysql://singlerenfeifan:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--query "select * from sqp_incr_time where \$CONDITIONS" \
-m 1 \
--target-dir /kb12/sqoop/m2h_incr_lastmodified \
--fields-terminated-by ',' \
--lines-terminated-by '\n' \
--check-column incrTime \
--incremental lastmodified \
--append \
--last-value '2021-06-29 12:16:27'
#Execute the job
sqoop job --exec job_m2hv_par
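
#After each --exec run Sqoop updates the job's stored checkpoint in the
#metastore; it appears in the saved definition (as incremental.last.value in
#1.4.x; the property name may vary by version):
sqoop job --show job_m2hv_par | grep -i 'last.value'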


#-------------------mysql -> hive---------------------

sqoop import \
--connect jdbc:mysql://192.168.171.200:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table studentinfo \
-m 1 \
--hive-import \
--hive-table test.studentinfo \
--create-hive-table

insert into studentinfo values
(54,'蔡星威2',32,'男','14568758167',25201,6),
(55,'焦强2',28,'男','15314381045',23489,7),
(56,'庞泽2',23,'男','13892411534',25578,2),
(57,'吴嘉伟2',27,'男','13063638023',22617,4),
(58,'孟浩东2',32,'男','134837410456',26284,2);

sqoop import \
--connect jdbc:mysql://192.168.171.200:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table studentinfo \
--where "stuId>=50" \
-m 1 \
--hive-import \
--hive-table test.studentinfo

#Try a partitioned import
create table sqp_user_par(
stuId int,
stuName string,
stuAge int,
stuGender string,
mobile string,
tuition decimal(7,2),
fkClassId int
)
partitioned by (classId_range string)
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as textfile;

#Put classId 1-4 into one partition
sqoop import \
--connect jdbc:mysql://192.168.171.200:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table studentinfo \
--where "fkClassId between 1 and 4" \
-m 1 \
--hive-import \
--hive-table test.sqp_user_par \
--hive-partition-key classId_range \
--hive-partition-value '1-4' \
--fields-terminated-by ',' \
--lines-terminated-by '\n'

#Put classId 5-7 into one partition
sqoop import \
--connect jdbc:mysql://192.168.171.200:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table studentinfo \
--where "fkClassId between 5 and 7" \
-m 1 \
--hive-import \
--hive-table test.sqp_user_par \
--hive-partition-key classId_range \
--hive-partition-value '5-7' \
--fields-terminated-by ',' \
--lines-terminated-by '\n'
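
#Verify that both partitions landed in the Hive table:
hive -e "show partitions test.sqp_user_par"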

#Script to parameterize the partition import

#!/bin/bash

B=$1	# lower bound of the classId range (first argument)
E=$2	# upper bound of the classId range (second argument)

sqoop import \
--connect jdbc:mysql://192.168.171.200:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table studentinfo \
--where "stuId between $B and $E" \
-m 1 \
--hive-import \
--hive-table test.sqp_user_par \
--hive-partition-key classId_range \
--hive-partition-value "$B-$E" \
--fields-terminated-by ',' \
--lines-terminated-by '\n'
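
#Example invocation (assuming the script is saved as sqp_par_import.sh):
chmod +x sqp_par_import.sh
./sqp_par_import.sh 1 4
./sqp_par_import.sh 5 7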

#---------------------------- mysql -> hbase ---------------------------
mysql + jdbc = 
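
#The original notes break off here. For reference, a minimal sketch of a
#MySQL-to-HBase import (the HBase table and column family names are
#assumptions; note the version matrix above flags HBase 2.3.5 as unsupported
#with Sqoop 1.4.x, so this may not work on that exact setup):
sqoop import \
--connect jdbc:mysql://192.168.171.200:3306/test \
--username root \
--password RENZHIJIE@1997913 \
--table studentinfo \
--hbase-table studentinfo \
--column-family info \
--hbase-row-key stuId \
--hbase-create-table \
-m 1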
