Syncing MySQL and HDFS Data with Sqoop

Deploying Sqoop

# Extract the Sqoop tarball into the installation directory
tar -zxvf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz -C /opt/soft_installed
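Optionally, add Sqoop's bin directory to the PATH so the sqoop command can be run from anywhere. A minimal sketch, assuming /etc/profile is the profile file in use (adjust to ~/.bashrc or your shell's profile as needed):

# Assumption: append these lines to /etc/profile, then reload with: source /etc/profile
export SQOOP_HOME=/opt/soft_installed/sqoop-1.4.7.bin__hadoop-2.6.0
export PATH=$PATH:$SQOOP_HOME/bin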


# Copy the MySQL JDBC driver into Sqoop's lib directory
cp mysql-connector-java-8.0.30.jar /opt/soft_installed/sqoop-1.4.7.bin__hadoop-2.6.0/lib

# Configure sqoop-env.sh
[root@yarnserver conf]# cat sqoop-env.sh

export HADOOP_COMMON_HOME=/opt/soft_installed/hadoop-2.7.3
export HADOOP_MAPRED_HOME=/opt/soft_installed/hadoop-2.7.3
export HIVE_HOME=/opt/soft_installed/apache-hive-2.3.9-bin
export ZOOCFGDIR=/opt/soft_installed/zookeeper-3.4.5
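With the environment configured, a quick sanity check confirms Sqoop can start (assuming sqoop is on the PATH, otherwise invoke it by its full bin path):

# Prints the Sqoop version banner if the installation is healthy
sqoop version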


Importing Data from MySQL to HDFS with Sqoop

Testing Sqoop and importing data

# Test the connection
sqoop list-databases --connect jdbc:mysql://node1:3306/ --username lh --password Lh123456!
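If the connection works, the databases visible to the lh user are printed. A related sanity check lists the tables of a specific database; a sketch using the bigdata19 database referenced below:

sqoop list-tables --connect jdbc:mysql://node1:3306/bigdata19 --username lh --password Lh123456!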

# Import data from MySQL into HDFS
# Import the student table into HDFS
sqoop import \
--connect jdbc:mysql://node1:3306/bigdata19?serverTimezone=UTC \
--driver com.mysql.cj.jdbc.Driver \
--username lh \
--password Ypassowrd \
--table student \
--target-dir /db/mysql/student \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by '\t';
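Once the job completes, the result can be inspected on HDFS. A minimal check, assuming the single-mapper output file name part-m-00000:

# List the target directory and preview the first rows
hdfs dfs -ls /db/mysql/student
hdfs dfs -cat /db/mysql/student/part-m-00000 | head -n 5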

# Import the t_bank table into HDFS
# select * from t_bank where age=0;
sqoop import \
--connect jdbc:mysql://node1:3306/bigdata19?serverTimezone=UTC \
--username lh \
--password Ypassowrd  \
--target-dir /db/mysql/t_bank1 \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by '\t' \
--query 'select * from t_bank where age=0 and $CONDITIONS';
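With --num-mappers 1 the import runs as a single task. For a parallel free-form-query import, Sqoop additionally requires --split-by; a sketch, assuming t_bank has a numeric id column (the split column and target directory here are illustrative):

sqoop import \
--connect jdbc:mysql://node1:3306/bigdata19?serverTimezone=UTC \
--username lh \
--password Ypassowrd \
--target-dir /db/mysql/t_bank_parallel \
--delete-target-dir \
--num-mappers 4 \
--split-by id \
--fields-terminated-by '\t' \
--query 'select * from t_bank where age=0 and $CONDITIONS';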

# NULL handling on import: replace null values with \N
sqoop import \
--connect jdbc:mysql://node1:3306/bigdata19?serverTimezone=UTC \
--username lh \
--password Ypassowrd  \
--target-dir /db/mysql/t_bank1 \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by '\t' \
--query 'select * from t_bank where age=0 and $CONDITIONS' \
--null-string '\\N' \
--null-non-string '\\N';
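\N is Hive's default text-file encoding of NULL, so data imported this way reads back as proper NULLs in Hive. A sketch of an external table over the target directory; the column list is hypothetical, not the real t_bank schema:

-- Assumption: illustrative columns; replace with t_bank's actual schema
create external table if not exists t_bank_ext (
  id int,
  age int,
  balance double
)
row format delimited fields terminated by '\t'
location '/db/mysql/t_bank1';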



Exporting HDFS Data to MySQL with Sqoop

Create the target table in MySQL ahead of time

drop table if exists sc;
-- Clone t_bank's structure (with its data), then empty it so only the schema remains
create table sc as select * from t_bank;
truncate table sc;
select * from sc limit 5;

Exporting data with Sqoop

[root@master lh]# hdfs dfs -du -h /user/hive/warehouse/hdfs_bigdata_19.db
228      /user/hive/warehouse/hdfs_bigdata_19.db/c2rtest
0        /user/hive/warehouse/hdfs_bigdata_19.db/cl
352      /user/hive/warehouse/hdfs_bigdata_19.db/gunbigdata19_window_1
352      /user/hive/warehouse/hdfs_bigdata_19.db/gunbigdata19_window_2
630.1 K  /user/hive/warehouse/hdfs_bigdata_19.db/ods_t_bank
10       /user/hive/warehouse/hdfs_bigdata_19.db/ods_t_bank__63710dac_3886_4b1a_bfe4_682b268ecfdb
486.2 K  /user/hive/warehouse/hdfs_bigdata_19.db/ods_t_bank_orc
729.3 K  /user/hive/warehouse/hdfs_bigdata_19.db/ods_t_bank_orc_snappy
585.4 K  /user/hive/warehouse/hdfs_bigdata_19.db/ods_t_bank_parquet
585.4 K  /user/hive/warehouse/hdfs_bigdata_19.db/ods_t_bank_parquet_snappy
4.9 M    /user/hive/warehouse/hdfs_bigdata_19.db/ods_t_bank_test
279      /user/hive/warehouse/hdfs_bigdata_19.db/r2cc2rtest
4.9 M    /user/hive/warehouse/hdfs_bigdata_19.db/temp_19_t_bank
4.9 M    /user/hive/warehouse/hdfs_bigdata_19.db/temp_age_t_bank
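Before exporting, the storage format of a candidate Hive table can be confirmed with desc formatted; for a TEXTFILE table the output includes InputFormat: org.apache.hadoop.mapred.TextInputFormat. For example, for the table exported below:

-- Run in the hive CLI
use hdfs_bigdata_19;
desc formatted ods_t_bank_test;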


# The Hive table being exported must be stored as TEXTFILE
# A table with the same structure as the Hive source table must be created in MySQL beforehand
# Export HDFS data to MySQL
/opt/soft_installed/sqoop-1.4.7.bin__hadoop-2.6.0/bin/sqoop export \
--connect "jdbc:mysql://node1:3306/bigdata19?useUnicode=true&characterEncoding=utf-8" \
--username lh \
--password Ypassowrd \
--driver com.mysql.cj.jdbc.Driver \
--table sc \
--export-dir /user/hive/warehouse/hdfs_bigdata_19.db/ods_t_bank_test \
--m 1 --update-mode allowinsert \
--input-null-non-string '\\N' \
--input-null-string '\\N' \
--input-fields-terminated-by '\001' --lines-terminated-by '\n'
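After the export finishes, verify the result on the MySQL side (assuming the mysql client can reach node1):

# The row count in the target table should match the exported HDFS data
mysql -h node1 -u lh -p -e "select count(*) from bigdata19.sc;"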

