安装
tar -xvf sqoop-1.99.7-bin-hadoop200.tar.gz
sqoop架构
profile配置
# Edit the user profile to register the Sqoop environment variables.
vi ~/.bash_profile
# Root of the Sqoop 1.99.7 installation
export SQOOP_HOME=/home/zkpk/sqoop-1.99.7-bin-hadoop200
# Embedded Tomcat base directory used by the Sqoop server
export CATALINA_BASE=$SQOOP_HOME/server
# Directory where the Sqoop server writes its log files
export LOGDIR=$SQOOP_HOME/logs/
# Put the sqoop launcher scripts on PATH
export PATH=$PATH:$SQOOP_HOME/bin
# Reload the profile so the variables take effect in the current shell
source ~/.bash_profile
$SQOOP_HOME/conf/sqoop.properties配置
org.apache.sqoop.submission.engine.mapreduce.configuration.directory=/home/bigdata/hadoop-2.7.3/etc/hadoop
$HADOOP_HOME/etc/hadoop/core-site.xml的代理配置
<!-- Allow the 'bigdata' user to submit proxy (impersonation) requests from any host.
     Required so the Sqoop server can act on behalf of other users. -->
<property>
<name>hadoop.proxyuser.bigdata.hosts</name>
<value>*</value>
</property>
<!-- Allow the 'bigdata' user to impersonate members of any group. -->
<property>
<name>hadoop.proxyuser.bigdata.groups</name>
<value>*</value>
</property>
$HADOOP_HOME/etc/hadoop/yarn-site.xml的日志聚合配置
<!-- Enable YARN log aggregation so container logs are collected to HDFS
     and visible through the JobHistory server. -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
$HADOOP_HOME/etc/hadoop/mapred-site.xml的JobHistory配置
<!-- RPC address of the MapReduce JobHistory server, which Sqoop jobs query
     for submission status. -->
<property>
<name>mapreduce.jobhistory.address</name>
<!-- Set to the actual hostname and port -->
<value>master:10020</value>
</property>
mysql的jdbc connector下载并移动
# Install the MySQL JDBC driver package, then copy its jar into the Sqoop
# server classpath so the generic-jdbc-connector can load it.
sudo yum install mysql-connector-java #download the driver
cd /usr/share/java
# NOTE(review): replace 5.x.xx with the actual version installed by yum
cp mysql-connector-java-5.x.xx.jar $SQOOP_HOME/server/lib #copy the jar
启动hdfs,yarn,historyserver
start-dfs.sh ;start-yarn.sh ;mr-jobhistory-daemon.sh historyserver;
6021 Jps
4647 NameNode
4952 SecondaryNameNode
5979 JobHistoryServer
5213 NodeManager
4751 DataNode
[bigdata@bigdata ~]$ hdfs dfsadmin -safemode leave
Safe mode is OFF
启动Sqoop服务
sqoop.sh server start;jps
启动Sqoop客户端
sqoop2-shell
Available commands
:exit (:x ) Exit the shell
:history (:H ) Display, manage and recall edit-line history
help (\h ) Display this help message
set (\st ) Configure various client options and settings
show (\sh ) Display various objects and configuration options
create (\cr ) Create new object in Sqoop repository
delete (\d ) Delete existing object in Sqoop repository
update (\up ) Update objects in Sqoop repository
clone (\cl ) Create new object based on existing one
start (\sta) Start job
stop (\stp) Stop job
status (\stu) Display status of a job
enable (\en ) Enable object in Sqoop repository
disable (\di ) Disable object in Sqoop repository
grant (\g ) Grant access to roles and assign privileges
revoke (\r ) Revoke access from roles and remove privileges
设置option
打印详细错误信息。sqoop:000> set option --name verbose --value true
设置连接的服务器。sqoop:000> set server --host master
验证是否已经连上。sqoop:000> show version --all
创建jdbc连接器
create link -connector generic-jdbc-connector
要设置字段请参照“更新jdbc连接器”
更新jdbc连接器
sqoop:000> update link -name mysql-link
Updating link with name mysql-link
Please update link:
Name: mysql-link #输入连接器名字
Database connection
Driver class: com.mysql.jdbc.Driver #mysql的驱动,跟用java访问mysql的驱动一样
Connection String: jdbc:mysql://localhost:3306/bigdb
#这个地方的协议,一定要小心。笔者曾忘记写mysql查了好久才解决这个问题。
Username: root
Password: ****
Fetch Size:
Connection Properties:
There are currently 0 values in the map:
entry#
SQL Dialect
Identifier enclose: #这个地方要输入空格,是SQL中标识符的定界符
link was successfully updated with status WARNING
创建hdfs连接器
create link -connector hdfs-connector
要设置字段请参照“更新hdfs连接器”
更新hdfs连接器
sqoop:000> update link -name hdfs-link
Updating link with name hdfs-link
Please update link:
Name: hdfs-link
HDFS cluster
URI: hdfs://bigdata:9000/
Conf directory: /home/bigdata/hadoop-2.7.3/etc/hadoop
Additional configs::
There are currently 0 values in the map:
entry#
link was successfully updated with status OK
创建job
create job -f 'mysql-link' -t 'hdfs-link'
要设置字段请参照“更新job”
更新job
sqoop:000> update job -name mysql2hdfs
Updating job with name mysql2hdfs
Please update job:
Name: mysql2hdfs
Database source
Schema name: bigdb
Table name: brand
SQL statement:
Column names:
There are currently 0 values in the list:
element#
Partition column: bid
Partition column nullable:
Boundary query:
Incremental read
Check column:
Last value:
Target configuration
Override null value:
Null value:
File format:
0 : TEXT_FILE
1 : SEQUENCE_FILE
2 : PARQUET_FILE
Choose: 0
Compression codec:
0 : NONE
1 : DEFAULT
2 : DEFLATE
3 : GZIP
4 : BZIP2
5 : LZO
6 : LZ4
7 : SNAPPY
8 : CUSTOM
Choose: 0
Custom codec:
Output directory: hdfs://bigdata:9000/sqoop/brand
Append mode:
Throttling resources
Extractors: 1
Loaders: 0
Classpath configuration
Extra mapper jars:
There are currently 0 values in the list:
element#
Job was successfully updated with status OK
sqoop:000>
提交job
start job -name mysql2hdfs