1) Build and deploy
## download sqoop-1.4.7.tar.gz (the source tarball) from http://archive.apache.org/dist/sqoop/1.4.7/
## unpack to ~/work/sqoop-src-1.4.7
## modify project files
$ cd ~/work/sqoop-src-1.4.7
$ diff -u build.xml.orig build.xml # change to valid repository, omit docs
--- build.xml.orig 2017-12-19 07:00:00.000000000 +0800
+++ build.xml 2022-06-08 17:27:49.000000000 +0800
@@ -171,9 +171,9 @@
<property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"/>
<property name="ivy.jar" location="${lib.dir}/ivy-${ivy.version}.jar"/>
<property name="ivy_repo_url"
- value="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
+ value="https://maven.aliyun.com/repository/public/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
<property name="mvn_repo_url"
- value="http://repo2.maven.org/maven2/org/apache/maven/maven-ant-tasks/${mvn.version}/maven-ant-tasks-${mvn.version}.jar"/>
+ value="https://maven.aliyun.com/repository/public/org/apache/maven/maven-ant-tasks/${mvn.version}/maven-ant-tasks-${mvn.version}.jar"/>
<property name="mvn.jar"
location="${build.dir}/maven-ant-tasks-${mvn.version}.jar" />
<property name="build.ivy.dir" location="${build.dir}/ivy" />
@@ -184,7 +184,7 @@
<!--this is the naming policy for artifacts we want pulled down-->
<property name="ivy.artifact.retrieve.pattern"
- value="${name}/[conf]/[artifact]-[revision](-[classifier]).[ext]"/>
+ value="${name}/[conf]/[artifact]-[type]-[revision](-[classifier]).[ext]"/>
<!--test related properties -->
<property name="sqoop.test.oracle.connectstring" value="jdbc:oracle:thin:@//localhost/xe"/>
@@ -491,7 +491,7 @@
</target>
<target name="package"
- depends="jar-all,compile-all,docs,ivy-retrieve-redist,scripts"
+ depends="jar-all,compile-all,ivy-retrieve-redist,scripts"
description="Create a redistributable package">
<mkdir dir="${dist.dir}"/>
@@ -542,7 +542,7 @@
</copy>
<!-- copy in documentation build artifacts -->
- <copy todir="${dist.dir}/docs" includeEmptyDirs="false" flatten="false">
+ <!-- copy todir="${dist.dir}/docs" includeEmptyDirs="false" flatten="false">
<fileset dir="${build.dir}/docs">
<include name="**/*.html" />
<include name="**/*.css" />
@@ -553,7 +553,7 @@
<fileset dir="${build.dir}/docs">
<include name="**/*.gz" />
</fileset>
- </copy>
+ </copy -->
<!-- copy in auto-generated bin scripts -->
<copy todir="${dist.dir}/bin" includeEmptyDirs="false" flatten="true">
$ diff -u ivy/ivysettings.xml.orig ivy/ivysettings.xml # change repository
--- ivy/ivysettings.xml.orig 2017-12-19 07:00:00.000000000 +0800
+++ ivy/ivysettings.xml 2022-06-08 17:29:13.000000000 +0800
@@ -31,7 +31,7 @@
http://ibiblio.lsu.edu/main/pub/packages/maven2
http://www.ibiblio.net/pub/packages/maven2
-->
- <property name="repo.maven.org" value="http://repo1.maven.org/maven2/"
+ <property name="repo.maven.org" value="https://maven.aliyun.com/repository/public/"
override="false"/>
<property name="snapshot.apache.org"
value="https://repository.apache.org/content/repositories/snapshots/"
$ diff -u ivy/libraries.properties.orig ivy/libraries.properties # change hadoop version
--- ivy/libraries.properties.orig 2017-12-19 07:00:00.000000000 +0800
+++ ivy/libraries.properties 2022-06-08 17:29:00.000000000 +0800
@@ -54,6 +54,6 @@
slf4j.version=1.7.7
-hadoop.version=2.6.0
+hadoop.version=2.8.0
hbase.version=1.2.4
hcatalog.version=1.2.1
## build; the tarball is created if the build is successful
$ ant tar
$ cd ~/work
$ tar xvf sqoop-src-1.4.7/build/sqoop-1.4.7.bin__hadoop-2.8.0.tar.gz
$ mv sqoop-1.4.7.bin__hadoop-2.8.0 sqoop-1.4.7
## copy mysql driver
$ cp -p downloads/mysql-connector-java-8.0.28.jar sqoop-1.4.7/lib/
## customize environment
$ cd ~/work/sqoop-1.4.7
$ diff -u conf/sqoop-env-template.sh conf/sqoop-env.sh
--- conf/sqoop-env-template.sh 2022-06-08 17:31:21.000000000 +0800
+++ conf/sqoop-env.sh 2022-06-09 09:11:44.000000000 +0800
@@ -20,16 +20,16 @@
# Set Hadoop-specific environment variables here.
#Set path to where bin/hadoop is available
-#export HADOOP_COMMON_HOME=
+export HADOOP_COMMON_HOME=$HADOOP_HOME
#Set path to where hadoop-*-core.jar is available
-#export HADOOP_MAPRED_HOME=
+export HADOOP_MAPRED_HOME=$HADOOP_HOME
#set the path to where bin/hbase is available
#export HBASE_HOME=
#Set the path to where bin/hive is available
-#export HIVE_HOME=
+export HIVE_HOME=/Users/sun_xo/work/hive-2.3.9
#Set the path for where zookeper config dir is
#export ZOOCFGDIR=
2) Import from MySQL
$ cd ~/work/sqoop-1.4.7
## list tables
$ bin/sqoop list-tables --connect jdbc:mysql://localhost:3306/manga --username manga --password manga
...
export
form
form_detail
fruit
$ bin/sqoop import --connect jdbc:mysql://localhost:3306/manga --username manga --password manga \
--table fruit --target-dir /user/sun_xo/sqoop/fruit \
--num-mappers 1 --delete-target-dir
22/06/09 15:33:13 INFO sqoop.Sqoop: Running Sqoop version: 1.4.7
22/06/09 15:33:13 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
22/06/09 15:33:13 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
22/06/09 15:33:13 INFO tool.CodeGenTool: Beginning code generation
Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'. The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary.
22/06/09 15:33:13 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `fruit` AS t LIMIT 1
22/06/09 15:33:14 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `fruit` AS t LIMIT 1
22/06/09 15:33:14 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /Users/sun_xo/work/hadoop
Note: /tmp/sqoop-sun_xo/compile/47ca53cd7c2f9abf409e90ef9e7734d3/fruit.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
22/06/09 15:33:15 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-sun_xo/compile/47ca53cd7c2f9abf409e90ef9e7734d3/fruit.jar
22/06/09 15:33:15 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
22/06/09 15:33:16 INFO tool.ImportTool: Destination directory /user/sun_xo/sqoop/fruit deleted.
22/06/09 15:33:16 WARN manager.MySQLManager: It looks like you are importing from mysql.
22/06/09 15:33:16 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
22/06/09 15:33:16 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
22/06/09 15:33:16 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
22/06/09 15:33:16 INFO mapreduce.ImportJobBase: Beginning import of fruit
22/06/09 15:33:16 INFO Configuration.deprecation: mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
22/06/09 15:33:16 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
22/06/09 15:33:16 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
22/06/09 15:33:16 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
22/06/09 15:33:17 INFO db.DBInputFormat: Using read commited transaction isolation
22/06/09 15:33:17 INFO mapreduce.JobSubmitter: number of splits:1
22/06/09 15:33:17 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1654591879954_0009
22/06/09 15:33:17 INFO impl.YarnClientImpl: Submitted application application_1654591879954_0009
22/06/09 15:33:17 INFO mapreduce.Job: The url to track the job: http://localhost:8088/proxy/application_1654591879954_0009/
22/06/09 15:33:17 INFO mapreduce.Job: Running job: job_1654591879954_0009
22/06/09 15:33:23 INFO mapreduce.Job: Job job_1654591879954_0009 running in uber mode : false
22/06/09 15:33:23 INFO mapreduce.Job: map 0% reduce 0%
22/06/09 15:33:29 INFO mapreduce.Job: map 100% reduce 0%
22/06/09 15:33:29 INFO mapreduce.Job: Job job_1654591879954_0009 completed successfully
22/06/09 15:33:29 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=156473
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=87
HDFS: Number of bytes written=68
HDFS: Number of read operations=4
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=1
Other local map tasks=1
Total time spent by all maps in occupied slots (ms)=2213
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=2213
Total vcore-milliseconds taken by all map tasks=2213
Total megabyte-milliseconds taken by all map tasks=2266112
Map-Reduce Framework
Map input records=4
Map output records=4
Input split bytes=87
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=40
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=107479040
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=68
22/06/09 15:33:29 INFO mapreduce.ImportJobBase: Transferred 68 bytes in 12.5582 seconds (5.4148 bytes/sec)
22/06/09 15:33:29 INFO mapreduce.ImportJobBase: Retrieved 4 records.
$ hdfs dfs -text "/user/sun_xo/sqoop/fruit/*"
101,香瓜,800.0
102,草莓,150.0
103,苹果,120.0
104,柠檬,200.0
3) Export to MySQL
$ cat fruit_exp.txt
105,橙子,115.0
106,香蕉,110.0
$ hdfs dfs -mkdir -p /user/sun_xo/sqoop/export/fruit
$ hdfs dfs -put fruit_exp.txt /user/sun_xo/sqoop/export/fruit
$ bin/sqoop export --connect jdbc:mysql://localhost:3306/manga --username manga --password manga \
--table fruit --export-dir /user/sun_xo/sqoop/export/fruit \
--num-mappers 1
22/06/09 15:39:59 INFO sqoop.Sqoop: Running Sqoop version: 1.4.7
22/06/09 15:39:59 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
22/06/09 15:39:59 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
22/06/09 15:39:59 INFO tool.CodeGenTool: Beginning code generation
Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'. The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary.
22/06/09 15:39:59 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `fruit` AS t LIMIT 1
22/06/09 15:39:59 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `fruit` AS t LIMIT 1
22/06/09 15:39:59 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /Users/sun_xo/work/hadoop
Note: /tmp/sqoop-sun_xo/compile/89d2056e28755b6b08867f478c64d7b4/fruit.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
22/06/09 15:40:01 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-sun_xo/compile/89d2056e28755b6b08867f478c64d7b4/fruit.jar
22/06/09 15:40:01 INFO mapreduce.ExportJobBase: Beginning export of fruit
22/06/09 15:40:01 INFO Configuration.deprecation: mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
22/06/09 15:40:01 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
22/06/09 15:40:01 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
22/06/09 15:40:02 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
22/06/09 15:40:02 INFO Configuration.deprecation: mapred.map.tasks.speculative.execution is deprecated. Instead, use mapreduce.map.speculative
22/06/09 15:40:02 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
22/06/09 15:40:02 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
22/06/09 15:40:02 INFO input.FileInputFormat: Total input files to process : 1
22/06/09 15:40:02 INFO input.FileInputFormat: Total input files to process : 1
22/06/09 15:40:02 INFO mapreduce.JobSubmitter: number of splits:1
22/06/09 15:40:02 INFO Configuration.deprecation: mapred.map.tasks.speculative.execution is deprecated. Instead, use mapreduce.map.speculative
22/06/09 15:40:03 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1654591879954_0010
22/06/09 15:40:03 INFO impl.YarnClientImpl: Submitted application application_1654591879954_0010
22/06/09 15:40:03 INFO mapreduce.Job: The url to track the job: http://localhost:8088/proxy/application_1654591879954_0010/
22/06/09 15:40:03 INFO mapreduce.Job: Running job: job_1654591879954_0010
22/06/09 15:40:10 INFO mapreduce.Job: Job job_1654591879954_0010 running in uber mode : false
22/06/09 15:40:10 INFO mapreduce.Job: map 0% reduce 0%
22/06/09 15:40:15 INFO mapreduce.Job: map 100% reduce 0%
22/06/09 15:40:16 INFO mapreduce.Job: Job job_1654591879954_0010 completed successfully
22/06/09 15:40:16 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=156162
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=184
HDFS: Number of bytes written=0
HDFS: Number of read operations=4
HDFS: Number of large read operations=0
HDFS: Number of write operations=0
Job Counters
Launched map tasks=1
Rack-local map tasks=1
Total time spent by all maps in occupied slots (ms)=2376
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=2376
Total vcore-milliseconds taken by all map tasks=2376
Total megabyte-milliseconds taken by all map tasks=2433024
Map-Reduce Framework
Map input records=2
Map output records=2
Input split bytes=147
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=37
CPU time spent (ms)=0
Physical memory (bytes) snapshot=0
Virtual memory (bytes) snapshot=0
Total committed heap usage (bytes)=121110528
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=0
22/06/09 15:40:16 INFO mapreduce.ExportJobBase: Transferred 184 bytes in 14.2446 seconds (12.9172 bytes/sec)
22/06/09 15:40:16 INFO mapreduce.ExportJobBase: Exported 2 records.
$ mysql -umanga -pmanga manga
mysql> select * from fruit;
+----------+--------+-------+
| fruit_id | name | price |
+----------+--------+-------+
| 101 | 香瓜 | 800 |
| 102 | 草莓 | 150 |
| 103 | 苹果 | 120 |
| 104 | 柠檬 | 200 |
| 105 | 橙子 | 115 |
| 106 | 香蕉 | 110 |
+----------+--------+-------+
6 rows in set (0.00 sec)