搭建Sqoop环境

1) Build and deploy
## download sqoop-1.4.7.tar.gz from http://archive.apache.org/dist/
## unpack to ~/work/sqoop-src-1.4.7
## modify project files
$ cd ~/work/sqoop-src-1.4.7
$ diff -u build.xml.orig build.xml  # change to valid repository, omit docs

--- build.xml.orig	2017-12-19 07:00:00.000000000 +0800
+++ build.xml	2022-06-08 17:27:49.000000000 +0800
@@ -171,9 +171,9 @@
   <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"/>
   <property name="ivy.jar" location="${lib.dir}/ivy-${ivy.version}.jar"/>
   <property name="ivy_repo_url"
-  value="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
+  value="https://maven.aliyun.com/repository/public/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
   <property name="mvn_repo_url"
-  value="http://repo2.maven.org/maven2/org/apache/maven/maven-ant-tasks/${mvn.version}/maven-ant-tasks-${mvn.version}.jar"/>
+  value="https://maven.aliyun.com/repository/public/org/apache/maven/maven-ant-tasks/${mvn.version}/maven-ant-tasks-${mvn.version}.jar"/>
   <property name="mvn.jar"
       location="${build.dir}/maven-ant-tasks-${mvn.version}.jar" />
   <property name="build.ivy.dir" location="${build.dir}/ivy" />
@@ -184,7 +184,7 @@
 
   <!--this is the naming policy for artifacts we want pulled down-->
   <property name="ivy.artifact.retrieve.pattern"
-      value="${name}/[conf]/[artifact]-[revision](-[classifier]).[ext]"/>
+      value="${name}/[conf]/[artifact]-[type]-[revision](-[classifier]).[ext]"/>
 
   <!--test related properties -->
   <property name="sqoop.test.oracle.connectstring" value="jdbc:oracle:thin:@//localhost/xe"/>
@@ -491,7 +491,7 @@
   </target>
 
   <target name="package"
-      depends="jar-all,compile-all,docs,ivy-retrieve-redist,scripts"
+      depends="jar-all,compile-all,ivy-retrieve-redist,scripts"
       description="Create a redistributable package">
 
     <mkdir dir="${dist.dir}"/>
@@ -542,7 +542,7 @@
     </copy>
 
     <!-- copy in documentation build artifacts -->
-    <copy todir="${dist.dir}/docs" includeEmptyDirs="false" flatten="false">
+    <!-- copy todir="${dist.dir}/docs" includeEmptyDirs="false" flatten="false">
       <fileset dir="${build.dir}/docs">
         <include name="**/*.html" />
         <include name="**/*.css" />
@@ -553,7 +553,7 @@
       <fileset dir="${build.dir}/docs">
         <include name="**/*.gz" />
       </fileset>
-    </copy>
+    </copy -->
 
     <!-- copy in auto-generated bin scripts -->
     <copy todir="${dist.dir}/bin" includeEmptyDirs="false" flatten="true">

$ diff -u ivy/ivysettings.xml.orig ivy/ivysettings.xml # change repository

--- ivy/ivysettings.xml.orig	2017-12-19 07:00:00.000000000 +0800
+++ ivy/ivysettings.xml	2022-06-08 17:29:13.000000000 +0800
@@ -31,7 +31,7 @@
           http://ibiblio.lsu.edu/main/pub/packages/maven2
           http://www.ibiblio.net/pub/packages/maven2
   -->
-  <property name="repo.maven.org" value="http://repo1.maven.org/maven2/"
+  <property name="repo.maven.org" value="https://maven.aliyun.com/repository/public/"
       override="false"/>
   <property name="snapshot.apache.org"
       value="https://repository.apache.org/content/repositories/snapshots/"

$ diff -u ivy/libraries.properties.orig ivy/libraries.properties # change hadoop version

--- ivy/libraries.properties.orig	2017-12-19 07:00:00.000000000 +0800
+++ ivy/libraries.properties	2022-06-08 17:29:00.000000000 +0800
@@ -54,6 +54,6 @@
 
 slf4j.version=1.7.7
 
-hadoop.version=2.6.0
+hadoop.version=2.8.0
 hbase.version=1.2.4
 hcatalog.version=1.2.1

## build; this creates the tarball if the build is successful
$ ant tar       
$ cd ~/work
$ tar xvf sqoop-src-1.4.7/build/sqoop-1.4.7.bin__hadoop-2.8.0.tar.gz
$ mv sqoop-1.4.7.bin__hadoop-2.8.0 sqoop-1.4.7

## copy mysql driver
$ cp -p downloads/mysql-connector-java-8.0.28.jar sqoop-1.4.7/lib/

## customize environment
$ cd ~/work/sqoop-1.4.7
$ diff -u conf/sqoop-env-template.sh conf/sqoop-env.sh

--- conf/sqoop-env-template.sh	2022-06-08 17:31:21.000000000 +0800
+++ conf/sqoop-env.sh	2022-06-09 09:11:44.000000000 +0800
@@ -20,16 +20,16 @@
 # Set Hadoop-specific environment variables here.
 
 #Set path to where bin/hadoop is available
-#export HADOOP_COMMON_HOME=
+export HADOOP_COMMON_HOME=$HADOOP_HOME
 
 #Set path to where hadoop-*-core.jar is available
-#export HADOOP_MAPRED_HOME=
+export HADOOP_MAPRED_HOME=$HADOOP_HOME
 
 #set the path to where bin/hbase is available
 #export HBASE_HOME=
 
 #Set the path to where bin/hive is available
-#export HIVE_HOME=
+export HIVE_HOME=/Users/sun_xo/work/hive-2.3.9
 
 #Set the path for where zookeper config dir is
 #export ZOOCFGDIR=

2) Import from MySQL
$ cd ~/work/sqoop-1.4.7
## list tables
$ bin/sqoop list-tables --connect jdbc:mysql://localhost:3306/manga --username manga --password manga

...
export
form
form_detail
fruit

$ bin/sqoop import --connect jdbc:mysql://localhost:3306/manga --username manga --password manga \
    --table fruit --target-dir /user/sun_xo/sqoop/fruit \
    --num-mappers 1 --delete-target-dir

22/06/09 15:33:13 INFO sqoop.Sqoop: Running Sqoop version: 1.4.7
22/06/09 15:33:13 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
22/06/09 15:33:13 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
22/06/09 15:33:13 INFO tool.CodeGenTool: Beginning code generation
Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'. The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary.
22/06/09 15:33:13 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `fruit` AS t LIMIT 1
22/06/09 15:33:14 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `fruit` AS t LIMIT 1
22/06/09 15:33:14 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /Users/sun_xo/work/hadoop
Note: /tmp/sqoop-sun_xo/compile/47ca53cd7c2f9abf409e90ef9e7734d3/fruit.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
22/06/09 15:33:15 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-sun_xo/compile/47ca53cd7c2f9abf409e90ef9e7734d3/fruit.jar
22/06/09 15:33:15 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
22/06/09 15:33:16 INFO tool.ImportTool: Destination directory /user/sun_xo/sqoop/fruit deleted.
22/06/09 15:33:16 WARN manager.MySQLManager: It looks like you are importing from mysql.
22/06/09 15:33:16 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
22/06/09 15:33:16 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
22/06/09 15:33:16 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
22/06/09 15:33:16 INFO mapreduce.ImportJobBase: Beginning import of fruit
22/06/09 15:33:16 INFO Configuration.deprecation: mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
22/06/09 15:33:16 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
22/06/09 15:33:16 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
22/06/09 15:33:16 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
22/06/09 15:33:17 INFO db.DBInputFormat: Using read commited transaction isolation
22/06/09 15:33:17 INFO mapreduce.JobSubmitter: number of splits:1
22/06/09 15:33:17 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1654591879954_0009
22/06/09 15:33:17 INFO impl.YarnClientImpl: Submitted application application_1654591879954_0009
22/06/09 15:33:17 INFO mapreduce.Job: The url to track the job: http://localhost:8088/proxy/application_1654591879954_0009/
22/06/09 15:33:17 INFO mapreduce.Job: Running job: job_1654591879954_0009
22/06/09 15:33:23 INFO mapreduce.Job: Job job_1654591879954_0009 running in uber mode : false
22/06/09 15:33:23 INFO mapreduce.Job:  map 0% reduce 0%
22/06/09 15:33:29 INFO mapreduce.Job:  map 100% reduce 0%
22/06/09 15:33:29 INFO mapreduce.Job: Job job_1654591879954_0009 completed successfully
22/06/09 15:33:29 INFO mapreduce.Job: Counters: 30
	File System Counters
		FILE: Number of bytes read=0
		FILE: Number of bytes written=156473
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
		HDFS: Number of bytes read=87
		HDFS: Number of bytes written=68
		HDFS: Number of read operations=4
		HDFS: Number of large read operations=0
		HDFS: Number of write operations=2
	Job Counters 
		Launched map tasks=1
		Other local map tasks=1
		Total time spent by all maps in occupied slots (ms)=2213
		Total time spent by all reduces in occupied slots (ms)=0
		Total time spent by all map tasks (ms)=2213
		Total vcore-milliseconds taken by all map tasks=2213
		Total megabyte-milliseconds taken by all map tasks=2266112
	Map-Reduce Framework
		Map input records=4
		Map output records=4
		Input split bytes=87
		Spilled Records=0
		Failed Shuffles=0
		Merged Map outputs=0
		GC time elapsed (ms)=40
		CPU time spent (ms)=0
		Physical memory (bytes) snapshot=0
		Virtual memory (bytes) snapshot=0
		Total committed heap usage (bytes)=107479040
	File Input Format Counters 
		Bytes Read=0
	File Output Format Counters 
		Bytes Written=68
22/06/09 15:33:29 INFO mapreduce.ImportJobBase: Transferred 68 bytes in 12.5582 seconds (5.4148 bytes/sec)
22/06/09 15:33:29 INFO mapreduce.ImportJobBase: Retrieved 4 records.

$ hdfs dfs -text "/user/sun_xo/sqoop/fruit/*"

101,香瓜,800.0
102,草莓,150.0
103,苹果,120.0
104,柠檬,200.0

3) Export to MySQL
$ cat fruit_exp.txt

105,橙子,115.0
106,香蕉,110.0

$ hdfs dfs -mkdir -p /user/sun_xo/sqoop/export/fruit
$ hdfs dfs -put fruit_exp.txt /user/sun_xo/sqoop/export/fruit
$ bin/sqoop export --connect jdbc:mysql://localhost:3306/manga --username manga --password manga \
    --table fruit --export-dir /user/sun_xo/sqoop/export/fruit \
    --num-mappers 1

22/06/09 15:39:59 INFO sqoop.Sqoop: Running Sqoop version: 1.4.7
22/06/09 15:39:59 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
22/06/09 15:39:59 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
22/06/09 15:39:59 INFO tool.CodeGenTool: Beginning code generation
Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'. The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary.
22/06/09 15:39:59 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `fruit` AS t LIMIT 1
22/06/09 15:39:59 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `fruit` AS t LIMIT 1
22/06/09 15:39:59 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /Users/sun_xo/work/hadoop
Note: /tmp/sqoop-sun_xo/compile/89d2056e28755b6b08867f478c64d7b4/fruit.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
22/06/09 15:40:01 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-sun_xo/compile/89d2056e28755b6b08867f478c64d7b4/fruit.jar
22/06/09 15:40:01 INFO mapreduce.ExportJobBase: Beginning export of fruit
22/06/09 15:40:01 INFO Configuration.deprecation: mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
22/06/09 15:40:01 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
22/06/09 15:40:01 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
22/06/09 15:40:02 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
22/06/09 15:40:02 INFO Configuration.deprecation: mapred.map.tasks.speculative.execution is deprecated. Instead, use mapreduce.map.speculative
22/06/09 15:40:02 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
22/06/09 15:40:02 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
22/06/09 15:40:02 INFO input.FileInputFormat: Total input files to process : 1
22/06/09 15:40:02 INFO input.FileInputFormat: Total input files to process : 1
22/06/09 15:40:02 INFO mapreduce.JobSubmitter: number of splits:1
22/06/09 15:40:02 INFO Configuration.deprecation: mapred.map.tasks.speculative.execution is deprecated. Instead, use mapreduce.map.speculative
22/06/09 15:40:03 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1654591879954_0010
22/06/09 15:40:03 INFO impl.YarnClientImpl: Submitted application application_1654591879954_0010
22/06/09 15:40:03 INFO mapreduce.Job: The url to track the job: http://localhost:8088/proxy/application_1654591879954_0010/
22/06/09 15:40:03 INFO mapreduce.Job: Running job: job_1654591879954_0010
22/06/09 15:40:10 INFO mapreduce.Job: Job job_1654591879954_0010 running in uber mode : false
22/06/09 15:40:10 INFO mapreduce.Job:  map 0% reduce 0%
22/06/09 15:40:15 INFO mapreduce.Job:  map 100% reduce 0%
22/06/09 15:40:16 INFO mapreduce.Job: Job job_1654591879954_0010 completed successfully
22/06/09 15:40:16 INFO mapreduce.Job: Counters: 30
	File System Counters
		FILE: Number of bytes read=0
		FILE: Number of bytes written=156162
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
		HDFS: Number of bytes read=184
		HDFS: Number of bytes written=0
		HDFS: Number of read operations=4
		HDFS: Number of large read operations=0
		HDFS: Number of write operations=0
	Job Counters 
		Launched map tasks=1
		Rack-local map tasks=1
		Total time spent by all maps in occupied slots (ms)=2376
		Total time spent by all reduces in occupied slots (ms)=0
		Total time spent by all map tasks (ms)=2376
		Total vcore-milliseconds taken by all map tasks=2376
		Total megabyte-milliseconds taken by all map tasks=2433024
	Map-Reduce Framework
		Map input records=2
		Map output records=2
		Input split bytes=147
		Spilled Records=0
		Failed Shuffles=0
		Merged Map outputs=0
		GC time elapsed (ms)=37
		CPU time spent (ms)=0
		Physical memory (bytes) snapshot=0
		Virtual memory (bytes) snapshot=0
		Total committed heap usage (bytes)=121110528
	File Input Format Counters 
		Bytes Read=0
	File Output Format Counters 
		Bytes Written=0
22/06/09 15:40:16 INFO mapreduce.ExportJobBase: Transferred 184 bytes in 14.2446 seconds (12.9172 bytes/sec)
22/06/09 15:40:16 INFO mapreduce.ExportJobBase: Exported 2 records.

$ mysql -umanga -pmanga manga
mysql> select * from fruit;

+----------+--------+-------+
| fruit_id | name   | price |
+----------+--------+-------+
|      101 | 香瓜   |   800 |
|      102 | 草莓   |   150 |
|      103 | 苹果   |   120 |
|      104 | 柠檬   |   200 |
|      105 | 橙子   |   115 |
|      106 | 香蕉   |   110 |
+----------+--------+-------+
6 rows in set (0.00 sec)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值