1.开发环境
windows
intellij idea2017
hadoop-2.9.1
mysql-5.7.21
sqoop-1.4.7
2.windows安装mysql-5.7.21
mysql下载路径
https://cdn.mysql.com/archives/mysql-installer/mysql-installer-community-5.7.21.0.msi
双击运行
2.1 如果安装程序提示缺少 .NET Framework 4.5(如下图所示),
请先下载并运行
https://download.microsoft.com/download/B/A/4/BA4A7E71-2906-4B2D-A0E1-80CF16844F5F/dotNetFx45_Full_setup.exe
.NET Framework 安装完成后,再重新运行mysql安装程序
2.2 可依照下列图进行安装和配置
3.windows安装hadoop-2.9.1,请参照
https://blog.csdn.net/a781136776/article/details/80458513
4.利用intellij idea 创建maven项目
4.1 新建project
4.2 选择quickstart
4.3 填写GroupId和ArtifactId
4.4 项目详细
4.5 填写项目的名称和保存路径
5.代码
5.1 pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the Sqoop demo project (sqoop:util).
  Pulls in the Hadoop client libraries, Sqoop 1.4.7 and the MySQL JDBC driver
  that SqoopTest needs to run an import job from inside the IDE.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>sqoop</groupId>
  <artifactId>util</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>util</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
    <!-- Single place to set the version shared by all Hadoop artifacts below. -->
    <hadoop.version>2.9.0</hadoop.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>

    <!--
      Hadoop client libraries. hadoop-common uses the default (compile) scope like
      the others: SqoopTest is started through its own main() and uses
      org.apache.hadoop.conf.Configuration directly, so the classes must be on the
      runtime classpath rather than being expected from a container.
    -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-common</artifactId>
      <version>${hadoop.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
      <version>1.8.0</version>
    </dependency>

    <!-- The jar for this artifact is copied into the local repository by hand (see step 7 of the guide). -->
    <dependency>
      <groupId>org.apache.sqoop</groupId>
      <artifactId>sqoop</artifactId>
      <version>1.4.7</version>
    </dependency>

    <!-- JDBC driver providing com.mysql.jdbc.Driver, which SqoopTest passes to Sqoop. -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.36</version>
    </dependency>
  </dependencies>
</project>
5.2 SqoopTest.java
package sqoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.sqoop.Sqoop;
import org.apache.sqoop.tool.SqoopTool;
import org.apache.sqoop.util.OptionsFileUtil;
/**
 * Minimal driver that runs a Sqoop "import" job in-process, copying the MySQL
 * table {@code test.log} into the HDFS directory {@code /test/log}.
 */
public class SqoopTest {

    /** Address of the HDFS service the import writes to. */
    private static final String HDFS_URI = "hdfs://127.0.0.1:9000";

    /**
     * Builds the Sqoop command line and executes the import tool.
     *
     * @return the exit status reported by {@link Sqoop#runSqoop}
     * @throws IllegalStateException if Sqoop has no tool registered under "import"
     * @throws Exception if argument expansion or plugin loading fails
     */
    private static int importDataFromMysql() throws Exception {
        String[] args = new String[] {
            "--connect", "jdbc:mysql://127.0.0.1:3306/test",
            "--driver", "com.mysql.jdbc.Driver",
            "--username", "root",
            "--password", "root",
            "--table", "log",
            "--delete-target-dir",
            "--num-mappers", "1",
            "--target-dir", "/test/log"
        };
        String[] expandedArgs = OptionsFileUtil.expandArguments(args);

        SqoopTool tool = SqoopTool.getTool("import");
        if (tool == null) {
            // Fail with a clear message instead of an obscure error further down.
            throw new IllegalStateException("Sqoop tool 'import' is not registered");
        }

        Configuration conf = new Configuration();
        // Set the HDFS service address. "fs.defaultFS" replaces the deprecated
        // "fs.default.name" key.
        conf.set("fs.defaultFS", HDFS_URI);
        Configuration pluginConf = SqoopTool.loadPlugins(conf);

        // The Sqoop constructor is declared against the legacy
        // com.cloudera.sqoop.tool.SqoopTool type, hence the explicit cast.
        Sqoop sqoop = new Sqoop((com.cloudera.sqoop.tool.SqoopTool) tool, pluginConf);
        return Sqoop.runSqoop(sqoop, expandedArgs);
    }

    /**
     * Runs the import, prints the elapsed time in milliseconds and exits with
     * Sqoop's status when it is non-zero so a failed import is visible to the caller.
     */
    public static void main(String[] args) throws Exception {
        long startMillis = System.currentTimeMillis();
        int exitCode = importDataFromMysql();
        System.out.println(System.currentTimeMillis() - startMillis);
        if (exitCode != 0) {
            System.exit(exitCode);
        }
    }
}
6.在mysql中准备数据
-- Fixture for the Sqoop import demo: creates the `test` schema and a `log`
-- table holding seven sample server-log rows.
-- Re-runnable: existing objects are kept, and rows whose id is already present
-- are left unchanged.
CREATE DATABASE IF NOT EXISTS test;
USE test;

CREATE TABLE IF NOT EXISTS log (
    id         INT NOT NULL,
    time_stamp VARCHAR(40),
    category   VARCHAR(40),
    type       VARCHAR(40),
    servername VARCHAR(40),
    code       VARCHAR(40),
    msg        VARCHAR(40),
    PRIMARY KEY (id)
);

-- Load all rows in one explicit transaction so the COMMIT below is meaningful
-- regardless of the session's autocommit setting.
START TRANSACTION;

INSERT INTO log (id, time_stamp, category, type, servername, code, msg)
VALUES
    (1, 'apr-8-2014-7:06:16-pm-pdt', 'notice', 'weblogicserver', 'adminserver', 'bea-000365', 'server state changed to standby'),
    (2, 'apr-8-2014-7:06:17-pm-pdt', 'notice', 'weblogicserver', 'adminserver', 'bea-000365', 'server state changed to starting'),
    (3, 'apr-8-2014-7:06:18-pm-pdt', 'notice', 'weblogicserver', 'adminserver', 'bea-000365', 'server state changed to admin'),
    (4, 'apr-8-2014-7:06:19-pm-pdt', 'notice', 'weblogicserver', 'adminserver', 'bea-000365', 'server state changed to resuming'),
    (5, 'apr-8-2014-7:06:20-pm-pdt', 'notice', 'weblogicserver', 'adminserver', 'bea-000361', 'started weblogic adminserver'),
    (6, 'apr-8-2014-7:06:21-pm-pdt', 'notice', 'weblogicserver', 'adminserver', 'bea-000365', 'server state changed to running'),
    (7, 'apr-8-2014-7:06:22-pm-pdt', 'notice', 'weblogicserver', 'adminserver', 'bea-000360', 'server started in running mode')
-- No-op on primary-key collision: keeps the existing row instead of raising an error.
ON DUPLICATE KEY UPDATE id = id;

COMMIT;
7.maven仓库中无法直接下载到pom.xml所引用的sqoop-1.4.7.jar,所以请手动下载sqoop-1.4.7
https://archive.apache.org/dist/sqoop/1.4.7/sqoop-1.4.7.tar.gz
解压后,将sqoop-1.4.7.bin__hadoop-2.6目录下的sqoop-1.4.7.jar复制到maven本地库中对应的目录(目录不存在时请手动创建),
例如C:\Users\Administrator\.m2\repository\org\apache\sqoop\sqoop\1.4.7
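8.运行
确认mysql和HDFS(hdfs://127.0.0.1:9000)已启动后,在intellij idea中运行SqoopTest的main方法;
程序结束时会在控制台输出本次导入耗时(毫秒),导入的数据位于HDFS的/test/log目录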
9.sqooputil.zip
https://download.csdn.net/download/a781136776/10438741