HDFS命令和Java API
一、安装eclipse
1、官网下载Eclipse安装包eclipse-inst-win64:https://www.eclipse.org/downloads/
2、运行安装包,选择Eclipse IDE for Java Developers,开始安装。
二、安装Maven
1、Maven官网http://maven.apache.org/download.cgi,选择最近的镜像,选择Maven压缩包apache-maven-3.6.0-bin.tar.gz开始下载。
2、解压Maven压缩包apache-maven-3.6.0-bin.tar.gz,得到文件夹apache-maven-3.6.0,将其拷入自定义路径,如C:\eclipse\apache-maven-3.6.0。
3、配置Maven的环境变量:在Path中添加Maven的bin目录(如C:\eclipse\apache-maven-3.6.0\bin),然后在cmd命令行运行mvn -v,查看是否成功安装配置。
三、Eclipse配置Maven
1、修改settings.xml
在Maven安装文件夹\apache-maven-3.6.0下面,新建\repository文件夹,作为Maven本地仓库。在conf\settings.xml文件里添加 <localRepository>C:\eclipse\apache-maven-3.6.0\repository</localRepository>。
2、配置Maven的installation和User Settings
【Preferences】→【Maven】→【Installations】配置Maven安装路径,【User Settings】配置settings.xml的路径。
3、添加pom.xml依赖
依赖(Maven Repository: hadoop)所在网址:https://mvnrepository.com/tags/hadoop ,找到对应版本的三个依赖(如下),拷贝至pom.xml的<dependencies>与</dependencies>之间,保存之后自动生成Maven Dependencies。
pom.xml代码:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates. -->
  <groupId>download</groupId>
  <artifactId>Download</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <!-- Hadoop libraries used by the HDFS example programs; all three share one version. -->
  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.7.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.3</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Compile the sources as Java 8. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>

      <!-- Build a jar-with-dependencies whose manifest Main-Class is hdfs.files.HDFSDownload. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
          <archive>
            <manifest>
              <mainClass>hdfs.files.HDFSDownload</mainClass>
            </manifest>
          </archive>
        </configuration>
        <executions>
          <!-- Bind the "single" goal to the package phase. -->
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
四、在Eclipse里新建Maven Project
五、HDFS的Java程序
HDFSMKdir.java新建HDFS目录/aadir
package hdfs.files;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Creates the directory {@code /aadir} under the HDFS root.
 *
 * <p>Connects to the file system at {@code hdfs://hadoop:9000} with the
 * {@code HADOOP_USER_NAME} system property set to {@code root}.
 */
public class HDFSMKdir {
    /**
     * Program entry point.
     *
     * @param args unused
     * @throws IOException if connecting to HDFS or creating the directory fails
     */
    public static void main(String[] args) throws IOException {
        // Run the HDFS operations as user "root".
        System.setProperty("HADOOP_USER_NAME", "root");

        // Point the client at the target HDFS instance.
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop:9000");

        boolean created;
        // try-with-resources closes the connection even when mkdirs throws.
        try (FileSystem client = FileSystem.get(conf)) {
            created = client.mkdirs(new Path("/aadir"));
        }

        // Report success only when mkdirs actually reported it.
        if (created) {
            System.out.println("successfully!");
        } else {
            System.err.println("failed to create /aadir");
        }
    }
}
HDFSUpload.java写入/上传 本地文件/usr/local/hdfs/aa.txt(在Windows上运行时可改为c:\hdfs\aa.txt) 到HDFS的/aadir目录下,保存为/aadir/aaout.txt。
package hdfs.files;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Uploads the local file {@code /usr/local/hdfs/aa.txt} to HDFS as
 * {@code /aadir/aaout.txt}.
 *
 * <p>Connects to the file system at {@code hdfs://hadoop:9000} with the
 * {@code HADOOP_USER_NAME} system property set to {@code root}.
 */
public class HDFSUpload {
    /** Local file that is read. */
    private static final String LOCAL_SOURCE = "/usr/local/hdfs/aa.txt";
    /** HDFS file that is written. */
    private static final String HDFS_TARGET = "/aadir/aaout.txt";

    /**
     * Program entry point.
     *
     * @param args unused
     * @throws IOException if the local file cannot be read or the HDFS write fails
     */
    public static void main(String[] args) throws IOException {
        // Run the HDFS operations as user "root".
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop:9000");

        // Resources are closed in reverse order (output, input, client),
        // including when the copy fails part-way through.
        try (FileSystem client = FileSystem.get(conf);
             InputStream input = new FileInputStream(LOCAL_SOURCE);
             OutputStream output = client.create(new Path(HDFS_TARGET))) {
            byte[] buffer = new byte[1024];
            int len;
            while ((len = input.read(buffer)) != -1) {
                output.write(buffer, 0, len);
            }
            // Push any buffered bytes out before the stream is closed.
            output.flush();
        }
    }
}
HDFSDownload.java读/下载 HDFS的根目录文件/bb.txt 到本地文件/usr/local/hdfs/bbout.txt(在Windows上运行时可改为c:\hdfs目录下的文件)。
package hdfs.files;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Downloads the HDFS file {@code /bb.txt} to the local file
 * {@code /usr/local/hdfs/bbout.txt}.
 *
 * <p>Connects to the file system at {@code hdfs://hadoop:9000} with the
 * {@code HADOOP_USER_NAME} system property set to {@code root}.
 */
public class HDFSDownload {
    /** HDFS file that is read. */
    private static final String HDFS_SOURCE = "/bb.txt";
    /** Local file that is written. */
    private static final String LOCAL_TARGET = "/usr/local/hdfs/bbout.txt";

    /**
     * Program entry point.
     *
     * @param args unused
     * @throws IOException if the HDFS file cannot be read or the local write fails
     */
    public static void main(String[] args) throws IOException {
        // Run the HDFS operations as user "root".
        System.setProperty("HADOOP_USER_NAME", "root");

        // Point the client at the target HDFS instance.
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop:9000");

        // Read from HDFS and write to the local disk. The previous version had
        // the two directions swapped and overwrote /bb.txt on HDFS instead of
        // downloading it.
        // Resources are closed in reverse order (output, input, client),
        // including when the copy fails part-way through.
        try (FileSystem client = FileSystem.get(conf);
             InputStream input = client.open(new Path(HDFS_SOURCE));
             OutputStream output = new FileOutputStream(LOCAL_TARGET)) {
            byte[] buffer = new byte[1024];
            int len;
            while ((len = input.read(buffer)) != -1) {
                output.write(buffer, 0, len);
            }
            // Push any buffered bytes out before the stream is closed.
            output.flush();
        }
    }
}
HDFSFileIfExist.java查看HDFS文件/bb.txt是否存在。
package hdfs.files;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Checks whether the HDFS file {@code /bb.txt} exists and prints the result.
 *
 * <p>Connects to the file system at {@code hdfs://hadoop:9000} with the
 * {@code HADOOP_USER_NAME} system property set to {@code root}.
 */
public class HDFSFileIfExist {
    /**
     * Program entry point.
     *
     * @param args unused
     * @throws IOException if connecting to HDFS or the existence check fails
     */
    public static void main(String[] args) throws IOException {
        // Run the HDFS operations as user "root".
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop:9000");

        String fileName = "/bb.txt";
        // try-with-resources closes the connection, which was previously never closed.
        try (FileSystem client = FileSystem.get(conf)) {
            if (client.exists(new Path(fileName))) {
                // Spelling corrected from "seccessfully!".
                System.out.println("successfully!");
            } else {
                System.out.println("file no exist!");
            }
        }
    }
}
运行Java程序,运行成功后在cmd中打包(命令:mvn assembly:assembly),然后通过Xftp将target目录下生成的jar包上传到Linux服务器,再在Xshell中运行。