前言
分布式集群除了可以利用简单的shell命令进行操作外,还可以使用Java API的方式进行操作,下面是操作流程与代码分享。
HDFS 的 Java API 操作
首先,创建maven工程,并导入jar包
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0-mr1-cdh5.14.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0-cdh5.14.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0-cdh5.14.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.6.0-cdh5.14.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/junit/junit -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<!-- 4.13.2 fixes the TemporaryFolder information-disclosure advisory
     (CVE-2020-15250) present in 4.11. -->
<version>4.13.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<!-- Pin an explicit version: the RELEASE meta-version is deprecated and
     makes builds non-reproducible (it resolves to whatever is newest). -->
<version>6.14.3</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<minimizeJar>true</minimizeJar>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
开发 HDFS 的 Java API 操作
1、创建文件夹
/**
 * Creates the directory /zzz/dir01 on HDFS (including missing parents),
 * like {@code hdfs dfs -mkdir -p}.
 *
 * @throws IOException if the NameNode is unreachable or the RPC fails
 */
@Test
public void mkdirToHdfs() throws IOException {
    Configuration configuration = new Configuration();
    // Point the client at the HDFS NameNode instead of the local filesystem.
    configuration.set("fs.defaultFS", "hdfs://node01:8020");
    // try-with-resources: the FileSystem is closed even if mkdirs throws
    // (the original leaked it on exception).
    try (FileSystem fs = FileSystem.get(configuration)) {
        // mkdirs returns true on success (or if the directory already exists).
        boolean created = fs.mkdirs(new Path("/zzz/dir01"));
        System.out.println("mkdirs /zzz/dir01 -> " + created);
    }
}
2、文件上传
/**
 * Uploads a local file to HDFS via {@link FileSystem#copyFromLocalFile}.
 *
 * @throws IOException        on any HDFS or local I/O failure
 * @throws URISyntaxException if the NameNode URI is malformed
 */
@Test
public void uploadFile() throws IOException, URISyntaxException {
    Configuration configuration = new Configuration();
    // try-with-resources closes the FileSystem even if the copy fails
    // (the original leaked it on exception).
    try (FileSystem fs = FileSystem.get(new URI("hdfs://node01:8020"), configuration)) {
        // file:// source on the local disk, destination path inside HDFS.
        fs.copyFromLocalFile(new Path("file:///E:\\***\\***.txt"), new Path("/zzz/***.txt"));
    }
}
3、文件下载
/**
 * Downloads /zzz/dir01/hello.txt from HDFS to the local disk via
 * {@link FileSystem#copyToLocalFile}.
 *
 * @throws IOException        on any HDFS or local I/O failure
 * @throws URISyntaxException if the NameNode URI is malformed
 */
@Test
public void downloadFile() throws URISyntaxException, IOException {
    Configuration configuration = new Configuration();
    // try-with-resources closes the FileSystem even if the copy fails
    // (the original leaked it on exception).
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), configuration)) {
        // HDFS source, file:// destination on the local disk.
        fileSystem.copyToLocalFile(new Path("/zzz/dir01/hello.txt"), new Path("file:///E:\\大数据\\hi.txt"));
    }
}
4、文件删除
/**
 * Deletes /zzz/cache from HDFS immediately and recursively.
 *
 * <p>Note: the original used {@code deleteOnExit}, which only REGISTERS the
 * path for deletion when the FileSystem is closed / the JVM exits, and whose
 * boolean result means "registered", not "deleted". {@code delete(path, true)}
 * performs the deletion right away.
 *
 * @throws IOException        on any HDFS I/O failure
 * @throws URISyntaxException if the NameNode URI is malformed
 */
@Test
public void deleteFile() throws URISyntaxException, IOException {
    Configuration configuration = new Configuration();
    // try-with-resources closes the FileSystem even on failure.
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), configuration)) {
        // second argument 'true' => recursive delete (required for directories).
        boolean deleted = fileSystem.delete(new Path("/zzz/cache"), true);
        System.out.println("delete /zzz/cache -> " + deleted);
    }
}
5、文件重命名
/**
 * Renames /zzz/dir01/hi.txt to /zzz/dir01/hello.txt on HDFS.
 *
 * @throws IOException        on any HDFS I/O failure
 * @throws URISyntaxException if the NameNode URI is malformed
 */
@Test
public void renameFile() throws URISyntaxException, IOException {
    Configuration configuration = new Configuration();
    // try-with-resources closes the FileSystem even if rename throws
    // (the original leaked it on exception).
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), configuration)) {
        // rename returns false (rather than throwing) when the source is
        // missing or the destination exists — surface the result instead of
        // silently dropping it.
        boolean renamed = fileSystem.rename(new Path("/zzz/dir01/hi.txt"), new Path("/zzz/dir01/hello.txt"));
        System.out.println("rename hi.txt -> hello.txt: " + renamed);
    }
}
6、查看hdfs文件相关信息
/**
 * Recursively lists every file under /zzz/dir01 and prints its metadata
 * (name, length, permission, replication) and, per block, the number of
 * hosts holding a replica followed by each host name.
 *
 * @throws IOException          on any HDFS I/O failure
 * @throws URISyntaxException   if the NameNode URI is malformed
 * @throws InterruptedException if connecting as the remote user is interrupted
 */
@Test
public void testListFiles() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // Third argument: act as the remote user "hadoop".
    // try-with-resources closes the FileSystem even on failure.
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), configuration, "hadoop")) {
        // 'true' => recurse into sub-directories.
        RemoteIterator<LocatedFileStatus> listFiles = fileSystem.listFiles(new Path("/zzz/dir01"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus status = listFiles.next();
            System.out.println("name: " + status.getPath().getName());
            System.out.println("Length: " + status.getLen());
            System.out.println("Permission: " + status.getPermission());
            System.out.println("replication: " + status.getReplication());
            System.out.println("----------------------------");
            for (BlockLocation blockLocation : status.getBlockLocations()) {
                String[] hosts = blockLocation.getHosts();
                // Print the replica count ONCE per block (the original printed
                // it once per host, repeating the same number N times).
                System.out.println(hosts.length);
                for (String host : hosts) {
                    System.out.println(host);
                }
            }
            System.out.println("*********");
        }
    }
}
7、通过io流进行数据上传操作
/**
 * Uploads a local file to HDFS by streaming it through raw IO:
 * local FileInputStream -> HDFS FSDataOutputStream via commons-io IOUtils.copy.
 *
 * @throws IOException          on any local or HDFS I/O failure
 * @throws URISyntaxException   if the NameNode URI is malformed
 * @throws InterruptedException if connecting as the remote user is interrupted
 */
@Test
public void putFileToHDFS() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // try-with-resources closes all three resources even if the copy throws;
    // the original only closed them on the happy path and leaked on exception.
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), configuration, "hadoop");
         FileInputStream fileInputStream = new FileInputStream(new File("E:\\大数据\\***.pdf"));
         FSDataOutputStream fsDataOutputStream = fileSystem.create(new Path("/zzz/dir01/***.pdf"))) {
        // commons-io: pump the whole input stream into the HDFS output stream.
        IOUtils.copy(fileInputStream, fsDataOutputStream);
    }
}
8、通过IO流从hdfs上面下载文件
/**
 * Downloads /zzz/dir01/hello.txt from HDFS by streaming it through raw IO:
 * HDFS FSDataInputStream -> local FileOutputStream via commons-io IOUtils.copy.
 *
 * @throws IOException          on any local or HDFS I/O failure
 * @throws URISyntaxException   if the NameNode URI is malformed
 * @throws InterruptedException if connecting as the remote user is interrupted
 */
@Test
public void downloadFileFromHDFS() throws IOException, URISyntaxException, InterruptedException {
    Configuration configuration = new Configuration();
    // try-with-resources closes all three resources even if the copy throws;
    // the original only closed them on the happy path and leaked on exception.
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), configuration, "hadoop");
         FSDataInputStream fsDataInputStream = fileSystem.open(new Path("/zzz/dir01/hello.txt"));
         FileOutputStream fileOutputStream = new FileOutputStream(new File("E:\\大数据\\hi.txt"))) {
        // commons-io: pump the HDFS input stream into the local file.
        IOUtils.copy(fsDataInputStream, fileOutputStream);
    }
}
9、hdfs的小文件合并
/**
 * Merges every small file in the local directory E:\*** into one HDFS file
 * /zzz/dir01/bigfile.xml, concatenating them in listStatus order.
 *
 * @throws IOException          on any local or HDFS I/O failure
 * @throws URISyntaxException   if the NameNode URI is malformed
 * @throws InterruptedException if connecting as the remote user is interrupted
 */
@Test
public void mergeFile() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // try-with-resources closes both filesystems and the output stream even
    // when a copy throws; the original leaked all of them on exception.
    try (FileSystem fs = FileSystem.get(new URI("hdfs://node01:8020"), configuration, "hadoop");
         // Local filesystem handle for reading the small input files.
         LocalFileSystem local = FileSystem.getLocal(configuration);
         FSDataOutputStream fsDataOutputStream = fs.create(new Path("/zzz/dir01/bigfile.xml"))) {
        // Enumerate the small files in the local directory.
        FileStatus[] fileStatuses = local.listStatus(new Path("E:\\***"));
        for (FileStatus fileStatus : fileStatuses) {
            Path path = fileStatus.getPath();
            // Append each small file onto the single HDFS output stream;
            // the inner try-with-resources closes every input stream.
            try (FSDataInputStream dataInputStream = local.open(path)) {
                IOUtils.copy(dataInputStream, fsDataOutputStream);
            }
        }
    }
}
最后,需要注意的是,不要导错包(例如 IOUtils 应导入 org.apache.commons.io.IOUtils,而不是 org.apache.hadoop.io.IOUtils,两者的方法签名不同)
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.Test;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;