Decompressing a tgz file on HDFS into a specified HDFS directory

This post presents a Java class for unpacking tar.gz archives in a Hadoop environment. It opens a tar.gz file stored on HDFS, iterates over the archive's entries, and extracts each one into a target HDFS directory, recreating the archive's directory structure along the way; a minimal driver follows the class.

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.io.InputStream;
public class PackDecompressor {

    public static final int BUFFER_SIZE = 2048;

    public static void unTarGZ(String file, FileSystem hdfsClient, String destDir) throws Exception {
        // Default to the archive's parent directory when no destination is given.
        // The archive lives on HDFS, so use a Hadoop Path rather than a local java.io.File.
        if (StringUtils.isBlank(destDir)) {
            destDir = new Path(file).getParent().toString();
        }
        final FSDataInputStream inputStream = hdfsClient.open(new Path(file));
        unTar(new GzipCompressorInputStream(inputStream), destDir, hdfsClient);
    }
    private static void unTar(InputStream inputStream, String destDir, FileSystem hdfsClient) throws Exception {
        TarArchiveInputStream tarIn = new TarArchiveInputStream(inputStream, BUFFER_SIZE);
        TarArchiveEntry entry;
        try {
            while ((entry = tarIn.getNextTarEntry()) != null) {
                if (entry.isDirectory()) {
                    // Directory entry: create it (and any missing parents) on HDFS.
                    createDirectory(destDir, entry.getName(), hdfsClient);
                } else {
                    // File entry: tar entry names use '/' separators, which Path accepts directly.
                    final Path path = new Path(destDir, entry.getName());
                    // FileSystem.create() also creates any missing parent directories.
                    final FSDataOutputStream outputStream = hdfsClient.create(path);
                    try {
                        int length;
                        byte[] b = new byte[BUFFER_SIZE];
                        while ((length = tarIn.read(b)) != -1) {
                            outputStream.write(b, 0, length);
                        }
                    } finally {
                        IOUtils.closeQuietly(outputStream);
                    }
                }
            }
        } finally {
            // Closing the tar stream also closes the wrapped gzip and HDFS input streams.
            IOUtils.closeQuietly(tarIn);
        }
    }
    public static void createDirectory(String outputDir, String subDir, FileSystem hdfsClient) throws IOException {
        // Skip blank sub-directory names.
        if (StringUtils.isNotBlank(subDir)) {
            final Path f = new Path(outputDir, subDir);
            if (!hdfsClient.exists(f)) {
                hdfsClient.mkdirs(f);
            }
        }
    }
}
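For reference, here is a minimal driver. This is a sketch: the archive and destination paths are hypothetical placeholders, and FileSystem.get() picks up the cluster settings from whatever Hadoop configuration is on the classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class PackDecompressorDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical paths; substitute real HDFS locations.
        String archive = "/data/archives/sample.tgz";
        String destDir = "/data/extracted";
        FileSystem fs = FileSystem.get(new Configuration());
        try {
            PackDecompressor.unTarGZ(archive, fs, destDir);
        } finally {
            fs.close();
        }
    }
}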
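To compile and run the class, Apache Commons Compress (the tar and gzip streams), Commons IO (closeQuietly), Commons Lang3 (StringUtils), and the Hadoop client libraries all need to be on the classpath.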