Practical tips: unzipping a multi-file zip archive on HDFS with Java

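This post shows how to unzip a multi-file zip archive stored on HDFS and write each extracted entry back to HDFS as its own file. The utility class below combines the Hadoop FileSystem API with ZipArchiveInputStream from Apache Commons Compress, in two variants: unzipHdfsFile streams the archive straight from HDFS, while loadZipFileData first pulls the file into memory so that a block of non-zip "XX-" header lines can be stripped before decompression.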
package com.zx.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;

public class HdfsUnZip {
    public static void loadZipFileData(String hdfsFilePath) throws IOException {
        Configuration conf = new Configuration();
        String basePath = "hdfs://master:9000/";
        Path hdfs = new Path(basePath);
        // Run as HDFS user "root"; without this the client may not have permission to access HDFS
        System.setProperty("HADOOP_USER_NAME", "root");
        FileSystem fs = hdfs.getFileSystem(conf);

        FSDataInputStream fsInputStream = null;
        ZipArchiveInputStream zipInputStream = null;
        ZipArchiveEntry zipEntry = null;
        FSDataOutputStream mergerout = null;

        System.out.println(hdfsFilePath);

        try {
            // Open the HDFS file as an input stream
            fsInputStream = fs.open(new Path(hdfsFilePath));
            // Read the whole stream into a byte array
            byte[] b = toByteArray(fsInputStream);
            // Strip the useless "XX-" header lines that precede the actual zip data
            b = offXXData(b);
            // Wrap the cleaned bytes in a ZipArchiveInputStream
            zipInputStream = new ZipArchiveInputStream(new ByteArrayInputStream(b));
            // Iterate over the entries in the archive
            while ((zipEntry = zipInputStream.getNextZipEntry()) != null) {
                // Name of the entry currently being extracted
                String entryName = zipEntry.getName();
                System.out.println("fileName:"+entryName);
                System.out.println(basePath+entryName);
                // Only extract regular files; directories carry no content
                if (!zipEntry.isDirectory()) {
                    System.out.println("is file");
                    // Create the target file on HDFS
                    mergerout = fs.create(new Path(basePath+entryName));
                    int bufferSize = 10 * 1024 * 1024;
                    byte[] buffer = new byte[bufferSize];
                    int nRead;
                    // Copy the entry's decompressed content into the HDFS file
                    while ((nRead = zipInputStream.read(buffer, 0, bufferSize)) != -1) {
                        mergerout.write(buffer, 0, nRead);
                    }
                    mergerout.flush();
                    // Close the stream for this entry; leaving it open would leak
                    // one stream per entry, since finally only closes the last one
                    mergerout.close();
                    mergerout = null;
                } else {
                    System.out.println("is directory");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }finally {
            if(mergerout!=null){
                mergerout.close();
            }
            if(zipInputStream!=null){
                zipInputStream.close();
            }
            if(fsInputStream!=null){
                fsInputStream.close();
            }
        }
    }

    public static void unzipHdfsFile(String hdfsFilePath) throws IOException {
        Configuration conf = new Configuration();
        String basePath = "hdfs://master:9000/";
        Path hdfs = new Path(basePath);
        // Run as HDFS user "root"; without this the client may not have permission to access HDFS
        System.setProperty("HADOOP_USER_NAME", "root");
        FileSystem fs = hdfs.getFileSystem(conf);

        FSDataInputStream fsInputStream = null;
        ZipArchiveInputStream zipInputStream = null;
        ZipArchiveEntry zipEntry = null;
        FSDataOutputStream mergerout = null;

        System.out.println(hdfsFilePath);

        try {
            // Open the HDFS file and stream it straight into a ZipArchiveInputStream
            fsInputStream = fs.open(new Path(hdfsFilePath));
            zipInputStream = new ZipArchiveInputStream(fsInputStream);
            // Iterate over the entries in the archive
            while ((zipEntry = zipInputStream.getNextZipEntry()) != null) {
                // Name of the entry currently being extracted
                String entryName = zipEntry.getName();
                System.out.println("fileName:"+entryName);
                System.out.println(basePath+"process/"+entryName);
                // Only extract regular files; directories carry no content
                if (!zipEntry.isDirectory()) {
                    System.out.println("is file");
                    // Create the target file under process/ on HDFS
                    mergerout = fs.create(new Path(basePath+"process/"+entryName));
                    int bufferSize = 10 * 1024 * 1024;
                    byte[] buffer = new byte[bufferSize];
                    int nRead;
                    // Copy the entry's decompressed content into the HDFS file
                    while ((nRead = zipInputStream.read(buffer, 0, bufferSize)) != -1) {
                        mergerout.write(buffer, 0, nRead);
                    }
                    mergerout.flush();
                    // Close the stream for this entry; leaving it open would leak
                    // one stream per entry, since finally only closes the last one
                    mergerout.close();
                    mergerout = null;
                } else {
                    System.out.println("is directory");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }finally {
            if(mergerout!=null){
                mergerout.close();
            }
            if(zipInputStream!=null){
                zipInputStream.close();
            }
            if(fsInputStream!=null){
                fsInputStream.close();
            }
        }
    }

    public static void main(String[]args) throws IOException {
        zipHdfsFile("hdfs://master:9000/input.zip");
        loadZipFileData("hdfs://master:9000/input/2_74_1517985979_002_00_805690.zip");
    }
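With a working cluster, running main extracts every entry of the given archives back onto HDFS; the results can then be checked with, for example, hadoop fs -ls hdfs://master:9000/process/. Note that hdfs://master:9000 and the sample paths are specific to the author's environment.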


    /**
     * Strips the leading "XX-" header lines from the data and returns the bytes
     * of the embedded zip archive. If the data does not begin with an "XX-" line
     * it is returned unchanged.
     *
     * @param data raw bytes read from HDFS
     * @return the data with any header lines removed
     * @throws IOException if reading the data fails
     */
    public static byte[] offXXData(byte[] data) throws IOException {

        ByteArrayOutputStream out = null;
        CustomLineInputStream lis = null;

        try {
            out = new ByteArrayOutputStream();
            lis = new CustomLineInputStream(new ByteArrayInputStream(data));
            String line = null;
            List<Byte> bytes = new ArrayList<Byte>();

            boolean isFirstLine = true;
            while ((line = lis.readLineBytes(bytes)) != null) {
                // If the very first line is not an "XX-" header, there is no
                // header block at all; return the data unchanged
                if (isFirstLine && (!line.startsWith("XX-"))) {
                    return data;
                } else {
                    isFirstLine = false;
                }
                // The first non-header line marks the start of the zip data
                if (!line.startsWith("XX-")) {
                    break;
                }
                // Discard the bytes of the header line just consumed
                bytes.clear();
            }
            byte[] bs = new byte[1024];
            int i = 0, flag = 0;
            // Write out the buffered first data line, skipping any leading
            // line-terminator bytes left over from the header block
            for (int j = 0; j < bytes.size(); j++) {
                byte b = bytes.get(j);
                if ((b == '\n' || b == '\r') && flag == 0) {
                    continue;
                } else {
                    flag++;
                }
                out.write(b);
            }
            // Copy the remainder of the stream verbatim
            while ((i = lis.read(bs)) != -1) {
                out.write(bs, 0, i);
            }

            out.flush();
            data = out.toByteArray();
        } catch (IOException e) {
            e.printStackTrace();
            throw e;
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (lis != null) {
                try {
                    lis.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return data;
    }

    // Reads a local file fully into a byte array
    public static byte[] File2byte(File file) {
        byte[] buffer = null;
        // try-with-resources ensures both streams are closed even on error;
        // FileNotFoundException is an IOException, so one catch suffices
        try (FileInputStream fis = new FileInputStream(file);
             ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
            byte[] b = new byte[1024];
            int n;
            while ((n = fis.read(b)) != -1) {
                bos.write(b, 0, n);
            }
            buffer = bos.toByteArray();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return buffer;
    }

    // Reads an input stream fully into a byte array
    public static byte[] toByteArray(InputStream input) throws IOException {
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        byte[] buffer = new byte[4096];
        int n = 0;
        while (-1 != (n = input.read(buffer))) {
            output.write(buffer, 0, n);
        }
        return output.toByteArray();
    }
}
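The offXXData method relies on a CustomLineInputStream helper that the original post does not include. From the calling code, its assumed contract is: readLineBytes appends every raw byte of the next line, terminators included, to the caller's list and returns the decoded line without its terminator (null at end of stream), while read and close behave like an ordinary InputStream. A minimal sketch under those assumptions, not the author's original class:

package com.zx.utils;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;

// Hypothetical reconstruction of the helper used by offXXData above
public class CustomLineInputStream extends FilterInputStream {

    public CustomLineInputStream(InputStream in) {
        super(in);
    }

    /**
     * Reads one line from the stream. Every raw byte, including '\r' and '\n'
     * terminators, is appended to lineBytes; the returned String holds the line
     * content without terminators. Returns null at end of stream.
     */
    public String readLineBytes(List<Byte> lineBytes) throws IOException {
        int b = in.read();
        if (b == -1) {
            return null; // end of stream
        }
        StringBuilder sb = new StringBuilder();
        while (b != -1) {
            lineBytes.add((byte) b);
            if (b == '\n') {
                break; // terminator consumed; stop at end of line
            }
            if (b != '\r') {
                // Naive byte-to-char mapping; only used to test the "XX-" prefix
                sb.append((char) b);
            }
            b = in.read();
        }
        return sb.toString();
    }
}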
