解决解压rar和zip文件时文件名中文乱码的问题,以及读取zip文件中各条目的内容

import java.io.*;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import com.github.junrar.Archive;
import com.github.junrar.rarfile.FileHeader;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.tika.Tika;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.stereotype.Service;
import org.xml.sax.SAXException;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.zip.ZipFile;

/**
 * Utilities for extracting RAR archives and for reading the text content of
 * the entries stored in a ZIP archive.
 */
public class RARTool {

    /**
     * Extracts every file entry of a RAR archive into a target directory.
     *
     * <p>Directory entries are skipped; the directories needed by file entries
     * are created on demand. Entry names are taken from the Unicode name when
     * the header is flagged as Unicode, otherwise from the plain name, so that
     * non-ASCII (e.g. Chinese) file names are not garbled.
     *
     * <p>Entries are resolved relative to {@code targetPath} whether or not it
     * ends with a path separator.
     *
     * @param RarPath    path of the RAR archive to read
     * @param targetPath directory the entries are written into
     * @throws IOException if a directory cannot be created, a file cannot be
     *                     written, or an entry name resolves to a location
     *                     outside {@code targetPath}
     * @throws Exception   any other failure reported by the archive library
     */
    public void unrar(String RarPath, String targetPath) throws Exception {
        // Canonicalise once so the containment check below compares like with like.
        File targetDir = new File(targetPath).getCanonicalFile();
        Archive archive = new Archive(new File(RarPath));
        try {
            for (FileHeader f = archive.nextFileHeader(); f != null; f = archive.nextFileHeader()) {
                // Directories are only created implicitly, as parents of files.
                if (f.isDirectory()) {
                    continue;
                }
                // Pick the name variant that matches the header's encoding flag.
                String entryName = f.isUnicode() ? f.getFileNameW() : f.getFileNameString();
                File outFile = resolveInside(targetDir, entryName);

                File parent = outFile.getParentFile();
                // mkdirs() (not mkdir()) because entries may be nested several levels deep.
                if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
                    throw new IOException("Cannot create directory: " + parent);
                }
                // One stream per entry, closed as soon as the entry is written.
                try (FileOutputStream outputStream = new FileOutputStream(outFile)) {
                    archive.extractFile(f, outputStream);
                }
            }
        } finally {
            archive.close();
        }
    }

    /**
     * Maps an archive entry name onto a file below {@code targetDir}.
     *
     * @param targetDir canonical extraction directory
     * @param entryName entry name as stored in the archive ("\" separated)
     * @return the canonical destination file
     * @throws IOException if the entry would be written outside {@code targetDir}
     */
    private static File resolveInside(File targetDir, String entryName) throws IOException {
        // Literal char replacement: a regex replacement of "\" would be rejected
        // when File.separator itself is a backslash.
        String relative = entryName.replace('\\', File.separatorChar);
        File resolved = new File(targetDir, relative).getCanonicalFile();
        // Reject names such as "..\..\x" that climb out of the target directory.
        if (!resolved.toPath().startsWith(targetDir.toPath())) {
            throw new IOException("Archive entry escapes target directory: " + entryName);
        }
        return resolved;
    }

    /**
     * Extracts the text of every file entry in a ZIP archive.
     *
     * <p>Each file entry is parsed on its own with a fresh content handler, so
     * every element of the result holds the text of exactly one entry, in
     * archive order. Progress and the extracted text are also printed to
     * standard output.
     *
     * <p>Failures are reported via {@code printStackTrace()} and end the
     * processing; the texts collected up to that point are still returned.
     *
     * @param sfile path of the ZIP archive
     * @return extracted text, one element per file entry; never {@code null}
     */
    public List<String> getZipText(String sfile) {
        List<String> tempString = new ArrayList<String>();
        List<String> lfile = new ArrayList<>();
        File file = new File(sfile);
        // NOTE(review): the charset is detected from the raw archive bytes, as in
        // the original code; whether that reliably yields the entry-name
        // encoding should be confirmed (a fixed charset such as GBK may be
        // more appropriate for archives created on Chinese Windows systems).
        try (InputStream probe = new FileInputStream(file);
             AutoDetectReader dr = new AutoDetectReader(probe);
             InputStream raw = new BufferedInputStream(new FileInputStream(file));
             ZipInputStream zip = new ZipInputStream(raw, dr.getCharset())) {

            System.out.println("********charset********:" + dr.getCharset().name());
            Parser parser = new AutoDetectParser();

            for (ZipEntry entry = zip.getNextEntry(); entry != null; entry = zip.getNextEntry()) {
                if (entry.isDirectory()) {
                    System.out.println("****entry=" + entry.getName() + " " + entry.getSize());
                    continue;
                }
                System.out.println("####entry=" + entry.getName() + " " + entry.getSize());
                lfile.add(entry.getName());

                // The zip stream reports end-of-stream at the end of the current
                // entry, so the parser sees exactly this entry's bytes. Shield
                // close() so a parser closing its input cannot close the archive.
                InputStream entryStream = new FilterInputStream(zip) {
                    @Override
                    public void close() {
                        // intentionally left open; the enclosing try closes it
                    }
                };
                // Fresh handler and metadata per entry so texts do not accumulate.
                BodyContentHandler textHandler = new BodyContentHandler();
                parser.parse(entryStream, textHandler, new Metadata(), new ParseContext());
                tempString.add(textHandler.toString());
            }

            for (String sfile1 : lfile) {
                System.out.println("$$$$$$$:" + sfile1 + "&&&&");
            }
            StringBuilder sbf = new StringBuilder();
            for (String text : tempString) {
                System.out.println("Apache Tika - Converted input string : " + text);
                sbf.append(text);
                System.out.println("Final text from all the three files " + sbf.toString());
            }
        } catch (IOException | SAXException | TikaException e) {
            // Best effort: report and return what was collected so far.
            e.printStackTrace();
        }
        return tempString;
    }
}

 

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值