各类型文档工具类

1 篇文章 0 订阅

1. DOC_SUFFIX = {".doc", ".docx"};

2. TXT_SUFFIX = {".txt", ".csv"};

3. EXCEL_SUFFIX = {".xls", ".xlsx"};

4. PDF_SUFFIX = {".pdf"};

5. ZIP_SUFFIX = {".zip"};


import lombok.Getter;
import net.lingala.zip4j.core.ZipFile;
import net.lingala.zip4j.exception.ZipException;
import net.lingala.zip4j.model.ZipParameters;
import net.lingala.zip4j.util.Zip4jConstants;
import org.apache.commons.lang.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.tools.ant.Project;
import org.apache.tools.ant.taskdefs.Expand;
import org.apache.tools.ant.taskdefs.Zip;
import org.apache.tools.ant.types.FileSet;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;


/**
 * File-handling utilities: plain-text extraction from Word, text/CSV, Excel and PDF
 * files, zip compression/extraction (with or without a password), and a few small
 * file-system helpers.
 *
 * <p>File suffixes are matched case-insensitively (for example {@code .PDF} is
 * treated like {@code .pdf}).
 */
public class FileOperatorHelper {

    /** Suffixes handled by {@link #openDoc(File)}. */
    public static final String[] DOC_SUFFIX = {".doc",".docx"};

    /** Suffixes handled by {@link #openTxt2(String)}. */
    public static final String[] TXT_SUFFIX = {".txt",".csv"};

    /** Suffixes handled by {@link #openExcel(String)}. */
    public static final String[] EXCEL_SUFFIX = {".xls",".xlsx"};

    /** Suffixes handled by the PDF extractor. */
    public static final String[] PDF_SUFFIX = {".pdf"};

    /** Suffixes recognised as zip archives. */
    public static final String[] ZIP_SUFFIX = {".zip"};

    // ---------------------------------------------------------------- creation

    /**
     * Creates a directory at {@code path}; while the candidate path already exists a
     * random number in {@code [0, 100)} is appended and the new candidate is tried.
     *
     * @param path preferred directory path
     * @return the path of the directory that was actually created, which differs from
     *         {@code path} whenever one or more suffixes had to be appended
     */
    private static String mkdir(String path){
        String candidate = path;
        File dir = new File(candidate);
        // Keep extending the name until it is free; returning the final candidate
        // guarantees the caller receives the directory that really got created.
        while (dir.exists()) {
            candidate = candidate + (int)(Math.random()*100);
            dir = new File(candidate);
        }
        dir.mkdir();
        return candidate;
    }

    // ----------------------------------------------------------------- opening

    /**
     * Extracts the text of a file, choosing the extractor from the file suffix.
     *
     * @param path path of the file; must contain a {@code '.'}
     * @return the extracted text, or an empty string when the suffix is not one of
     *         {@link #DOC_SUFFIX}, {@link #TXT_SUFFIX}, {@link #PDF_SUFFIX} or
     *         {@link #EXCEL_SUFFIX}
     * @throws IllegalArgumentException if {@code path} contains no {@code '.'}
     * @throws IOException if the file cannot be read
     * @throws OpenXML4JException if an OOXML package cannot be opened
     * @throws XmlException if an OOXML document cannot be parsed
     */
    public static String open(String path) throws OpenXML4JException, XmlException, IOException {
        if (!path.contains(".")){
            throw new IllegalArgumentException("请输入一个文件路径");
        }
        String suffix = suffixOf(path);

        // The suffix groups are disjoint, so at most one branch can match.
        if (contains(DOC_SUFFIX, suffix)) {
            return openDoc(new File(path));
        }
        if (contains(TXT_SUFFIX, suffix)) {
            return openTxt2(path);
        }
        if (contains(PDF_SUFFIX, suffix)) {
            return openPdf(new File(path));
        }
        if (contains(EXCEL_SUFFIX, suffix)) {
            return openExcel(path);
        }
        return "";
    }

    /**
     * Returns the lower-cased text from the last {@code '.'} of {@code path} to its
     * end (including the dot), or an empty string when there is no {@code '.'}.
     */
    private static String suffixOf(String path) {
        int dot = path.lastIndexOf('.');
        if (dot < 0) {
            return "";
        }
        // Locale.ROOT keeps the result independent of the default locale
        // (e.g. Turkish would otherwise lower-case "I" to a dotless i).
        return path.substring(dot).toLowerCase(Locale.ROOT);
    }

    /** Returns {@code true} when {@code arr} holds an element equal to {@code str}. */
    private static  boolean contains(String[] arr,String str){
        for (String s : arr) {
            if(StringUtils.equals(s,str)){
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether {@code path} ends with one of the {@link #ZIP_SUFFIX} suffixes.
     *
     * @param path path to inspect; must contain a {@code '.'}
     * @return {@code true} if the suffix identifies a zip archive
     * @throws IllegalArgumentException if {@code path} contains no {@code '.'}
     */
    public static boolean isZip(String path){
        if (!path.contains(".")){
            throw new IllegalArgumentException("请输入一个文件路径");
        }
        return contains(ZIP_SUFFIX, suffixOf(path));
    }

    /** Closes {@code resource} if it is non-null, printing (not propagating) any failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // ------------------------------------------------------- 1. Word documents

    /**
     * Extracts the text of a Word document. The file is first read as a binary
     * {@code .doc}; if POI reports that it is actually an OOXML file, it is read
     * again as {@code .docx}.
     *
     * @param file the Word document
     * @return the document text
     * @throws IOException if the file cannot be read or closed
     * @throws OpenXML4JException if the OOXML package cannot be opened
     * @throws XmlException if the OOXML document cannot be parsed
     */
    public static String openDoc(File file) throws IOException, OpenXML4JException, XmlException {
        try (FileInputStream in = new FileInputStream(file);
             WordExtractor extractor = new WordExtractor(in)) {
            return extractor.getText();
        } catch (OfficeXmlFileException e) {
            // The stream and the binary extractor are already closed at this point.
            // Closing the OOXML extractor also releases the package it was built on.
            try (XWPFWordExtractor extractor =
                         new XWPFWordExtractor(POIXMLDocument.openPackage(file.getPath()))) {
                return extractor.getText();
            }
        }
    }

    // ------------------------------------------------------ 2. text / CSV files

    /**
     * Opens a text file for reading using {@code SftpServiceImpl.charset}.
     *
     * @param path path of the file
     * @return a reader the caller must close, or {@code null} if the file could not
     *         be opened (the failure is printed to standard error)
     */
    public static BufferedReader openTxt(String path){
        InputStream in = null;
        try {
            in = new FileInputStream(path);
            return new BufferedReader(new InputStreamReader(in,SftpServiceImpl.charset));
        } catch (IOException e) {
            e.printStackTrace();
            // Do not leak the file handle when the reader could not be built.
            closeQuietly(in);
            return null;
        }
    }

    /**
     * Reads a whole text file using {@code SftpServiceImpl.charset}.
     *
     * @param path path of the file
     * @return the file content with all lines concatenated; line terminators are
     *         not preserved
     * @throws IOException if the file cannot be read
     */
    public static String openTxt2(String path) throws IOException {
        try (InputStream in = new FileInputStream(new File(path));
             BufferedReader br = new BufferedReader(new InputStreamReader(in,SftpServiceImpl.charset))) {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line=br.readLine())!=null){
                sb.append(line);
            }
            return sb.toString();
        }
    }

    // ------------------------------------------------------------ 3. Excel files

    /**
     * Extracts the shared strings of an Excel workbook as one comma-terminated list.
     * The file is first read as a binary {@code .xls}; if POI reports that it is
     * actually an OOXML file, it is read as {@code .xlsx}.
     *
     * @param path path of the workbook
     * @return the shared strings, each followed by {@code ","}; for {@code .xls}
     *         input this is {@code null} when no shared-string record was seen
     * @throws IOException if the file cannot be read
     * @throws OpenXML4JException if the OOXML package cannot be opened
     */
    public static String openExcel(String path) throws OpenXML4JException, IOException {
        POIFSFileSystem fs = null;
        try{
            fs = new POIFSFileSystem(new File(path));
            ReadExcel2003 reader = new ReadExcel2003();
            HSSFRequest req = new HSSFRequest();
            // Listen for all records; the listener keeps only what it needs.
            req.addListenerForAllRecords(reader);
            // The workbook stream is closed even if event processing fails.
            try (InputStream din = fs.createDocumentInputStream("Workbook")) {
                new HSSFEventFactory().processEvents(req, din);
            }
            return reader.getContent();
        }catch (OfficeXmlFileException e){
            return readXlsxSharedStrings(path);
        }finally {
            if( fs != null ){
                fs.close();
            }
        }
    }

    /** Reads the shared-strings table of an {@code .xlsx} file as a comma-terminated list. */
    private static String readXlsxSharedStrings(String path) throws OpenXML4JException, IOException {
        OPCPackage pkg = OPCPackage.open(path);
        try {
            SharedStringsTable sst = new XSSFReader(pkg).getSharedStringsTable();
            StringBuilder sb = new StringBuilder();
            // Guard against a workbook without a shared-strings part.
            if (sst != null) {
                List<CTRst> strings = sst.getItems();
                for (CTRst ctRst : strings) {
                    sb.append(ctRst.getT()).append(",");
                }
            }
            return sb.toString();
        } finally {
            // revert() closes the package WITHOUT saving; close() on a package opened
            // this way would write it back over the source file.
            pkg.revert();
        }
    }

    // -------------------------------------------------------------- 4. zip files

    /**
     * Extracts an unencrypted zip archive into a new directory next to it. The
     * directory is named after the archive without its suffix (see
     * {@code mkdir} for how name collisions are resolved).
     *
     * @param srcPath path of the archive
     * @return the directory the archive was extracted into, or {@code null} when
     *         {@code srcPath} does not have a zip suffix (including when it has no
     *         suffix at all)
     */
    public static String unZip(String srcPath){
        if (!contains(ZIP_SUFFIX, suffixOf(srcPath))) {
            return null;
        }
        String dstDir = mkdir(srcPath.substring(0,srcPath.lastIndexOf(".")));
        Project p = new Project();
        Expand e = new Expand();
        e.setProject(p);
        e.setSrc(new File(srcPath));
        e.setOverwrite(false);
        e.setDest(new File(dstDir));
        // Ant defaults to UTF-8 for entry names, while archives produced on Windows
        // (e.g. by WinRAR) commonly use GBK/GB2312, so the encoding is set explicitly.
        e.setEncoding(SftpServiceImpl.charset);
        e.execute();
        return dstDir;
    }

    /**
     * Extracts a (possibly password-protected) zip archive into a new directory next
     * to it, named after the archive without its suffix.
     *
     * @param srcPath path of the archive
     * @param password password used when the archive is encrypted; ignored otherwise
     * @return the directory the archive was extracted into, or {@code null} when
     *         {@code srcPath} does not have a zip suffix (including when it has no
     *         suffix at all)
     * @throws ZipException if the archive cannot be read or extracted, or if it is
     *         encrypted and {@code password} is {@code null}
     */
    public static String unZip(String srcPath,String password) throws ZipException {
        if (!contains(ZIP_SUFFIX, suffixOf(srcPath))) {
            return null;
        }
        ZipFile zFile = new ZipFile(new File(srcPath));
        zFile.setFileNameCharset(SftpServiceImpl.charset);
        if (zFile.isEncrypted()) {
            if (password == null) {
                throw new ZipException("a password is required to extract encrypted archive " + srcPath);
            }
            zFile.setPassword(password.toCharArray());
        }
        // Create the target directory only once the archive has been validated.
        String dest = mkdir(srcPath.substring(0, srcPath.lastIndexOf(".")));
        zFile.extractAll(dest);
        return dest;
    }

    /**
     * Compresses a file or directory into an unencrypted zip archive. When
     * {@code dir} is a directory, files matching {@code **}{@code /*.error} are
     * excluded.
     *
     * @param dir file or directory to compress
     * @param zipFilepath path of the archive to write
     * @throws RuntimeException if {@code dir} does not exist
     */
    public static void compress(String dir,String zipFilepath){
        File file = new File(dir);
        if (!file.exists()) {
            throw new RuntimeException("source file or directory " + dir + " does not exist.");
        }
        Project proj = new Project();
        FileSet fileSet = new FileSet();
        fileSet.setProject(proj);
        if (file.isDirectory()) {
            fileSet.setDir(file);
            // Any Ant include/exclude pattern may be used here.
            fileSet.setExcludes("**/*.error");
        } else {
            fileSet.setFile(file);
        }
        Zip zip = new Zip();
        zip.setProject(proj);
        zip.setDestFile(new File(zipFilepath));
        zip.addFileset(fileSet);
        zip.setEncoding(SftpServiceImpl.charset);
        zip.execute();
        System.out.println("compress successed.");
    }

    /**
     * Compresses a file or directory into a zip archive, encrypting it when a
     * non-empty password is given.
     *
     * <p><b>Side effects on the source:</b> when {@code dir} is a directory it is
     * first walked with {@link #list(String)}, which expands and deletes nested zip
     * archives, and every file whose name ends in {@code .error} is deleted before
     * the folder is added. A single file whose path ends in {@code .error} is
     * silently skipped.
     *
     * @param dir file or directory to compress
     * @param zipFilepath path of the archive to write
     * @param passwd password; {@code null} or empty means no encryption
     * @throws ZipException if the archive cannot be written
     */
    public static void compress(String dir,String zipFilepath,String passwd) throws ZipException {
        File srcFile = new File(dir);
        ZipFile zipFile = new ZipFile(zipFilepath);
        ZipParameters parameters = new ZipParameters();
        parameters.setCompressionMethod(Zip4jConstants.COMP_DEFLATE);
        parameters.setCompressionLevel(Zip4jConstants.DEFLATE_LEVEL_NORMAL);
        if (!StringUtils.isEmpty(passwd)) {
            parameters.setEncryptFiles(true);
            parameters.setEncryptionMethod(Zip4jConstants.ENC_METHOD_STANDARD);
            parameters.setPassword(passwd.toCharArray());
        }
        if (srcFile.isDirectory()) {
            // Remove unwanted files from the folder before it is archived.
            for (File file : list(dir)) {
                if(file.getName().matches("^.*\\.error$")){
                    delete(file.getPath());
                }
            }
            zipFile.addFolder(srcFile, parameters);
        } else if(!dir.matches("^.*\\.error$")){
            zipFile.addFile(srcFile, parameters);
        }
    }

    // -------------------------------------------------------------- 5. PDF files

    /**
     * Extracts the text of a PDF file.
     *
     * @param file the PDF file
     * @return the text of the document
     * @throws IOException if the file cannot be loaded or read
     */
    private static String openPdf(File file) throws IOException {
        // The document is closed even when text extraction fails.
        try (PDDocument doc = PDDocument.load(file)) {
            return new PDFTextStripper().getText(doc);
        }
    }

    // ------------------------------------------------------ file-system helpers

    /**
     * Deletes a file, or a directory together with everything below it. Does
     * nothing when {@code path} does not exist.
     *
     * @param path file or directory to delete
     */
    public  static  void delete(String path){
        File file = new File(path);
        if(!file.exists()){
            return;
        }
        File[] files = file.listFiles();
        if(file.isDirectory() && files != null){
            // Children first: a directory can only be removed once it is empty.
            for (File f : files) {
                delete(f.getPath());
            }
        }
        file.delete();
    }

    /**
     * Tells whether a file or directory exists at {@code path}.
     *
     * @param path path to check
     * @return {@code true} if something exists at {@code path}
     */
    public static boolean exists(String path){
        return new File(path).exists();
    }

    /**
     * Recursively lists the regular files below {@code path}.
     *
     * <p><b>Side effect:</b> every zip archive encountered is extracted with
     * {@link #unZip(String)}, its extracted content is listed in its place, and the
     * archive itself is deleted.
     *
     * @param path file or directory to list
     * @return the files found; a directory whose content cannot be read contributes
     *         no entries
     */
    public static List<File> list(String path){
        List<File> files = new ArrayList<>();
        File file = new File(path);
        if(file.isDirectory()){
            File[] listFiles = file.listFiles();
            // listFiles() returns null when the directory cannot be read.
            if (listFiles != null) {
                for (File f : listFiles) {
                    files.addAll(list(f.getPath()));
                }
            }
            return files;
        }
        String extractedDir = unZip(file.getPath());
        if (extractedDir == null){
            files.add(file);
        }else{
            files.addAll(list(extractedDir));
            delete(file.getPath());
        }
        return files;
    }

    /** Manual smoke test: compresses a hard-coded desktop folder with a password. */
    public static void main(String[] args) throws ZipException {
        compress("C:\\Users\\Lenovo\\Desktop\\数据存储层","C:\\Users\\Lenovo\\Desktop\\controller.zip","123456");
        System.out.println("compress complete");
    }

    /**
     * Opens a file for writing using the {@code gbk} charset.
     *
     * @param path path of the file to write
     * @return a writer the caller must close, or {@code null} if the file could not
     *         be opened (the failure is printed to standard error)
     */
    public static BufferedWriter write(String path){
        OutputStream out = null;
        try {
            out = new FileOutputStream(path);
            return new BufferedWriter(new OutputStreamWriter(out,"gbk"));
        } catch (IOException e) {
            e.printStackTrace();
            // Do not leak the file handle when the writer could not be built.
            closeQuietly(out);
            return null;
        }
    }

}
/**
 * HSSF event listener that captures the strings of a workbook's SST record.
 * After event processing, {@code getContent()} returns those strings, each
 * followed by a comma, or {@code null} if no SST record was delivered.
 */
class ReadExcel2003 implements HSSFListener{

    @Getter
    private String content;

    /**
     * Stores the text of an SST record; every other record type is ignored.
     *
     * @param record the record delivered by the HSSF event factory
     */
    @Override
    public void processRecord(Record record) {
        if (record.getSid() != SSTRecord.sid) {
            return;
        }
        content = toText((SSTRecord) record);
    }

    /** Joins the unique strings of {@code sstRecord}, appending "," after each one. */
    private String toText(SSTRecord sstRecord){
        StringBuilder joined = new StringBuilder();
        int idx = 0;
        while (idx < sstRecord.getNumUniqueStrings()) {
            joined.append(sstRecord.getString(idx));
            joined.append(",");
            idx++;
        }
        return joined.toString();
    }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值