读取文件(txt,doc,docx,xlsx,xls,pdf)内容并转化为base64编码

将文件(txt,doc,docx,xlsx,xls,pdf)内容转化为base64编码;读取文件的内容;

运行结果
运行效果

1.添加maven依赖 版本自行更改

        <dependency>
           <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>1.8.8</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.0.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.16</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-examples</artifactId>
            <version>3.16</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-excelant</artifactId>
            <version>3.16</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.16</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.16</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.16</version>
        </dependency>

2.在domain创建文件实体类fileDTO

/**
 * Plain data holder describing a file: an id, a name, a type and the
 * content as a string. All fields are mutable through their setters.
 */
public class fileDTO {

    private String id;
    private String name;
    private String type;
    private String content;

    /** Creates an instance whose fields are all {@code null}. */
    public fileDTO() {
    }

    /**
     * Creates a fully populated instance.
     *
     * @param id      identifier of the file
     * @param name    name of the file
     * @param type    type of the file
     * @param content content of the file
     */
    public fileDTO(String id, String name, String type, String content) {
        this.id = id;
        this.name = name;
        this.type = type;
        this.content = content;
    }

    // ---- id ----

    /** @return the identifier, possibly {@code null} */
    public String getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    // ---- name ----

    /** @return the file name, possibly {@code null} */
    public String getName() {
        return this.name;
    }

    /** @param name the file name to store */
    public void setName(String name) {
        this.name = name;
    }

    // ---- type ----

    /** @return the file type, possibly {@code null} */
    public String getType() {
        return this.type;
    }

    /** @param type the file type to store */
    public void setType(String type) {
        this.type = type;
    }

    // ---- content ----

    /** @return the content, possibly {@code null} */
    public String getContent() {
        return this.content;
    }

    /** @param content the content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /**
     * Renders all four fields in a single bracketed line.
     *
     * @return the textual representation of this object
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("[ id: ");
        text.append(id);
        text.append(" name: ").append(name);
        text.append(" type: ").append(type);
        text.append("  content:  ").append(content);
        return text.append("]").toString();
    }
}

3.根据文件类型读取其文件内容,并将内容转化为base64编码

 /**
     * Reads a hard-coded sample file, extracts its content with the reader that
     * matches the file extension, Base64-encodes the extracted bytes and prints
     * the result.
     *
     * <p>The extension is matched case-insensitively. An extension without a
     * matching reader is rejected up front instead of failing later inside the
     * Base64 encoder with a {@code NullPointerException}.
     *
     * @throws IOException if reading a txt file fails
     * @throws IllegalArgumentException if the file extension is not one of
     *         docx, txt, doc, xlsx, xls or pdf
     */
    void readFile() throws IOException {
        // Path of the file to read.
        File file = new File("D:/test/测试.pdf");

        fileDTO filedto = new fileDTO();
        filedto.setName(file.getName());
        System.out.println(filedto.getName());
        // Everything after the last '.' is taken as the type.
        filedto.setType(file.getName().substring(file.getName().lastIndexOf(".") + 1));

        // Dispatch on the lower-cased extension so "PDF" or "Docx" are handled too.
        String type = filedto.getType().toLowerCase(java.util.Locale.ROOT);
        byte[] bytes;
        switch (type) {
            case "docx":
                System.out.println("docx");
                bytes = getdocxContent(file.getPath());
                break;
            case "txt":
                System.out.println("txt");
                bytes = gettxtContent(file);
                break;
            case "doc":
                System.out.println("doc");
                bytes = getdocContent(file.getPath());
                break;
            case "xlsx":
                System.out.println("xlsx");
                bytes = getxlsxContent(file.getPath());
                break;
            case "xls":
                System.out.println("xls");
                bytes = getxlsContent(file.getPath());
                break;
            case "pdf":
                System.out.println("pdf");
                bytes = getpdfContent(file.getPath());
                break;
            default:
                // No reader exists for this type; fail with a clear message.
                throw new IllegalArgumentException(
                        "Unsupported file type: " + filedto.getType());
        }

        String base64 = Base64.getEncoder().encodeToString(bytes);
        filedto.setContent(base64);
        System.out.println(filedto.getContent());
    }

4.若是只读取内容,不转base64,只需要以下代码即可(以gettxtContent为例)

/**
 * Reads the whole file and decodes its bytes as UTF-8 text.
 *
 * @param filePath the file to read
 * @return the file content as a string
 * @throws IOException if the file cannot be read
 */
String gettxtContent(File filePath) throws IOException {
    String location = String.valueOf(filePath);
    byte[] raw = Files.readAllBytes(Paths.get(location));
    return new String(raw, StandardCharsets.UTF_8);
}

读取文件内容,返回byte[]类型(base64编码由上面的readFile统一完成)


 /**
     * Reads a text file and returns its content as UTF-8 bytes.
     *
     * <p>The file is decoded as UTF-8 and encoded again as UTF-8. The charset
     * is given explicitly on both steps so the result does not depend on the
     * platform default charset.
     *
     * @param filePath the text file to read
     * @return the UTF-8 encoded content of the file
     * @throws IOException if the file cannot be read
     */
    byte[] gettxtContent(File filePath) throws IOException {
        byte[] bytes = Files.readAllBytes(Paths.get(String.valueOf(filePath)));
        String content = new String(bytes, StandardCharsets.UTF_8);
        // Encode with the charset used for decoding; a bare getBytes() would
        // use the platform default and could corrupt non-ASCII text.
        return content.getBytes(StandardCharsets.UTF_8);
    }

    /**
     * Extracts the text of a docx file and returns it as UTF-8 bytes.
     *
     * <p>An {@code IOException} while opening or parsing the file is printed
     * and an empty array is returned, as before. The underlying package is
     * now always released without writing anything back to the file.
     *
     * @param path path of the docx file
     * @return the extracted text encoded as UTF-8; empty if reading failed
     */
    byte[] getdocxContent(String path){
        List<String> docxList = new ArrayList<String>();
        OPCPackage opcPackage = null;
        POIXMLTextExtractor poiText = null;
        try {
            opcPackage = POIXMLDocument.openPackage(path);
            XWPFDocument xwpf = new XWPFDocument(opcPackage);
            poiText = new XWPFWordExtractor(xwpf);
            docxList.add(poiText.getText());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (poiText != null) {
                    // Closing the extractor also releases the package behind it.
                    poiText.close();
                } else if (opcPackage != null) {
                    // The extractor was never built: discard the package
                    // without saving so the source file is left untouched.
                    opcPackage.revert();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        System.out.println(docxList);
        String ss = String.join(",", docxList);
        // Explicit charset: a bare getBytes() depends on the platform default.
        return ss.getBytes(StandardCharsets.UTF_8);
    }

    /**
     * Extracts the text of a legacy Word (.doc) file and returns it as UTF-8
     * bytes.
     *
     * <p>Any exception while reading is printed and an empty array is
     * returned, as before. The input stream and the extractor are now closed
     * in every case.
     *
     * @param filePath path of the doc file
     * @return the extracted text encoded as UTF-8; empty if reading failed
     */
    byte[] getdocContent(String filePath){
        List<String> docList = new ArrayList<String>();
        // try-with-resources closes the extractor and then the stream.
        try (InputStream input = new FileInputStream(new File(filePath));
             WordExtractor wex = new WordExtractor(input)) {
            docList.add(wex.getText());
        } catch (Exception e) {
            e.printStackTrace();
        }
        System.out.println(docList);
        String ss = String.join(",", docList);
        // Explicit charset: a bare getBytes() depends on the platform default.
        return ss.getBytes(StandardCharsets.UTF_8);
    }


    /**
     * Reads every cell of the first sheet of an xlsx file and returns the cell
     * texts, joined with commas, as UTF-8 bytes.
     *
     * <p>Any exception while reading is printed and the cells collected so far
     * (possibly none) are returned, as before. The last row of the sheet is
     * now included, and the stream and workbook are closed in every case.
     *
     * @param filePath path of the xlsx file
     * @return the comma-joined cell texts encoded as UTF-8
     */
    byte[] getxlsxContent(String filePath){
        List<String> list = new ArrayList<>();
        // try-with-resources closes the workbook and then the stream.
        try (FileInputStream fis = new FileInputStream(new File(filePath));
             XSSFWorkbook hb = new XSSFWorkbook(fis)) {
            System.out.println(hb.getNumCellStyles());
            // Only the first sheet is read.
            Sheet sheet = hb.getSheetAt(0);
            int firstrow = sheet.getFirstRowNum();
            int lastrow = sheet.getLastRowNum();
            // getLastRowNum() is the index of the last row itself, so the
            // bound is inclusive; "<" would drop the final row.
            for (int i = firstrow; i <= lastrow; i++) {
                Row row = sheet.getRow(i);
                if (row != null) {
                    int firstcell = row.getFirstCellNum();
                    // getLastCellNum() is already last index + 1, so this
                    // bound stays exclusive.
                    int lastcell = row.getLastCellNum();
                    for (int j = firstcell; j < lastcell; j++) {
                        Cell cell = row.getCell(j);
                        if (cell != null) {
                            System.out.println(cell.toString());
                            list.add(cell.toString());
                        }
                    }
                    System.out.println();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        System.out.println(list);
        String ss = String.join(",", list);
        // Explicit charset: a bare getBytes() depends on the platform default.
        return ss.getBytes(StandardCharsets.UTF_8);
    }


    /**
     * Reads every cell of the first sheet of an xls file and returns the cell
     * texts, joined with commas, as UTF-8 bytes.
     *
     * <p>Any exception while reading is printed and the cells collected so far
     * (possibly none) are returned, as before. The last row of the sheet is
     * now included, and the file system and workbook are closed in every case.
     *
     * @param filePath path of the xls file
     * @return the comma-joined cell texts encoded as UTF-8
     */
    byte[] getxlsContent(String filePath){
        List<String> list = new ArrayList<>();
        // try-with-resources closes the workbook and then the file system.
        try (POIFSFileSystem pSystem = new POIFSFileSystem(new File(filePath));
             HSSFWorkbook hb = new HSSFWorkbook(pSystem)) {
            System.out.println(hb.getNumCellStyles());
            // Only the first sheet is read.
            HSSFSheet sheet = hb.getSheetAt(0);
            int firstrow = sheet.getFirstRowNum();
            int lastrow = sheet.getLastRowNum();
            // getLastRowNum() is the index of the last row itself, so the
            // bound is inclusive; "<" would drop the final row.
            for (int i = firstrow; i <= lastrow; i++) {
                HSSFRow row = sheet.getRow(i);
                if (row != null) {
                    int firstcell = row.getFirstCellNum();
                    // getLastCellNum() is already last index + 1, so this
                    // bound stays exclusive.
                    int lastcell = row.getLastCellNum();
                    for (int j = firstcell; j < lastcell; j++) {
                        HSSFCell cell = row.getCell(j);
                        if (cell != null) {
                            list.add(cell.toString());
                        }
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        System.out.println(list);
        String ss = String.join(",", list);
        // Explicit charset: a bare getBytes() depends on the platform default.
        return ss.getBytes(StandardCharsets.UTF_8);
    }


    /**
     * Extracts the text of all pages of a pdf file and returns it as UTF-8
     * bytes.
     *
     * <p>Any exception while reading is printed. On failure an empty array is
     * returned; previously the literal text {@code "null"} was returned as if
     * it were the document content. The input stream and the parsed document
     * are now closed in every case.
     *
     * @param filePath path of the pdf file
     * @return the extracted text encoded as UTF-8; empty if extraction failed
     */
    byte[] getpdfContent(String filePath) {
        boolean sort = false; // whether text is sorted by position
        int startPage = 1;    // first page to extract
        String content = null;

        PDDocument document = null;
        try (InputStream input = new FileInputStream(new File(filePath))) {
            // Load the pdf document.
            PDFParser parser = new PDFParser(input);
            parser.parse();
            document = parser.getPDDocument();

            PDFTextStripper pts = new PDFTextStripper();
            pts.setSortByPosition(sort);
            pts.setStartPage(startPage);
            // Extract through the last page of the document.
            pts.setEndPage(document.getNumberOfPages());

            content = pts.getText(document);
            System.out.println(content);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (document != null) {
                try {
                    document.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        if (content == null) {
            // Nothing was extracted; do not return the text "null".
            return new byte[0];
        }
        // Explicit charset: a bare getBytes() depends on the platform default.
        return content.getBytes(StandardCharsets.UTF_8);
    }

  • 1
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值