doc,docx,pdf,ppt等文件类型读取方法

//读取ppt文件
    /**
     * Extracts the text of a legacy PowerPoint (.ppt) file.
     *
     * <p>Any failure while opening or parsing the file is reported on standard
     * output and results in an empty string being returned.
     *
     * @param file path of the .ppt file to read
     * @return the extracted text, or an empty string if extraction failed
     * @throws IOException if closing the input stream fails
     */
    public String readPPT(String file) throws IOException {
        String re = "";
        InputStream is = null;
        try {
            is = new FileInputStream(new File(file));
            PowerPointExtractor extractor = new PowerPointExtractor(is);
            re = extractor.getText();
        } catch (Exception e) {
            System.out.println("读取ppt出错"+e.toString());
        } finally {
            // The stream is still null when the file could not be opened; closing it
            // unguarded would raise a NullPointerException that hides the real error.
            if (is != null) {
                is.close();
            }
        }
        return re;
    }

    //读取pptx文件
    /**
     * Extracts the text of a PowerPoint 2007+ (.pptx) file.
     *
     * <p>Any failure while opening or parsing the file is reported on standard
     * output and results in an empty string being returned.
     *
     * @param file path of the .pptx file to read
     * @return the extracted text, or an empty string if extraction failed
     * @throws IOException if closing the package fails
     */
    public String readPPT2007(String file) throws IOException {
        OPCPackage opc = null;
        String re = "";
        try {
            opc = POIXMLDocument.openPackage(file);
            re = new XSLFPowerPointExtractor(opc).getText();
        } catch (Exception e) {
            System.out.println("读取pptx出错"+e.toString());
        } finally {
            // The package is still null when openPackage failed; guard the close so the
            // handled error is not replaced by a NullPointerException.
            // NOTE(review): OPCPackage.close() is documented to save a writable package;
            // revert() may be the better choice for read-only use - confirm.
            if (opc != null) {
                opc.close();
            }
        }
        return re;
    }

    // 读取pdf文件
    /**
     * Extracts the text of a PDF file.
     *
     * <p>Any failure while opening or parsing the file is printed as a stack trace
     * and results in {@code null} being returned.
     *
     * @param file path of the PDF file to read
     * @return the extracted text, or {@code null} if extraction failed
     * @throws IOException if closing the input stream or the document fails
     */
    public String readPDF(String file) throws IOException {
        String result = null;
        FileInputStream is = null;
        PDDocument document = null;
        try {
            is = new FileInputStream(file);
            PDFParser parser = new PDFParser(is);
            parser.parse();
            document = parser.getPDDocument();
            PDFTextStripper stripper = new PDFTextStripper();
            result = stripper.getText(document);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Nested try/finally: the document must still be closed when closing the
            // stream throws, otherwise the document's resources would leak.
            try {
                if (is != null) {
                    is.close();
                }
            } finally {
                if (document != null) {
                    document.close();
                }
            }
        }
        return result;
    }

    // 读取doc文件
    /**
     * Extracts the text of a legacy Word (.doc) file.
     *
     * <p>I/O failures (including a missing file) are printed as a stack trace and
     * result in an empty string being returned.
     *
     * @param file path of the .doc file to read
     * @return the extracted text, or an empty string if the file could not be read
     */
    public String readWord(String file) {
        String result = "";
        FileInputStream is = null;
        try {
            is = new FileInputStream(file);
            WordExtractor wordExtractor = new WordExtractor(is);
            result = wordExtractor.getText();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The original never closed the stream, leaking a file handle per call.
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    // Reported like the other I/O failures; the method signature
                    // declares no checked exception.
                    e.printStackTrace();
                }
            }
        }
        return result;
    }

    // 读取docx文件
    /**
     * Extracts the text of a Word 2007+ (.docx) file.
     *
     * <p>Any failure while opening or parsing the file is printed as a stack trace
     * and results in an empty string being returned.
     *
     * @param file path of the .docx file to read
     * @return the extracted text, or an empty string if extraction failed
     * @throws IOException if closing the package fails
     */
    public String readDocx(String file) throws IOException {
        String result = "";
        OPCPackage opc = null;
        try {
            opc = POIXMLDocument.openPackage(file);
            result = new XWPFWordExtractor(opc).getText();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // The package is still null when openPackage failed; guard the close so the
            // handled error is not replaced by a NullPointerException.
            // NOTE(review): OPCPackage.close() is documented to save a writable package;
            // revert() may be the better choice for read-only use - confirm.
            if (opc != null) {
                opc.close();
            }
        }
        return result;
    }

    // 读取xls文件
    /**
     * Extracts the cell text of a legacy Excel (.xls) workbook.
     *
     * <p>Every sheet is visited in order. The non-empty cell texts of a row are
     * concatenated without a separator and each existing row is terminated by a
     * newline. Rows that do not exist in the sheet are skipped.
     *
     * <p>Any failure is printed as a stack trace; the text collected up to that
     * point is still returned.
     *
     * @param file path of the .xls file to read
     * @return the extracted text, possibly empty
     */
    public String readEXCEL(String file) {
        StringBuilder content = new StringBuilder();
        FileInputStream is = null;
        try {
            is = new FileInputStream(file);
            HSSFWorkbook workbook = new HSSFWorkbook(is);
            for (int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++) {
                HSSFSheet sheet = workbook.getSheetAt(numSheets);
                if (sheet == null) {
                    continue;
                }
                for (int rowNumOfSheet = 0; rowNumOfSheet <= sheet.getLastRowNum(); rowNumOfSheet++) {
                    HSSFRow row = sheet.getRow(rowNumOfSheet);
                    // getRow() yields null for rows that were never created; the original
                    // dereferenced it and aborted the whole extraction at the first gap.
                    if (row == null) {
                        continue;
                    }
                    // getLastCellNum() is the last cell index plus one (-1 for an empty
                    // row), so the bound is exclusive.
                    for (int cellNumOfRow = 0; cellNumOfRow < row.getLastCellNum(); cellNumOfRow++) {
                        String text = this.convertCellHSSFCell(row.getCell(cellNumOfRow));
                        if (text.length() > 0) {
                            content.append(text);
                        }
                    }
                    content.append("\n");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // The original never closed the stream, leaking a file handle per call.
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return content.toString();
    }

    /**
     * Renders a single .xls cell as trimmed text.
     *
     * <p>Numeric cells are formatted with the default number format with grouping
     * disabled; string and blank cells use their string value; boolean cells become
     * {@code "true"}/{@code "false"}; error cells become their numeric error code.
     * Any other cell type, and a {@code null} cell, yields an empty string.
     *
     * @param cell the cell to convert, may be {@code null}
     * @return the trimmed text of the cell, never {@code null}
     */
    private String convertCellHSSFCell(HSSFCell cell){
        if (cell == null) {
            return "";
        }
        switch (cell.getCellType()) {
            case HSSFCell.CELL_TYPE_NUMERIC: {
                NumberFormat plainNumbers = NumberFormat.getInstance();
                plainNumbers.setGroupingUsed(false);
                return plainNumbers.format(cell.getNumericCellValue()).trim();
            }
            case HSSFCell.CELL_TYPE_STRING:
            case HSSFCell.CELL_TYPE_BLANK:
                return cell.getStringCellValue().trim();
            case HSSFCell.CELL_TYPE_BOOLEAN:
                return Boolean.toString(cell.getBooleanCellValue()).trim();
            case HSSFCell.CELL_TYPE_ERROR:
                return String.valueOf(cell.getErrorCellValue()).trim();
            default:
                return "";
        }
    }

    // 读取xlsx文件
    /**
     * Extracts the cell text of an Excel 2007+ (.xlsx) workbook.
     *
     * <p>Every sheet is visited in order. The non-empty cell texts of a row are
     * concatenated without a separator and each existing row is terminated by a
     * newline. Rows that do not exist in the sheet are skipped.
     *
     * <p>Any failure is reported on standard output; the text collected up to that
     * point is still returned.
     *
     * @param file path of the .xlsx file to read
     * @return the extracted text, possibly empty
     * @throws IOException declared for interface compatibility with existing callers
     */
    public String readEXCEL2007(String file) throws IOException {
        XSSFWorkbook workbook = null;
        StringBuilder content = new StringBuilder();
        try {
            workbook = new XSSFWorkbook(file);
            for (int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++) {
                XSSFSheet aSheet = workbook.getSheetAt(numSheets);
                if (aSheet == null) {
                    continue;
                }
                for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {
                    XSSFRow aRow = aSheet.getRow(rowNumOfSheet);
                    if (aRow == null) {
                        continue;
                    }
                    // getLastCellNum() is the last cell index plus one (-1 for an empty
                    // row), so the bound is exclusive.
                    for (int cellNumOfRow = 0; cellNumOfRow < aRow.getLastCellNum(); cellNumOfRow++) {
                        // The converter maps a missing (null) cell to an empty string.
                        String text = this.convertCellXHSSFCell(aRow.getCell(cellNumOfRow));
                        if (text.length() > 0) {
                            content.append(text);
                        }
                    }
                    // One line break per row; the original emitted one per cell index,
                    // which disagreed with readEXCEL.
                    content.append("\n");
                }
            }
        } catch (Exception ex) {
            System.out.println("读取excel出错"+ex.toString());
        } finally {
            // Release the file-backed package; per the POI OPCPackage docs, revert()
            // closes it without writing anything back to the source file.
            if (workbook != null) {
                workbook.getPackage().revert();
            }
        }
        return content.toString();
    }


    /**
     * Renders a single .xlsx cell as trimmed text.
     *
     * <p>Numeric cells are formatted with the default number format with grouping
     * disabled; string and blank cells use their string value; boolean cells become
     * {@code "true"}/{@code "false"}; error cells become their numeric error code.
     * Any other cell type, and a {@code null} cell, yields an empty string.
     *
     * @param aCell the cell to convert, may be {@code null}
     * @return the trimmed text of the cell, never {@code null}
     */
    private String convertCellXHSSFCell(XSSFCell aCell) {
        if (aCell == null) {
            return "";
        }
        String raw;
        switch (aCell.getCellType()) {
            case HSSFCell.CELL_TYPE_NUMERIC:
                NumberFormat ungrouped = NumberFormat.getInstance();
                ungrouped.setGroupingUsed(false);
                raw = ungrouped.format(aCell.getNumericCellValue());
                break;
            case HSSFCell.CELL_TYPE_STRING:
            case HSSFCell.CELL_TYPE_BLANK:
                // Both cell types are read through the string accessor.
                raw = aCell.getStringCellValue();
                break;
            case HSSFCell.CELL_TYPE_BOOLEAN:
                raw = Boolean.toString(aCell.getBooleanCellValue());
                break;
            case HSSFCell.CELL_TYPE_ERROR:
                raw = String.valueOf(aCell.getErrorCellValue());
                break;
            default:
                raw = "";
                break;
        }
        return raw.trim();
    }

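所需jar包的Maven坐标(注意:readPPT 与 readWord 使用的 PowerPointExtractor、WordExtractor 位于 poi-scratchpad 模块中,还需额外引入与 poi 同版本的 poi-scratchpad 依赖):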

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.9</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.9</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
            <version>1.0.6</version>
        </dependency>

        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
            <version>1.0.6</version>
        </dependency>

        <dependency>
            <groupId>org.apache.directory.studio</groupId>
            <artifactId>org.apache.commons.collections</artifactId>
            <version>3.2.1</version>
        </dependency>

        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
            <version>1.0.5</version>
        </dependency>

        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
            <version>1.0.5</version>
        </dependency>

        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
            <version>1.0.5</version>
        </dependency>

        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>1.8.13</version>
        </dependency>

        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox-tools</artifactId>
            <version>2.0.8</version>
        </dependency>
  • 1
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Radom7

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值