Java解析pdf、doc、docx、ppt、xls等文件

本文是从一个大佬那里摘过来的,忘记是哪个了,在这里向大佬道个歉,并表示感谢,感谢大佬的技术分享,我就厚着脸皮发布了!😉
1.所需依赖

<dependency>
       <groupId>org.apache.pdfbox</groupId>
       <artifactId>pdfbox</artifactId>
       <version>2.0.2</version>
   </dependency>
   <dependency>
       <groupId>org.apache.poi</groupId>
       <artifactId>poi</artifactId>
       <version>3.15</version>
   </dependency>
   <dependency>
       <groupId>org.apache.poi</groupId>
       <artifactId>poi-ooxml</artifactId>
       <version>3.15</version>
   </dependency>
   <dependency>
       <groupId>org.apache.poi</groupId>
       <artifactId>poi-ooxml-schemas</artifactId>
       <version>3.15</version>
   </dependency>
   <dependency>
       <groupId>org.apache.poi</groupId>
       <artifactId>poi-scratchpad</artifactId>
       <version>3.15</version>
   </dependency>
   <dependency>
       <groupId>org.apache.xmlbeans</groupId>
       <artifactId>xmlbeans</artifactId>
       <version>2.6.0</version>
   </dependency>
   <dependency>
       <groupId>dom4j</groupId>
       <artifactId>dom4j</artifactId>
       <version>1.6.1</version>
   </dependency>

2.代码示例

 /**
     * 读取doc文件
     * @param filePath
     * @throws Exception
     */
    public static String getTextFromDoc(String filePath) throws Exception{
        StringBuilder sb = new StringBuilder();
        FileInputStream fis = new FileInputStream(new File(filePath));
        HWPFDocument doc = new HWPFDocument(fis);
        Range rang = doc.getRange();
        sb.append(rang.text());
        fis.close();
        return sb.toString();

    }
    /**
     * 读取docx文件
     * @param filePath
     * @throws IOException
     */
    public static String getTextFromDocx(String filePath) throws IOException {
        FileInputStream in = new FileInputStream(filePath);
        XWPFDocument doc = new XWPFDocument(in);
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
        String text = extractor.getText();
        in.close();
        return text;
    }
    /**
     * 读取pdf文件
     * @param filePath
     * @throws IOException
     */
    public static String getTextFromPDF(String filePath) throws IOException{
        File input = new File(filePath);
        PDDocument pd = PDDocument.load(input);
        PDFTextStripper stripper = new PDFTextStripper();
        return stripper.getText(pd);
    }
    /**
     * 读取ppt文件
     * @param filePath
     * @throws IOException
     */
    public static String getTextFromPPT( String filePath) throws IOException{
        FileInputStream in = new FileInputStream(filePath);
        PowerPointExtractor extractor = new PowerPointExtractor(in);
        String content = extractor.getText();
        extractor.close();
        return content;
    }
    /**
     * 读取pptx文件
     * @param filePath
     * @throws IOException
     */
    public static String getTextFromPPTX( String filePath) throws IOException{
        String resultString = null;
        StringBuilder sb = new StringBuilder();
        FileInputStream in = new FileInputStream(filePath);
        try {
            XMLSlideShow xmlSlideShow = new XMLSlideShow(in);
            List<XSLFSlide> slides = xmlSlideShow.getSlides();
            for(XSLFSlide slide:slides){
                CTSlide rawSlide = slide.getXmlObject();
                CTGroupShape gs = rawSlide.getCSld().getSpTree();
                CTShape[] shapes = gs.getSpArray();
                for(CTShape shape:shapes){
                    CTTextBody tb = shape.getTxBody();
                    if(null==tb){
                        continue;
                    }
                    CTTextParagraph[] paras = tb.getPArray();
                    for(CTTextParagraph textParagraph:paras){
                        CTRegularTextRun[] textRuns = textParagraph.getRArray();
                        for(CTRegularTextRun textRun:textRuns){
                            sb.append(textRun.getT());
                        }
                    }
                }
            }
            resultString = sb.toString();
            xmlSlideShow.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return resultString;
    }
    /**
     * 读取xls
     * @param filePath
     * @throws IOException
     */
    public static String getTextFromxls(String filePath) throws IOException{
        FileInputStream in = new FileInputStream(filePath);
        StringBuilder content = new StringBuilder();
        HSSFWorkbook workbook = new HSSFWorkbook(in);
        for(int sheetIndex=0;sheetIndex<workbook.getNumberOfSheets();sheetIndex++){
            HSSFSheet sheet = workbook.getSheetAt(sheetIndex);
            for(int rowIndex=0;rowIndex<=sheet.getLastRowNum();rowIndex++){
                HSSFRow row = sheet.getRow(rowIndex);
                if(row==null){
                    continue;
                }
                for(int cellnum=0;cellnum<row.getLastCellNum();cellnum++){
                    HSSFCell cell = row.getCell(cellnum);
                    if(cell!=null){
                        content.append(cell.getRichStringCellValue().getString()+" ");
                    }

                }
            }

        }
        workbook.close();
        return content.toString();

    }
    /**
     * 用来读取xlsx文件
     * @param filePath
     * @throws IOException
     */
    public static String getTextFromxlsx(String filePath) throws IOException{
        StringBuilder content = new StringBuilder();
        XSSFWorkbook workbook = new XSSFWorkbook(filePath);
        for(int sheet=0;sheet<workbook.getNumberOfSheets();sheet++){
            if(null!=workbook.getSheetAt(sheet)){
                XSSFSheet aSheet =workbook.getSheetAt(sheet);
                for(int row=0;row<=aSheet.getLastRowNum();row++){
                    if(null!=aSheet.getRow(row)){
                        XSSFRow aRow = aSheet.getRow(row);
                        for(int cell=0;cell<aRow.getLastCellNum();cell++){
                            if(null!=aRow.getCell(cell)){
                                XSSFCell aCell = aRow.getCell(cell);
                                if(convertCell(aCell).length()>0){
                                    content.append(convertCell(aCell));
                                }
                            }
                            content.append(" ");
                        }
                    }
                }
            }
        }
        workbook.close();
        return content.toString();

    }

    private static String (Cell cell){
        NumberFormat formater = NumberFormat.getInstance();
        formater.setGroupingUsed(false);
        String cellValue="";
        if(cell==null){
            return cellValue;
        }

        switch(cell.getCellType()){
            case HSSFCell.CELL_TYPE_NUMERIC:
                cellValue = formater.format(cell.getNumericCellValue());
                break;
            case HSSFCell.CELL_TYPE_STRING:
                cellValue = cell.getStringCellValue();
                break;
            case HSSFCell.CELL_TYPE_BLANK:
                cellValue = cell.getStringCellValue();
                break;
            case HSSFCell.CELL_TYPE_BOOLEAN:
                cellValue = Boolean.valueOf(cell.getBooleanCellValue()).toString();
                break;
            case HSSFCell.CELL_TYPE_ERROR:
                cellValue = String.valueOf(cell.getErrorCellValue());
                break;
            default:cellValue="";
        }
        return cellValue.trim();
    }
  • 2
    点赞
  • 18
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
好的,我可以帮你回答这个问题。首先需要获取到android11以上的存储权限,然后使用Storage Access Framework(SAF)来遍历文件。接着,我们可以使用DocumentFile API来过滤出指定类型的文件,最后使用lod.d来打印文件内容。 以下是一个示例代码: ```java // 获取存储权限 if (ContextCompat.checkSelfPermission(this, Manifest.permission.READ_EXTERNAL_STORAGE) != PackageManager.PERMISSION_GRANTED) { ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.READ_EXTERNAL_STORAGE}, 1); return; } // 打开SAF文件选择器 Intent intent = new Intent(Intent.ACTION_OPEN_DOCUMENT_TREE); startActivityForResult(intent, 2); // 在onActivityResult中处理文件选择结果 @Override protected void onActivityResult(int requestCode, int resultCode, @Nullable Intent data) { super.onActivityResult(requestCode, resultCode, data); if (requestCode == 2 && resultCode == RESULT_OK) { Uri treeUri = data.getData(); DocumentFile pickedDir = DocumentFile.fromTreeUri(this, treeUri); // 遍历文件并打印 for (DocumentFile file : pickedDir.listFiles()) { String fileName = file.getName(); if (fileName.endsWith(".txt") || fileName.endsWith(".doc") || fileName.endsWith(".docx") || fileName.endsWith(".pdf") || fileName.endsWith(".ppt") || fileName.endsWith(".pptx") || fileName.endsWith(".xls") || fileName.endsWith(".xlsx")) { try { InputStream inputStream = getContentResolver().openInputStream(file.getUri()); BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); StringBuilder stringBuilder = new StringBuilder(); String line; while ((line = reader.readLine()) != null) { stringBuilder.append(line); } Log.d("FileContents", stringBuilder.toString()); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } } } ``` 请注意,上述代码仅供参考,并且可能需要根据您的实际需求进行调整。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值