excel解析图片pdf附件不怕

背景


	工作中经常会遇到导入excel时还附带图片、附件的情况。
	下面是我解析excel的实现,支持图片、pdf、压缩文件。

实现


	依次去解析excel,看看有没有附件,返回的格式是Map,key是第几行,value是附件list
	附件格式都被解析成pdf格式

Reader.java


package com.ruoyi.srm.service;

import java.util.List;

import org.apache.poi.ss.usermodel.Workbook;

import com.ruoyi.srm.domain.req.CapacityReceivingReq.FileListBean;

public interface Reader {

    /**
     * Reads embedded files from a workbook and returns them as a list of beans.
     *
     * @param workbook  the workbook to read from
     * @param targetCol column index to look in; the implementations shown alongside this
     *                  interface compare it with the anchor's first column
     *                  (NOTE(review): presumably zero-based, as with POI anchors — confirm)
     * @return the files found, one {@link FileListBean} per file
     */
    List<FileListBean> read(Workbook workbook, int targetCol);

}

ReaderComposite.java


package com.ruoyi.srm.service.impl;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.poi.ss.usermodel.Workbook;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import com.ruoyi.srm.domain.req.CapacityReceivingReq.FileListBean;
import com.ruoyi.srm.service.Reader;

/**
 * Fans a read request out to every registered {@link Reader} and merges the results.
 */
@Component
public class ReaderComposite {

    /** All {@link Reader} beans known to the Spring context. */
    @Autowired
    private List<Reader> readerList;

    /**
     * Runs every reader against the workbook and groups the combined output by row.
     *
     * @param workbook  the workbook handed to each reader
     * @param targetCol column index handed to each reader
     * @return the readers' beans grouped by the string form of {@code getLine()}
     */
    public Map<String, List<FileListBean>> read(Workbook workbook, int targetCol) {
        return readerList.stream()
                .flatMap(reader -> reader.read(workbook, targetCol).stream())
                .collect(Collectors.groupingBy(bean -> String.valueOf(bean.getLine())));
    }

}

ImageReader.java


package com.ruoyi.srm.service.impl;

import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Base64;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFClientAnchor;
import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFPicture;
import org.apache.poi.xssf.usermodel.XSSFPictureData;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.springframework.boot.system.ApplicationHome;
import org.springframework.stereotype.Component;

import com.ruoyi.srm.domain.req.CapacityReceivingReq.FileListBean;
import com.ruoyi.srm.service.Reader;

import cn.hutool.core.io.FileUtil;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;

/**
 * {@link Reader} that extracts pictures anchored in a given column of the first worksheet.
 *
 * <p>Every matching picture is written to a per-call directory below
 * {@code <application home>/extract/} and returned as a Base64-encoded {@link FileListBean}.
 */
@Slf4j
@Component
public class ImageReader implements Reader {

    /**
     * Collects every picture whose top-left anchor cell lies in {@code targetCol}.
     *
     * @param workbook  workbook to scan; only the first sheet is inspected, and only an
     *                  {@link XSSFSheet} can yield results
     * @param targetCol column index (as reported by {@link XSSFClientAnchor#getCol1()}) the
     *                  picture's top-left corner must be in
     * @return one bean per matching picture, with {@code line} set to the anchor's first row
     *         ({@link XSSFClientAnchor#getRow1()}); empty when the sheet is not XSSF, has no
     *         drawing, or no picture matches
     */
    @Override
    @SneakyThrows
    public List<FileListBean> read(Workbook workbook, int targetCol) {
        List<FileListBean> result = new ArrayList<>();

        Sheet sheet = workbook.getSheetAt(0);
        if (!(sheet instanceof XSSFSheet)) {
            return result;
        }
        XSSFDrawing drawing = ((XSSFSheet) sheet).getDrawingPatriarch();
        if (drawing == null) {
            return result;
        }

        ApplicationHome home = new ApplicationHome();
        String rootPath = home.getDir().getAbsolutePath() + File.separator + "extract" + File.separator;
        String dir = rootPath + "_" + System.currentTimeMillis();
        // Per-cell sequence so several pictures anchored in the same cell get distinct file names.
        Map<String, AtomicInteger> counter = new HashMap<>();

        for (XSSFShape shape : drawing.getShapes()) {
            // Skip non-pictures and shapes without a client anchor instead of failing on the
            // cast: without one there is no cell position to match against.
            if (!(shape instanceof XSSFPicture) || !(shape.getAnchor() instanceof XSSFClientAnchor)) {
                continue;
            }
            XSSFPicture picture = (XSSFPicture) shape;
            XSSFClientAnchor anchor = (XSSFClientAnchor) shape.getAnchor();
            if (anchor.getCol1() != targetCol) {
                continue;
            }

            int targetRow = anchor.getRow1();
            AtomicInteger sequence = counter.computeIfAbsent(targetRow + "_" + targetCol, k -> new AtomicInteger());
            XSSFPictureData pictureData = picture.getPictureData();
            byte[] imageBytes = pictureData.getData();

            // Keep a copy on disk; the directory is only created once a picture actually matches.
            new File(dir).mkdirs();
            String filePath = dir + File.separator + "image_" + (targetRow + 1) + "_" + targetCol + "_"
                    + sequence.incrementAndGet() + "." + pictureData.suggestFileExtension();
            try (FileOutputStream out = new FileOutputStream(filePath)) {
                out.write(imageBytes);
            }
            log.info("第{}行图片已保存到: {}", targetRow + 1, filePath);

            // Encode the bytes already in memory rather than re-reading the file just written.
            String content = Base64.getEncoder().encodeToString(imageBytes);
            // NOTE(review): JPEG content is labelled "image/png" and other image types get no
            // data-URI prefix at all. Kept as-is because consumers may rely on it — confirm.
            if ("image/jpeg".equals(FileUtil.getMimeType(filePath))) {
                content = "data:image/png;base64," + content;
            }
            result.add(new FileListBean()
                    .setFileName(new File(filePath).getName())
                    .setContent(content)
                    .setLine(targetRow));
        }
        return result;
    }
}

AttachmentReader.java


package com.ruoyi.srm.service.impl;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.poi.ooxml.POIXMLDocumentPart;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFClientAnchor;
import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFObjectData;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.tika.Tika;
import org.springframework.boot.system.ApplicationHome;
import org.springframework.stereotype.Component;

import com.ruoyi.srm.domain.req.CapacityReceivingReq.FileListBean;
import com.ruoyi.srm.service.Reader;

import cn.hutool.core.io.FileUtil;
import lombok.Cleanup;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;

/**
 * {@link Reader} that extracts embedded OLE-packaged attachments anchored in a given column
 * of the first worksheet.
 *
 * <p>Only attachments that Tika detects as {@code application/pdf} are saved and returned;
 * anything else is logged and skipped. The original zip/image handling was commented out,
 * which left the zip extraction branch unreachable, so that branch is not carried over here.
 */
@Slf4j
@Component
public class AttachmentReader implements Reader {

    /** Name of the OLE entry read from each embedded object. */
    private static final String OLE10_NATIVE_ENTRY = "\u0001Ole10Native";

    /** The only detected content type that is currently saved and returned. */
    private static final String PDF_MIME_TYPE = "application/pdf";

    /**
     * Collects every embedded PDF whose top-left anchor cell lies in {@code targetCol}.
     *
     * @param workbook  workbook to scan; only the first sheet is inspected, and only an
     *                  {@link XSSFSheet} can yield results
     * @param targetCol column index (as reported by {@link XSSFClientAnchor#getCol1()}) the
     *                  object's top-left corner must be in
     * @return one bean per saved attachment, with {@code line} set to the anchor's first row
     *         ({@link XSSFClientAnchor#getRow1()}); empty when the sheet is not XSSF or
     *         nothing matches
     */
    @Override
    @SneakyThrows
    public List<FileListBean> read(Workbook workbook, int targetCol) {
        List<FileListBean> result = new ArrayList<>();

        Sheet sheet = workbook.getSheetAt(0);
        // Same guard as ImageReader: a non-XSSF sheet yields nothing instead of a ClassCastException.
        if (!(sheet instanceof XSSFSheet)) {
            return result;
        }

        ApplicationHome home = new ApplicationHome();
        String rootPath = home.getDir().getAbsolutePath() + File.separator + "extract" + File.separator;
        // One directory per call; file names below are unique within a call.
        String dir = rootPath + "_" + System.currentTimeMillis();
        // Per-cell sequence so several attachments anchored in the same cell get distinct names.
        Map<String, AtomicInteger> counter = new HashMap<>();
        Tika tika = new Tika();

        for (POIXMLDocumentPart part : ((XSSFSheet) sheet).getRelations()) {
            if (!(part instanceof XSSFDrawing)) {
                continue;
            }
            for (XSSFShape shape : ((XSSFDrawing) part).getShapes()) {
                // Skip shapes without a client anchor: there is no cell position to match.
                if (!(shape instanceof XSSFObjectData) || !(shape.getAnchor() instanceof XSSFClientAnchor)) {
                    continue;
                }
                XSSFClientAnchor anchor = (XSSFClientAnchor) shape.getAnchor();
                if (anchor.getCol1() != targetCol) {
                    continue;
                }

                int targetRow = anchor.getRow1();
                AtomicInteger sequence = counter.computeIfAbsent(targetRow + "_" + targetCol, k -> new AtomicInteger());

                // NOTE(review): the entry is read verbatim. If it carries a packager header
                // before the payload, POI's Ole10Native helper could strip it — confirm.
                byte[] payload;
                byte[] objectData = ((XSSFObjectData) shape).getObjectData();
                try (POIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(objectData))) {
                    if (!poifs.getRoot().getEntryNames().contains(OLE10_NATIVE_ENTRY)) {
                        continue;
                    }
                    try (InputStream contentStream = poifs.createDocumentInputStream(OLE10_NATIVE_ENTRY)) {
                        payload = IOUtils.toByteArray(contentStream);
                    }
                }

                String detect = tika.detect(payload);
                log.debug("第{}行附件检测类型: {}", targetRow + 1, detect);
                if (!PDF_MIME_TYPE.equals(detect)) {
                    // Previously the file name stayed empty here and opening it failed,
                    // aborting the whole read. Skip the attachment and keep going instead.
                    log.warn("第{}行附件类型{}暂不支持,已跳过", targetRow + 1, detect);
                    continue;
                }

                new File(dir).mkdirs();
                String name = dir + File.separator + "pdf_" + (targetRow + 1) + "_" + targetCol + "_"
                        + sequence.incrementAndGet() + ".pdf";
                try (FileOutputStream out = new FileOutputStream(name)) {
                    out.write(payload);
                }
                log.info("第{}行{}文件保存成功: {}", targetRow + 1, detect, name);

                // Build the bean from the bytes just written, not from a directory listing:
                // listing could pick up files belonging to other attachments.
                result.add(toFileBean(name, payload, targetRow));
            }
        }
        return result;
    }

    /**
     * Wraps saved content in a {@link FileListBean}.
     *
     * @param path      path the content was saved to; supplies the bean's file name and the
     *                  MIME type lookup
     * @param content   raw file bytes to Base64-encode
     * @param targetRow row value stored on the bean via {@code setLine}
     * @return the populated bean
     */
    private static FileListBean toFileBean(String path, byte[] content, int targetRow) {
        String encoded = Base64.getEncoder().encodeToString(content);
        // NOTE(review): JPEG content is labelled "image/png"; kept unchanged from the
        // original because consumers may rely on it — confirm.
        if ("image/jpeg".equals(FileUtil.getMimeType(path))) {
            encoded = "data:image/png;base64," + encoded;
        }
        return new FileListBean()
                .setFileName(new File(path).getName())
                .setContent(encoded)
                .setLine(targetRow);
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值