POI解析word试卷(解析表格,wmf转png图片)

需要的jar包
<!-- Convert EMF images embedded in Word documents to PNG -->
        <dependency>
            <groupId>org.freehep</groupId>
            <artifactId>freehep-graphicsio-emf</artifactId>
            <version>2.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.freehep</groupId>
            <artifactId>freehep-io</artifactId>
            <version>2.0.5</version>
        </dependency>
        <!-- Convert WMF to SVG, then SVG to PNG -->
        <dependency>
            <groupId>xml-apis</groupId>
            <artifactId>xml-apis-ext</artifactId>
            <version>1.3.04</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>net.arnx</groupId>
            <artifactId>wmf2svg</artifactId>
            <version>0.9.5</version>
        </dependency>
        <dependency>
            <groupId>org.w3c</groupId>
            <artifactId>dom</artifactId>
            <version>2.3.0-jaxb-1.0.6</version>
        </dependency>
        <dependency>
            <groupId>org.codeartisans.thirdparties.swing</groupId>
            <artifactId>batik-all</artifactId>
            <version>1.8pre-r1084380</version>
        </dependency>
        <!-- Apache POI plus the XMLBeans / OOXML schema artifacts listed with it -->
        <dependency>
            <groupId>org.apache.xmlbeans</groupId>
            <artifactId>xmlbeans</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-excelant</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-examples</artifactId>
            <version>3.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.14</version>
        </dependency>

/**
     * Parses a {@code .docx} exam paper whose questions are laid out as tables.
     *
     * <p>Every table in the document is read row by row. The text of all cells in a row is
     * concatenated into one string; a run that has no text is treated as a picture and is
     * replaced by an inline {@code <img>} tag carrying the picture as a base64 data URI.
     * Each table that yields at least one non-blank row becomes one map entry keyed by a
     * running counter ("0", "1", ...). The entry keyed "0" is removed before returning.
     *
     * <p>Side effect: every picture encountered is also written to
     * {@code /data/website/uploads/word/}.
     *
     * @param filePath path of the Word file; only names ending in "docx"
     *                 (case-insensitive) are parsed
     * @return the parsed tables in document order, or {@code null} when the file is not a
     *         docx file or parsing fails (the failure is logged together with its cause)
     */
    public Map<String, List<String>> wordUtils(String filePath) {
        try {
            // Only the Office 2007+ docx format is supported; check before opening anything.
            if (!filePath.toLowerCase().endsWith("docx")) {
                return null;
            }
            // try-with-resources: the input stream is released on every exit path.
            try (FileInputStream in = new FileInputStream(filePath)) {
                XWPFDocument xwpf = new XWPFDocument(in);
                Iterator<XWPFTable> it = xwpf.getTablesIterator();
                // One entry per table, in document order.
                Map<String, List<String>> map = new LinkedHashMap<>();
                int count = 0;
                while (it.hasNext()) {
                    XWPFTable table = it.next();
                    // One string per non-blank row of the current table.
                    List<String> list = new ArrayList<>();
                    for (XWPFTableRow row : table.getRows()) {
                        StringBuilder str = new StringBuilder();
                        for (XWPFTableCell cell : row.getTableCells()) {
                            for (XWPFParagraph cellParagraph : cell.getParagraphs()) {
                                // Index of the next picture to consume within this paragraph.
                                int number = 0;
                                for (XWPFRun r : cellParagraph.getRuns()) {
                                    // NOTE(review): getTextPosition() is used as the text index, as in
                                    // the original code, whose author notes that 0 and -1 behave the
                                    // same here -- confirm against the POI version in use.
                                    String text = r.getText(r.getTextPosition());
                                    if (text != null) {
                                        str.append(text);
                                        continue;
                                    }
                                    // A run without text is assumed to hold a picture.
                                    List<String> imageBundleList =
                                            XWPFUtils.readImageInParagraph(r.getParagraph());
                                    if (CollectionUtils.isNotEmpty(imageBundleList)) {
                                        // Each text-less run consumes exactly one picture id of the
                                        // paragraph, in order; the index wraps once all were used.
                                        XWPFPictureData pictureData =
                                                xwpf.getPictureDataByID(imageBundleList.get(number));
                                        number++;
                                        str.append(pictureToImgTag(pictureData));
                                        if (number == imageBundleList.size()) {
                                            number = 0;
                                        }
                                    }
                                }
                            }
                        }
                        if (StringUtils.isNoneBlank(str)) {
                            list.add(str.toString());
                        }
                    }
                    if (!list.isEmpty()) {
                        map.put(count + "", list);
                        count++;
                    }
                }
                // The first non-empty table is always discarded.
                // NOTE(review): presumably the template's header table -- confirm.
                map.remove("0");
                return map;
            }
        } catch (Exception e) {
            // Keep the cause in the log; without it the failure cannot be diagnosed.
            log.error("解析word异常!", e);
        }
        return null;
    }

    /**
     * Saves one embedded picture to the upload directory and renders it as an
     * {@code <img>} tag whose source is a base64 data URI.
     *
     * <p>Pictures whose file name contains "wmf" or "WMF" are first converted to PNG.
     *
     * @param pictureData picture taken from the document
     * @return the {@code <img>} tag for the picture
     * @throws IOException if the picture cannot be written or converted
     */
    private static String pictureToImgTag(XWPFPictureData pictureData) throws IOException {
        String imageName = pictureData.getFileName();
        String path = "/data/website/uploads/word/" + imageName;
        byte[] data = pictureData.getData();
        try (FileOutputStream fos = new FileOutputStream(path)) {
            fos.write(data);
        }
        String imagePath = path;
        if (imageName.contains("wmf") || imageName.contains("WMF")) {
            List<String> wmfList = new ArrayList<>();
            wmfList.add(path);
            imagePath = emfConversionPng(wmfList);
        }
        return "<img src='" + GetImageStr(imagePath) + "' />";
    }

    /**
     * Reads an image file and returns its content as a base64 data URI.
     *
     * <p>The whole file is read in one call, so the result never depends on how many
     * bytes a single {@code read} happens to deliver. The base64 text is emitted without
     * line breaks, which is the form a data URI inside an HTML attribute requires.
     *
     * <p>The media type in the returned URI is always {@code image/png}, whatever the
     * actual format of the file is.
     *
     * @param imgFilePath path of the image file to encode
     * @return {@code "data:image/png;base64,"} followed by the base64-encoded file content
     * @throws java.io.UncheckedIOException if the file cannot be read
     */
    public static String GetImageStr(String imgFilePath) {
        final byte[] data;
        try {
            data = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(imgFilePath));
        } catch (IOException e) {
            // Fail with the real cause instead of continuing with a null buffer.
            throw new java.io.UncheckedIOException("Cannot read image file: " + imgFilePath, e);
        }
        return "data:image/png;base64," + java.util.Base64.getEncoder().encodeToString(data);
    }


    /**
     * Converts EMF and WMF files to PNG files written next to the originals.
     *
     * <p>The file type is taken from the extension, compared case-insensitively, and the
     * output path is the input path with the extension replaced by {@code png}. Entries
     * with any other extension are skipped, because pictures extracted from a document
     * may already be PNG.
     *
     * <p>Processing stops at the first WMF entry, whose PNG path is returned. EMF entries
     * seen before that point are all converted. When the list holds no WMF entry, the
     * path of the first converted EMF is returned.
     *
     * @param list paths of the files to convert; may be {@code null} or empty
     * @return path of the PNG produced as described above, or {@code null} when nothing
     *         was converted
     * @throws IOException if an EMF file cannot be read or its PNG cannot be written
     */
    public static String emfConversionPng(List<String> list) throws IOException {
        if (list == null || list.isEmpty()) {
            return null;
        }
        String firstEmfPng = null;
        for (String saveUrl : list) {
            if (saveUrl == null) {
                continue;
            }
            int dot = saveUrl.lastIndexOf('.');
            if (dot < 0) {
                continue;
            }
            // Only the trailing extension is replaced; the same letters elsewhere in the
            // path (e.g. a directory named "emf") must stay untouched.
            String basePath = saveUrl.substring(0, dot);
            String extension = saveUrl.substring(dot + 1);
            if ("emf".equalsIgnoreCase(extension)) {
                String pngPath = basePath + ".png";
                try (InputStream is = new FileInputStream(saveUrl);
                     EMFInputStream eis = new EMFInputStream(is, EMFInputStream.DEFAULT_VERSION)) {
                    EMFRenderer emfRenderer = new EMFRenderer(eis);
                    final int width = (int) eis.readHeader().getBounds().getWidth();
                    final int height = (int) eis.readHeader().getBounds().getHeight();
                    // The canvas is 60 x 40 pixels larger than the EMF bounds.
                    final BufferedImage result = new BufferedImage(width + 60,
                            height + 40, BufferedImage.TYPE_4BYTE_ABGR);
                    Graphics2D g2 = result.createGraphics();
                    try {
                        emfRenderer.paint(g2);
                    } finally {
                        g2.dispose();
                    }
                    // PNG keeps the alpha channel, so the background is not tinted.
                    ImageIO.write(result, "png", new File(pngPath));
                }
                if (firstEmfPng == null) {
                    firstEmfPng = pngPath;
                }
            } else if ("wmf".equalsIgnoreCase(extension)) {
                // WMF is converted in two steps: WMF -> SVG -> PNG.
                String svgFile = basePath + ".svg";
                wmfToSvg(saveUrl, svgFile);
                String pngFile = basePath + ".png";
                svgToJpg(svgFile, pngFile);
                return pngFile;
            }
        }
        return firstEmfPng;
    }


    /**
     * Converts a WMF file to an SVG file.
     *
     * <p>When {@code dest} ends with {@code .svgz} the SVG is written gzip-compressed.
     * The destination file is only created after the WMF has been parsed successfully.
     * Failures are printed to standard error and not propagated.
     *
     * @param src  path of the WMF file to read
     * @param dest path of the SVG (or SVGZ) file to write
     */
    public static void wmfToSvg(String src, String dest) {
        File file = new File(src);
        final boolean compatible = false;
        // try-with-resources releases both files even when parsing or writing fails.
        try (InputStream in = new FileInputStream(file)) {
            WmfParser parser = new WmfParser();
            final SvgGdi gdi = new SvgGdi(compatible);
            parser.parse(in, gdi);

            Document doc = gdi.getDocument();
            try (OutputStream fileOut = new FileOutputStream(dest);
                 OutputStream out = dest.endsWith(".svgz")
                         ? new GZIPOutputStream(fileOut) : fileOut) {
                output(doc, out);
            }
        } catch (Exception e) {
            // Best effort, as before: report the problem and let the caller continue.
            e.printStackTrace();
        }
    }

    /**
     * Serializes a DOM document to a stream as indented UTF-8 XML carrying the
     * SVG 1.0 DOCTYPE, then flushes and closes the stream.
     *
     * @param doc document to serialize
     * @param out stream that receives the XML; closed by this method when not null
     * @throws Exception if the transformer cannot be created or the transformation fails
     */
    private static void output(Document doc, OutputStream out) throws Exception {
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        // Output settings as name/value pairs, applied in this order.
        String[][] settings = {
            {OutputKeys.METHOD, "xml"},
            {OutputKeys.ENCODING, "UTF-8"},
            {OutputKeys.INDENT, "yes"},
            {OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD SVG 1.0//EN"},
            {OutputKeys.DOCTYPE_SYSTEM, "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd"},
        };
        for (String[] setting : settings) {
            serializer.setOutputProperty(setting[0], setting[1]);
        }
        serializer.transform(new DOMSource(doc), new StreamResult(out));
        if (out == null) {
            return;
        }
        out.flush();
        out.close();
    }

    /**
     * Renders an SVG file to a PNG file and then deletes the SVG file.
     *
     * <p>Despite its name this method produces PNG output: it uses a
     * {@code PNGTranscoder}. The output width is fixed at 35 pixels. The destination
     * file is only created after transcoding succeeded. The SVG file is deleted whether
     * or not the conversion succeeded. Failures are printed to standard error and not
     * propagated.
     *
     * @param src  path of the SVG file to read; removed afterwards
     * @param dest path of the PNG file to write
     */
    public static void svgToJpg(String src, String dest) {
        File svg = null;
        try {
            svg = new File(src);

            // Load the SVG in blocks; a byte-at-a-time read of an unbuffered file stream
            // costs one system call per byte.
            ByteArrayOutputStream svgOut = new ByteArrayOutputStream();
            try (FileInputStream svgStream = new FileInputStream(svg)) {
                byte[] buffer = new byte[8192];
                int read;
                while ((read = svgStream.read(buffer)) != -1) {
                    svgOut.write(buffer, 0, read);
                }
            }

            ImageTranscoder it = new PNGTranscoder();
            // The JPEG quality hint was dropped: it is specific to JPEGTranscoder.
            it.addTranscodingHint(ImageTranscoder.KEY_WIDTH, Float.valueOf(35f));

            // Transcode in memory first so a failed conversion leaves no partial file.
            ByteArrayOutputStream png = new ByteArrayOutputStream();
            it.transcode(new TranscoderInput(new ByteArrayInputStream(svgOut.toByteArray())),
                    new TranscoderOutput(png));

            try (FileOutputStream pngOut = new FileOutputStream(dest)) {
                pngOut.write(png.toByteArray());
            }
        } catch (Exception e) {
            // Best effort, as before: report the problem and let the caller continue.
            e.printStackTrace();
        } finally {
            // The intermediate SVG is removed regardless of the outcome.
            if (svg != null) {
                svg.delete();
            }
        }
    }



import com.microsoft.schemas.vml.CTShape;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject;
import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObject;
import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDrawing;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTObject;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;

import java.util.ArrayList;
import java.util.List;
/**
 * Helper for finding the ids of the pictures referenced from a paragraph.
 */
public class XWPFUtils {

    /**
     * Collects the ids of all pictures referenced by the runs of a paragraph.
     *
     * <p>Two kinds of run children are inspected: {@code <w:drawing>} elements, of which
     * only the inline drawings are examined, and {@code <w:object>} elements, from whose
     * VML shapes the first image-data id is taken.
     *
     * @param paragraph paragraph whose runs are scanned
     * @return the picture ids in the order they were found; empty when there are none
     */
    public static List<String> readImageInParagraph(XWPFParagraph paragraph) {
        List<String> imageBundleList = new ArrayList<>();

        for (XWPFRun run : paragraph.getRuns()) {
            // The run's children are walked on its underlying CTR XML bean.
            CTR ctr = run.getCTR();
            XmlCursor c = ctr.newCursor();
            try {
                // Select every direct child element of the run.
                c.selectPath("./*");
                while (c.toNextSelection()) {
                    XmlObject o = c.getObject();
                    if (o instanceof CTDrawing) {
                        collectInlinePictureIds((CTDrawing) o, imageBundleList);
                    }
                    if (o instanceof CTObject) {
                        collectShapeImageIds((CTObject) o, imageBundleList);
                    }
                }
            } finally {
                // Cursors hold resources until they are disposed.
                c.dispose();
            }
        }
        return imageBundleList;
    }

    /** Adds the embed id of every {@code <pic:pic>} found in the inline drawings of {@code drawing}. */
    private static void collectInlinePictureIds(CTDrawing drawing, List<String> ids) {
        for (CTInline ctInline : drawing.getInlineArray()) {
            CTGraphicalObject graphic = ctInline.getGraphic();
            XmlCursor cursor = graphic.getGraphicData().newCursor();
            try {
                cursor.selectPath("./*");
                while (cursor.toNextSelection()) {
                    XmlObject xmlObject = cursor.getObject();
                    if (!(xmlObject instanceof CTPicture)) {
                        continue;
                    }
                    CTPicture picture = (CTPicture) xmlObject;
                    // Skip pictures without a blip instead of failing the whole paragraph.
                    if (picture.getBlipFill() != null && picture.getBlipFill().getBlip() != null) {
                        ids.add(picture.getBlipFill().getBlip().getEmbed());
                    }
                }
            } finally {
                cursor.dispose();
            }
        }
    }

    /** Adds the first image-data id of every VML shape directly inside {@code object}. */
    private static void collectShapeImageIds(CTObject object, List<String> ids) {
        XmlCursor w = object.newCursor();
        try {
            w.selectPath("./*");
            while (w.toNextSelection()) {
                XmlObject xmlObject = w.getObject();
                if (xmlObject instanceof CTShape) {
                    CTShape shape = (CTShape) xmlObject;
                    // A shape without image data contributes nothing.
                    if (shape.getImagedataArray().length > 0) {
                        ids.add(shape.getImagedataArray()[0].getId2());
                    }
                }
            }
        } finally {
            w.dispose();
        }
    }

}
//如需 Word 试卷模板,请联系我。本程序只适用于我自己设计的模板,不通用!





评论 39
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值