需要的jar包 <!-- doc文档的emf图片格式转png图片 --> <dependency> <groupId>org.freehep</groupId> <artifactId>freehep-graphicsio-emf</artifactId> <version>2.1.3</version> </dependency> <dependency> <groupId>org.freehep</groupId> <artifactId>freehep-io</artifactId> <version>2.0.5</version> </dependency> <!-- wmf转svg转png --> <dependency> <groupId>xml-apis</groupId> <artifactId>xml-apis-ext</artifactId> <version>1.3.04</version> <scope>provided</scope> </dependency> <dependency> <groupId>net.arnx</groupId> <artifactId>wmf2svg</artifactId> <version>0.9.5</version> </dependency> <dependency> <groupId>org.w3c</groupId> <artifactId>dom</artifactId> <version>2.3.0-jaxb-1.0.6</version> </dependency> <dependency> <groupId>org.codeartisans.thirdparties.swing</groupId> <artifactId>batik-all</artifactId> <version>1.8pre-r1084380</version> </dependency> <!--poi--> <dependency> <groupId>org.apache.xmlbeans</groupId> <artifactId>xmlbeans</artifactId> <version>2.6.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>ooxml-schemas</artifactId> <version>1.3</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-excelant</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-examples</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.14</version> </dependency>
/** * 解析word * * @param filePath word文件路径 * @return */ public Map<String, List<String>> wordUtils(String filePath) { try { FileInputStream in = new FileInputStream(filePath);//载入文档 //office2007 docx格式 if (filePath.toLowerCase().endsWith("docx")) { //word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后 XWPFDocument xwpf = new XWPFDocument(in);//得到word文档的信息 //List<XWPFParagraph> listParagraphs = xwpf.getParagraphs();//得到段落信息 Iterator<XWPFTable> it = xwpf.getTablesIterator();//得到word中的表格 //存放题,一道题一个Key Map<String, List<String>> map = new LinkedHashMap<>(); //记录题数: int count = 0; while (it.hasNext()) { XWPFTable table = it.next(); //题,选项,答案,解析 List<String> list = new ArrayList<>(); List<XWPFTableRow> rows = table.getRows(); //读取每一行数据 for (int i = 0; i < rows.size(); i++) { XWPFTableRow row = rows.get(i); StringBuffer str = new StringBuffer(); //读取每一列数据 List<XWPFTableCell> cells = row.getTableCells(); for (int j = 0; j < cells.size(); j++) { XWPFTableCell cell = cells.get(j); //获取列中的段落 for (int j1 = 0; j1 < cell.getParagraphs().size(); j1++) { //获取段落中的字符,包括空格,每个字符为一个XWPFRun对象 List<XWPFRun> runs = cell.getParagraphs().get(j1).getRuns(); int number = 0; for (int j2 = 0; j2 < runs.size(); j2++) { //获取单个对象 XWPFRun r = runs.get(j2); XWPFParagraph paragraph = r.getParagraph(); //获取字符,0和-1都能用 String text = r.getText(r.getTextPosition()); //如果字符为空,可能是附件一类的文件,比如图片之类的,需要另外解析,此处处理为图片 if (text == null) { //获取word图片地址 List<String> imageBundleList = XWPFUtils.readImageInParagraph(paragraph); if (CollectionUtils.isNotEmpty(imageBundleList)) { for (int k = 0; k < imageBundleList.size(); k++) { //获取图片PictureData XWPFPictureData pictureData = xwpf.getPictureDataByID(imageBundleList.get(number)); number++; //图片名称 String imageName = pictureData.getFileName(); //拼接图片保存地址 String path = "/data/website/uploads/word/" + imageName; byte[] data = pictureData.getData(); FileOutputStream fos = new FileOutputStream(path); fos.write(data); fos.close(); //是.wmf公式,转成png if (imageName.contains("wmf") || imageName.contains("WMF")) { List<String> wmfList = new ArrayList<>(); wmfList.add(path); //emf或者wmf转换为png图片格式 String pngPath = emfConversionPng(wmfList); //图片转base64 String s = GetImageStr(pngPath); String img = "<img src='" + s + "' />"; //String img = "<img src='" + pngPath + "' />"; str.append(img); } else { //图片转base64 String s = GetImageStr(path); String img = "<img src='" + s + "' />"; //String img = "<img src='" + imageName + "' />"; str.append(img); } if (number == imageBundleList.size()) { number = 0; } break; } } } else { str.append(text); } } } } if (StringUtils.isNoneBlank(str)) { list.add(str.toString()); //System.out.println(str); } } if (list != null && list.size() > 0) { map.put(count + "", list); count++; } } map.remove("0"); return map; } } catch (Exception e) { log.error("解析word异常!"); } return null; } /** * 图片转base64 * * @param imgFilePath * @return */ public static String GetImageStr(String imgFilePath) {// 将图片文件转化为字节数组字符串,并对其进行Base64编码处理 byte[] data = null; // 读取图片字节数组 try { InputStream in = new FileInputStream(imgFilePath); data = new byte[in.available()]; in.read(data); in.close(); } catch (IOException e) { e.printStackTrace(); } // 对字节数组Base64编码 BASE64Encoder encoder = new BASE64Encoder(); String s = "data:image/png;base64," + encoder.encode(data); return s;// 返回Base64编码过的字节数组字符串 } /** * emf或者wmf转换为png图片格式 * * @param * @return * @throws IOException */ public static String emfConversionPng(List<String> list) throws IOException { if (list.size() > 0) { // 对文件的命名进行重新修改 for (int i = 0; i < list.size(); i++) { String saveUrl = list.get(i); // 从doc文档解析的图片很有可能已经是png了,所以此处需要判断 if (saveUrl.contains("emf") || saveUrl.contains("EMF")) { InputStream is = new FileInputStream(saveUrl); EMFInputStream eis = new EMFInputStream(is, EMFInputStream.DEFAULT_VERSION); EMFRenderer emfRenderer = new EMFRenderer(eis); final int width = (int) eis.readHeader().getBounds() .getWidth(); final int height = (int) eis.readHeader().getBounds() .getHeight(); // 设置图片的大小和样式 final BufferedImage result = new BufferedImage(width + 60, height + 40, BufferedImage.TYPE_4BYTE_ABGR); Graphics2D g2 = result.createGraphics(); emfRenderer.paint(g2); String url = saveUrl.replace( saveUrl.substring(saveUrl.length() - 3), "png"); File outputfile = new File(url); // 写入到磁盘中(格式设置为png背景不会变为橙色) ImageIO.write(result, "png", outputfile); // 当前的图片写入到磁盘中后,将流关闭 if (eis != null) { eis.close(); } if (is != null) { is.close(); } } else if (saveUrl.contains("wmf") || saveUrl.contains("WMF")) { // 将wmf转svg String svgFile = saveUrl.substring(0, saveUrl.lastIndexOf(".wmf")) + ".svg"; wmfToSvg(saveUrl, svgFile); // 将svg转png String jpgFile = saveUrl.substring(0, saveUrl.lastIndexOf(".wmf")) + ".png"; svgToJpg(svgFile, jpgFile); return jpgFile; } } } return null; } /** * 将wmf转换为svg * * @param src * @param dest */ public static void wmfToSvg(String src, String dest) { File file = new File(src); boolean compatible = false; try { InputStream in = new FileInputStream(file); WmfParser parser = new WmfParser(); final SvgGdi gdi = new SvgGdi(compatible); parser.parse(in, gdi); Document doc = gdi.getDocument(); OutputStream out = new FileOutputStream(dest); if (dest.endsWith(".svgz")) { out = new GZIPOutputStream(out); } output(doc, out); if (out != null) { out.close(); } if (in != null) { in.close(); } } catch (Exception e) { e.printStackTrace(); } finally { } } /** * 输出信息 * * @param doc * @param out * @throws Exception */ private static void output(Document doc, OutputStream out) throws Exception { TransformerFactory factory = TransformerFactory.newInstance(); Transformer transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.METHOD, "xml"); transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD SVG 1.0//EN"); transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd"); transformer.transform(new DOMSource(doc), new StreamResult(out)); if (out != null) { out.flush(); out.close(); } } /** * 将svg转化为JPG * * @param src * @param dest */ public static void svgToJpg(String src, String dest) { FileOutputStream jpgOut = null; FileInputStream svgStream = null; ByteArrayOutputStream svgOut = null; ByteArrayInputStream svgInputStream = null; ByteArrayOutputStream jpg = null; File svg = null; try { // 获取到svg文件 svg = new File(src); svgStream = new FileInputStream(svg); svgOut = new ByteArrayOutputStream(); // 获取到svg的stream int noOfByteRead = 0; while ((noOfByteRead = svgStream.read()) != -1) { svgOut.write(noOfByteRead); } ImageTranscoder it = new PNGTranscoder(); it.addTranscodingHint(JPEGTranscoder.KEY_QUALITY, new Float(1f)); it.addTranscodingHint(ImageTranscoder.KEY_WIDTH, new Float(35)); jpg = new ByteArrayOutputStream(); svgInputStream = new ByteArrayInputStream(svgOut.toByteArray()); it.transcode(new TranscoderInput(svgInputStream), new TranscoderOutput(jpg)); jpgOut = new FileOutputStream(dest); jpgOut.write(jpg.toByteArray()); } catch (Exception e) { e.printStackTrace(); } finally { try { if (svgInputStream != null) { svgInputStream.close(); } if (jpg != null) { jpg.close(); } if (svgStream != null) { svgStream.close(); } if (svgOut != null) { svgOut.close(); } if (jpgOut != null) { jpgOut.flush(); jpgOut.close(); } if (svg != null) { svg.delete(); } } catch (IOException e) { e.printStackTrace(); } } }
//需要word试卷模板,联系我,本程序,只适合我自己设计的模板!不通用!import com.microsoft.schemas.vml.CTShape; import org.apache.poi.xwpf.usermodel.XWPFParagraph; import org.apache.poi.xwpf.usermodel.XWPFRun; import org.apache.xmlbeans.XmlCursor; import org.apache.xmlbeans.XmlObject; import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObject; import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture; import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDrawing; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTObject; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; import java.util.ArrayList; import java.util.List; //获得图片索引工具类 public class XWPFUtils { //获取某一个段落中的所有图片索引 public static List<String> readImageInParagraph(XWPFParagraph paragraph) { //图片索引List List<String> imageBundleList = new ArrayList<>(); //段落中所有XWPFRun List<XWPFRun> runList = paragraph.getRuns(); for (XWPFRun run : runList) { //XWPFRun是POI对xml元素解析后生成的自己的属性,无法通过xml解析,需要先转化成CTR CTR ctr = run.getCTR(); //对子元素进行遍历 XmlCursor c = ctr.newCursor(); //这个就是拿到所有的子元素: c.selectPath("./*"); while (c.toNextSelection()) { XmlObject o = c.getObject(); //如果子元素是<w:drawing>这样的形式,使用CTDrawing保存图片 if (o instanceof CTDrawing) { CTDrawing drawing = (CTDrawing) o; CTInline[] ctInlines = drawing.getInlineArray(); for (CTInline ctInline : ctInlines) { CTGraphicalObject graphic = ctInline.getGraphic(); // XmlCursor cursor = graphic.getGraphicData().newCursor(); cursor.selectPath("./*"); while (cursor.toNextSelection()) { XmlObject xmlObject = cursor.getObject(); // 如果子元素是<pic:pic>这样的形式 if (xmlObject instanceof CTPicture) { org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture picture = (org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture) xmlObject; //拿到元素的属性 imageBundleList.add(picture.getBlipFill().getBlip().getEmbed()); } } } } //使用CTObject保存图片 //<w:object>形式 if (o instanceof CTObject) { CTObject object = (CTObject) o; System.out.println(object); XmlCursor w = object.newCursor(); w.selectPath("./*"); while (w.toNextSelection()) { XmlObject xmlObject = w.getObject(); if (xmlObject instanceof CTShape) { CTShape shape = (CTShape) xmlObject; imageBundleList.add(shape.getImagedataArray()[0].getId2()); } } } } } return imageBundleList; } }
POI解析word试卷(解析表格,wmf转png图片)
最新推荐文章于 2024-06-06 09:00:00 发布