一、需求
应用场景是这样。有一个HTML页面,内含公式和图片,需要转到Word并下载。
框架是Vue + Java.
其中公式分为两种,一种是Mathml标签:
<math>
<mrow>
<msup><mi>a</mi><mn>3</mn></msup>
<mo>+</mo>
<msup><mi>b</mi><mn>2</mn></msup>
<mo>=</mo>
<msup><mi>c</mi><mn>2</mn></msup>
</mrow>
</math>
还有一种是MathType格式的img标签:
<img class="Wirisformula" style="max-width:none;vertical-align:-16px;" role="math" alt="x equals fraction numerator negative b plus-or-minus square root of b squared minus 4 a c end root over denominator 2 a end fraction" height="50" width="147" data-mathml="«math xmlns=¨http://www.w3.org/1998/Math/MathML¨»«mi»x«/mi»«mo»=«/mo»«mfrac»«mrow»«mo»-«/mo»«mi»b«/mi»«mo»§#177;«/mo»«msqrt»«msup»«mi»b«/mi»«mn»2«/mn»«/msup»«mo»-«/mo»«mn»4«/mn»«mi»a«/mi»«mi»c«/mi»«/msqrt»«/mrow»«mrow»«mn»2«/mn»«mi»a«/mi»«/mrow»«/mfrac»«/math»" src="data:image/svg+xml;charset=utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20xmlns%3Awrs%3D%22http%3A%2F%2Fwww.wiris.com%2Fxml%2Fmathml-extension%22%20height%3D%2250%22%20width%3D%22147%22%20wrs%3Abaseline%3D%2234%22%3E%3C!--MathML%3A%20%3Cmath%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F1998%2FMath%2FMathML%22%3E%3Cmi%3Ex%3C%2Fmi%3E%3Cmo%3E%3D%3C%2Fmo%3E%3Cmfrac%3E%3Cmrow%3E%3Cmo%3E-%3C%2Fmo%3E%3Cmi%3Eb%3C%2Fmi%3E%3Cmo%3E%26%23xB1%3B%3C%2Fmo%3E%3Cmsqrt%3E%3Cmsup%3E%3Cmi%3Eb%3C%2Fmi%3E%3Cmn%3E2%3C%2Fmn%3E%3C%2Fmsup%3E%3Cmo%3E-%3C%2Fmo%3E%3Cmn%3E4%3C%2Fmn%3E%3Cmi%3Ea%3C%2Fmi%3E%3Cmi%3Ec%3C%2Fmi%3E%3C%2Fmsqrt%3E%3C%2Fmrow%3E%3Cmrow%3E%3Cmn%3E2%3C%2Fmn%3E%3Cmi%3Ea%3C%2Fmi%3E%3C%2Fmrow%3E%3C%2Fmfrac%3E%3C%2Fmath%3E--%3E%3Cdefs%3E%3Cstyle%20type%3D%22text%2Fcss%22%3E%40font-face%7Bfont-family%3A'ae2ef524fbf3d9fe611d5a8e90fefdc'%3Bsrc%3Aurl(data%3Afont%2Ftruetype%3Bcharset%3Dutf-8%3Bbase64%2CAAEAAAAMAIAAAwBAT1MvMjv%2FLJYAAADMAAAATmNtYXDgWxEdAAABHAAAADRjdnQgAAAABwAAAVAAAAAEZ2x5ZoYrxVAAAAFUAAAA0WhlYWQOdyayAAACKAAAADZoaGVhC0UVwQAAAmAAAAAkaG10eCg8AIUAAAKEAAAACGxvY2EAAAVKAAACjAAAAAxtYXhwBIoEWwAAApgAAAAgbmFtZXSF9ZsAAAK4AAABrXBvc3QDogHPAAAEaAAAACBwcmVwukanGAAABIgAAAANAAAGtAGQAAUAAAgACAAAAAAACAAIAAAAAAAAAQIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAgICAAAAAg8AMGe%2F57AAAHPgGyAAAAAAACAAEAAQAAABQAAwABAAAAFAAEACAAAAAEAAQAAQAAAGH%2F%2FwAAAGH%2F%2F%2F%2BgAAEAAAAAAAAABwACAFUAAAMAA6sAAwAHAAAzESERJSERIVUCq%2F2rAgD%2BAAOr%2FFVVAwAAAwAt%2F3QEAwRZAAsAFwAdADsYAbAdELA
D1LADELAU1LAUELAc1LAcELAJ1LAcELAOPLAJELAbPACwBhCwEdSwBhCwANSwABCwF9QwMQEiABEWEjMyEjcQJgYWAwIGIyImNTQ2MwE1BhMjEgIBs%2F7fFvWy07oDhYZwFgxOhVmysoUB7YwEslEEWf7f%2Ft71%2Ft8BM%2BMBp5yyLf6d%2FwBlyJzfsvxZjF0B5%2F1eAAAAAAEAAAABAACav9usXw889QADCAD%2F%2F%2F%2F%2F1a3uPf%2F%2F%2F%2F%2FVre49AAH%2B9QQDBkMAAAAKAAIAAQAAAAAAAQAABz7%2BTgAAF3AAAf%2F8BAMAAQAAAAAAAAAAAAAAAAAAAAIDUgBVBEwALQAAAAAAAAAoAAAA0QABAAAAAgAeAAMAAAAAAAIAgAQAAAAAAAQAADsAAAAAAAAAFQECAAAAAAAAAAEAFgAAAAAAAAAAAAIADgAWAAAAAAAAAAMANAAkAAAAAAAAAAQAFgBYAAAAAAAAAAUAFgBuAAAAAAAAAAYACwCEAAAAAAAAAAgAHACPAAEAAAAAAAEAFgAAAAEAAAAAAAIADgAWAAEAAAAAAAMANAAkAAEAAAAAAAQAFgBYAAEAAAAAAAUAFgBuAAEAAAAAAAYACwCEAAEAAAAAAAgAHACPAAMAAQQJAAEAFgAAAAMAAQQJAAIADgAWAAMAAQQJAAMANAAkAAMAAQQJAAQAFgBYAAMAAQQJAAUAFgBuAAMAAQQJAAYACwCEAAMAAQQJAAgAHACPAE0AYQB0AGgAIABGAG8AbgB0ACAAMgBSAGUAZwB1AGwAYQByAE0AYQB0AGgAcwAgAEYAbwByACAATQBvAHIAZQAgAE0AYQB0AGgAIABGAG8AbgB0ACAAMgBNAGEAdABoACAARgBvAG4AdAAgADIAVgBlAHIAcwBpAG8AbgAgADEALgAwTWF0aF9Gb250XzIATQBhAHQAaABzACAARgBvAHIAIABNAG8AcgBlAAAAAAMAAAAAAAADnwHPAAAAAAAAAAAAAAAAAAAAAAAAAAC5ByIAAI2FGACyAAAAAAAA)format('truetype')%3Bfont-weight%3Anormal%3Bfont-style%3Anormal%3B%7D%40font-face%7Bfont-family%3A'math19244194cbc38427b5aca056d4d'%3Bsrc%3Aurl(data%3Afont%2Ftruetype%3Bcharset%3Dutf-8%3Bbase64%2CAAEAAAAMAIAAAwBAT1MvMi7iBBMAAADMAAAATmNtYXDEvmKUAAABHAAAAERjdnQgDVUNBwAAAWAAAAA6Z2x5ZoPi2VsAAAGcAAABmGhlYWQQC2qxAAADNAAAADZoaGVhCGsXSAAAA2wAAAAkaG10eE2rRkcAAAOQAAAAEGxvY2EAHTwYAAADoAAAABRtYXhwBT0FPgAAA7QAAAAgbmFtZaBxlY4AAAPUAAABn3Bvc3QB9wD6AAAFdAAAACBwcmVwa1uragAABZQAAAAUAAADSwGQAAUAAAQABAAAAAAABAAEAAAAAAAAAQEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAgICAAAAAg1UADev96AAAD6ACWAAAAAAACAAEAAQAAABQAAwABAAAAFAAEADAAAAAIAAgAAgAAAD0AsSIS%2F%2F8AAAA9ALEiEv%2F%2F%2F8T%2FUd3xAAEAAAAAAAAAAAAAAVQDLACAAQAAVgAqAlgCHgEOASwCLABaAYACgACgANQAgAAAAAAAAAArAFUAgACrANUBAAErAAcAAAACAFUAAAMAA6sAAwAHAAAzESERJSERIVUCq%2F2rAgD%2BAAOr%2FFVVAwAAAgCAAOsC1QIVAAMABwBlGAGwCBCwBtSwBhCwBdSwCBCwAdSwARCwANSwBhCwBzywBRCwBDywARCwAjywABCwAzwAsAgQsAbUsAYQsAfUsAcQsAHUsA
EQsALUsAYQsAU8sAcQsAQ8sAEQsAA8sAIQsAM8MTATITUhHQEhNYACVf2rAlUBwFXVVVUAAgCA%2F%2F8CgAKrAAsADwBlGAGwEBCwD9SwDxCwADywABCwAdSwARCwBNSwBBCwBdSwARCwCjywBBCwBzywBRCwDjwAsBAQsA%2FUsA8QsAzUsAwQsAnUsAkQsArUsAoQsAHUsAEQsALUsAEQsAQ8sAoQsAc8MDETMzUzFTMVIxUjJwcRIRUhgNZV1dVVAdUCAP4AAdXW1lbU1QH%2B1VUAAQCAAVUC1QGrAAMAMBgBsAQQsQAD9rADPLECB%2FWwATyxBQPmALEAABMQsQAG5bEAARMQsAE8sQMF9bACPBMhFSGAAlX9qwGrVgABAAAAAQAA1XjOQV8PPPUAAwQA%2F%2F%2F%2F%2F9Y6E3P%2F%2F%2F%2F%2F1joTcwAA%2FyAEgAOrAAAACgACAAEAAAAAAAEAAAPo%2F2oAABdwAAD%2FtgSAAAEAAAAAAAAAAAAAAAAAAAAEA1IAVQNWAIADAACAA1YAgAAAAAAAAAAoAAAAsgAAAU4AAAGYAAEAAAAEAF4ABQAAAAAAAgCABAAAAAAABAAA3gAAAAAAAAAVAQIAAAAAAAAAAQASAAAAAAAAAAAAAgAOABIAAAAAAAAAAwAwACAAAAAAAAAABAASAFAAAAAAAAAABQAWAGIAAAAAAAAABgAJAHgAAAAAAAAACAAcAIEAAQAAAAAAAQASAAAAAQAAAAAAAgAOABIAAQAAAAAAAwAwACAAAQAAAAAABAASAFAAAQAAAAAABQAWAGIAAQAAAAAABgAJAHgAAQAAAAAACAAcAIEAAwABBAkAAQASAAAAAwABBAkAAgAOABIAAwABBAkAAwAwACAAAwABBAkABAASAFAAAwABBAkABQAWAGIAAwABBAkABgAJAHgAAwABBAkACAAcAIEATQBhAHQAaAAgAEYAbwBuAHQAUgBlAGcAdQBsAGEAcgBNAGEAdABoAHMAIABGAG8AcgAgAE0AbwByAGUAIABNAGEAdABoACAARgBvAG4AdABNAGEAdABoACAARgBvAG4AdABWAGUAcgBzAGkAbwBuACAAMQAuADBNYXRoX0ZvbnQATQBhAHQAaABzACAARgBvAHIAIABNAG8AcgBlAAADAAAAAAAAAfQA%2BgAAAAAAAAAAAAAAAAAAAAAAAAAAuQcRAACNhRgAsgAAABUUE7EAAT8%3D)format('truetype')%3Bfont-weight%3Anormal%3Bfont-style%3Anormal%3B%7D%3C%2Fstyle%3E%3C%2Fdefs%3E%3Ctext%20font-family%3D%22Arial%22%20font-size%3D%2216%22%20font-style%3D%22italic%22%20text-anchor%3D%22middle%22%20x%3D%224.5%22%20y%3D%2234%22%3Ex%3C%2Ftext%3E%3Ctext%20font-family%3D%22math19244194cbc38427b5aca056d4d%22%20font-size%3D%2216%22%20text-anchor%3D%22middle%22%20x%3D%2217.5%22%20y%3D%2234%22%3E%3D%3C%2Ftext%3E%3Cline%20stroke%3D%22%23000000%22%20stroke-linecap%3D%22square%22%20stroke-width%3D%221%22%20x1%3D%2228.5%22%20x2%3D%22143.5%22%20y1%3D%2228.5%22%20y2%3D%2228.5%22%2F%3E%3Ctext%20font-family%3D%22math19244194cbc38427b5aca056d4d%22%20font-size%3D%2216%22%20text-anchor%3D%22middle%22%20x%3D%2236.5%22%20y%3D%2222%22%3E%26%
23x2212%3B%3C%2Ftext%3E%3Ctext%20font-family%3D%22Arial%22%20font-size%3D%2216%22%20font-style%3D%22italic%22%20text-anchor%3D%22middle%22%20x%3D%2247.5%22%20y%3D%2222%22%3Eb%3C%2Ftext%3E%3Ctext%20font-family%3D%22math19244194cbc38427b5aca056d4d%22%20font-size%3D%2216%22%20text-anchor%3D%22middle%22%20x%3D%2260.5%22%20y%3D%2222%22%3E%26%23xB1%3B%3C%2Ftext%3E%3Cpolyline%20fill%3D%22none%22%20points%3D%2212%2C-20%2011%2C-20%205%2C0%202%2C-8%22%20stroke%3D%22%23000000%22%20stroke-linecap%3D%22square%22%20stroke-width%3D%221%22%20transform%3D%22translate(67.5%2C24.5)%22%2F%3E%3Cpolyline%20fill%3D%22none%22%20points%3D%225%2C0%202%2C-8%200%2C-7%22%20stroke%3D%22%23000000%22%20stroke-linecap%3D%22square%22%20stroke-width%3D%221%22%20transform%3D%22translate(67.5%2C24.5)%22%2F%3E%3Cline%20stroke%3D%22%23000000%22%20stroke-linecap%3D%22square%22%20stroke-width%3D%221%22%20x1%3D%2279.5%22%20x2%3D%22141.5%22%20y1%3D%224.5%22%20y2%3D%224.5%22%2F%3E%3Ctext%20font-family%3D%22Arial%22%20font-size%3D%2216%22%20font-style%3D%22italic%22%20text-anchor%3D%22middle%22%20x%3D%2285.5%22%20y%3D%2222%22%3Eb%3C%2Ftext%3E%3Ctext%20font-family%3D%22Arial%22%20font-size%3D%2212%22%20text-anchor%3D%22middle%22%20x%3D%2294.5%22%20y%3D%2215%22%3E2%3C%2Ftext%3E%3Ctext%20font-family%3D%22math19244194cbc38427b5aca056d4d%22%20font-size%3D%2216%22%20text-anchor%3D%22middle%22%20x%3D%22105.5%22%20y%3D%2222%22%3E%26%23x2212%3B%3C%2Ftext%3E%3Ctext%20font-family%3D%22Arial%22%20font-size%3D%2216%22%20text-anchor%3D%22middle%22%20x%3D%22117.5%22%20y%3D%2222%22%3E4%3C%2Ftext%3E%3Ctext%20font-family%3D%22ae2ef524fbf3d9fe611d5a8e90fefdc%22%20font-size%3D%2216%22%20font-style%3D%22italic%22%20text-anchor%3D%22middle%22%20x%3D%22126.5%22%20y%3D%2222%22%3Ea%3C%2Ftext%3E%3Ctext%20font-family%3D%22Arial%22%20font-size%3D%2216%22%20font-style%3D%22italic%22%20text-anchor%3D%22middle%22%20x%3D%22135.5%22%20y%3D%2222%22%3Ec%3C%2Ftext%3E%3Ctext%20font-family%3D%22Arial%22%20font-size%3D%2216%22%20text-anchor%3D%22mi
ddle%22%20x%3D%2281.5%22%20y%3D%2245%22%3E2%3C%2Ftext%3E%3Ctext%20font-family%3D%22ae2ef524fbf3d9fe611d5a8e90fefdc%22%20font-size%3D%2216%22%20font-style%3D%22italic%22%20text-anchor%3D%22middle%22%20x%3D%2290.5%22%20y%3D%2245%22%3Ea%3C%2Ftext%3E%3C%2Fsvg%3E" align="middle" />
这种可以从data-mathml中获取Mathml格式的数据
图片就是标准的img标签,src属性可以是图片链接或者BASE64格式的数据。
需求就是把这4种东西转到word里,其中图片要缓存下来,脱机可以正常显示。
二、依赖包
主要用到JACOB和POI,以及Mathml转OpenXml的fmath-mathml。
还有辅助包Jsoup
这里注意下POI的版本,统一用4.1.2。因为poi-ooxml-schemas在Maven上最高只到4.1.2(5.0起该构件改名为poi-ooxml-lite),如果其他POI包用5.0,版本不一致会报错。
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/net.sf.jacob-project/jacob -->
<dependency>
<groupId>net.sf.jacob-project</groupId>
<artifactId>jacob</artifactId>
<version>cus-1.0.0</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/jacob.jar
</systemPath>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>com.jdom</groupId>
<artifactId>jdom</artifactId>
<version>cus-1.0.0</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/jdom-2.0.6.jar
</systemPath>
</dependency>
<dependency>
<groupId>com.mathml</groupId>
<artifactId>mathml</artifactId>
<version>cus-1.0.0</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/fmath-mathml-java.jar
</systemPath>
</dependency>
三、代码
思路就是先把图片和公式用特殊的字符标记一下,例如${imgReplace1}、${mathReplace1}
然后图片缓存成文件,拷贝到Word文档里。
公式比较简单,直接替换成OpenXml格式就可以了。
话不多说,直接上代码
1.继承自XWPFDocument的核心处理类
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlToken;
import org.openxmlformats.schemas.drawingml.x2006.main.CTNonVisualDrawingProps;
import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTEffectExtent;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;
import fmath.conversion.c.a;
/**
 * {@link XWPFDocument} subclass that adds two helpers: one embeds an already-registered picture as an
 * inline drawing in a run, the other replaces a run's content with an Office Math formula converted
 * from MathML.
 */
public class CustomXWPFDocument extends XWPFDocument {

    /** Factor applied to the caller's width/height before they are written as drawing extents. */
    private static final int EMU = 9525;

    /** Opening tag wrapped around the converted formula; declares the {@code w} and {@code m} prefixes. */
    private static final String MATH_PARA_OPEN =
            "<m:oMathPara xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\">";

    /** Closing tag matching {@link #MATH_PARA_OPEN}. */
    private static final String MATH_PARA_CLOSE = "</m:oMathPara>";

    public CustomXWPFDocument(InputStream in) throws IOException {
        super(in);
    }

    public CustomXWPFDocument() {
        super();
    }

    public CustomXWPFDocument(OPCPackage pkg) throws IOException {
        super(pkg);
    }

    /**
     * Appends an inline drawing that references an embedded picture to the given run.
     *
     * <p>The drawing XML is parsed before the run is touched. If parsing fails the error is printed
     * and the run is left unchanged, instead of writing a {@code null} token into the document.
     *
     * @param blipId relationship id of the picture, written into {@code a:blip r:embed}
     * @param ind    id used for both {@code pic:cNvPr} and the drawing's {@code docPr}
     * @param width  picture width; multiplied by {@link #EMU} before being written
     * @param height picture height; multiplied by {@link #EMU} before being written
     * @param run    run that receives the drawing
     */
    public void createPicture(String blipId, int ind, int width, int height, XWPFRun run) {
        int cx = width * EMU;
        int cy = height * EMU;

        String picXml = "<a:graphic xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\">"
                + " <a:graphicData uri=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">"
                + " <pic:pic xmlns:pic=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">"
                + " <pic:nvPicPr>"
                + " <pic:cNvPr id=\"" + ind + "\" name=\"Generated\"/>"
                + " <pic:cNvPicPr/>"
                + " </pic:nvPicPr>"
                + " <pic:blipFill>"
                + " <a:blip r:embed=\"" + blipId
                + "\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"/>"
                + " <a:stretch>"
                + " <a:fillRect/>"
                + " </a:stretch>"
                + " </pic:blipFill>"
                + " <pic:spPr>"
                + " <a:xfrm>"
                + " <a:off x=\"0\" y=\"0\"/>"
                + " <a:ext cx=\"" + cx + "\" cy=\"" + cy + "\"/>"
                + " </a:xfrm>"
                + " <a:prstGeom prst=\"rect\">"
                + " <a:avLst/>"
                + " </a:prstGeom>"
                + " </pic:spPr>"
                + " </pic:pic>"
                + " </a:graphicData>"
                + "</a:graphic>";

        XmlToken graphicXml;
        try {
            graphicXml = XmlToken.Factory.parse(picXml);
        } catch (XmlException xe) {
            // Nothing has been added to the run yet, so bailing out leaves the document intact.
            xe.printStackTrace();
            return;
        }

        CTInline inline = run.getCTR().addNewDrawing().addNewInline();
        inline.addNewGraphic().addNewGraphicData();
        inline.set(graphicXml);
        inline.setDistT(0);
        inline.setDistB(0);
        inline.setDistL(0);
        inline.setDistR(0);

        CTPositiveSize2D extent = inline.addNewExtent();
        extent.setCx(cx);
        extent.setCy(cy);

        CTNonVisualDrawingProps docPr = inline.addNewDocPr();
        docPr.setId(ind);
        docPr.setName("图片" + ind);
        docPr.setDescr("图片");

        inline.addNewCNvGraphicFramePr();
    }

    /**
     * Replaces the content of the given run with a formula.
     *
     * <p>The MathML string is handed to the fmath converter ({@code a.a}) and the result is wrapped
     * in an {@code m:oMathPara} element. If the converter returns {@code null} or the wrapped XML
     * cannot be parsed, the error is reported and the run is left unchanged.
     *
     * @param mathStr MathML source of the formula
     * @param run     run whose content is replaced by the formula
     */
    public void createMath(String mathStr, XWPFRun run) {
        XmlToken mathXml;
        try {
            String openXML = a.a(mathStr);
            if (openXML == null) {
                System.err.println("MathML conversion returned no result; formula skipped");
                return;
            }
            mathXml = XmlToken.Factory.parse(MATH_PARA_OPEN + openXML + MATH_PARA_CLOSE);
        } catch (XmlException xe) {
            xe.printStackTrace();
            return;
        }
        run.getCTR().set(mathXml);
    }
}
2.处理公式和图片的工具类。
遍历所有的XWPFParagraph,找到需要替换的位置
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
/**
 * Placeholder replacement for .docx (Word 2007+) documents: substitutes text, pictures and formulas
 * for marker strings found in the runs of body paragraphs and table cells.
 */
public class OfficeUtil {

    /**
     * Opens the given .docx file and replaces every placeholder listed in {@code param}.
     *
     * <p>Failures are printed and swallowed; the method then returns whatever document state was
     * reached, which is {@code null} when the file could not be opened at all.
     *
     * @param param    placeholder to replacement; a {@code String} value is inserted as text, a
     *                 {@code Map} value describes a picture ({@code mapType=img}) or a formula
     *                 ({@code mapType=math})
     * @param template path of the .docx file to open
     * @return the processed document, or {@code null} if opening failed
     */
    public static CustomXWPFDocument generateWord(Map<String, Object> param, String template) {
        CustomXWPFDocument doc = null;
        try {
            OPCPackage pack = POIXMLDocument.openPackage(template);
            doc = new CustomXWPFDocument(pack);
            if (param != null && !param.isEmpty()) {
                // Body paragraphs.
                processParagraphs(doc.getParagraphs(), param, doc);
                // Paragraphs inside table cells.
                Iterator<XWPFTable> tables = doc.getTablesIterator();
                while (tables.hasNext()) {
                    for (XWPFTableRow row : tables.next().getRows()) {
                        for (XWPFTableCell cell : row.getTableCells()) {
                            processParagraphs(cell.getParagraphs(), param, doc);
                        }
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return doc;
    }

    /**
     * Replaces placeholders in every run of the given paragraphs and resets oversized paragraph
     * spacing to zero.
     *
     * @param paragraphList paragraphs to process; {@code null} or empty is a no-op
     * @param param         placeholder to replacement map, see {@link #generateWord(Map, String)}
     * @param doc           document that owns the paragraphs; receives picture data and formulas
     */
    public static void processParagraphs(List<XWPFParagraph> paragraphList, Map<String, Object> param,
            CustomXWPFDocument doc) {
        if (paragraphList == null || paragraphList.isEmpty() || param == null) {
            return;
        }
        for (XWPFParagraph paragraph : paragraphList) {
            // Spacing produced by the conversion is too large and has to be reset by hand.
            if (paragraph.getSpacingBefore() >= 1000 || paragraph.getSpacingAfter() > 1000) {
                paragraph.setSpacingBefore(0);
                paragraph.setSpacingAfter(0);
            }
            // Iterate over a snapshot of the run list rather than the paragraph's own list.
            List<XWPFRun> allRuns = new ArrayList<XWPFRun>(paragraph.getRuns());
            for (XWPFRun run : allRuns) {
                String text = run.getText(0);
                if (text != null) {
                    replacePlaceholders(run, text, param, doc);
                }
            }
        }
    }

    /** Applies every matching entry of {@code param} to a single run whose first text is {@code text}. */
    private static void replacePlaceholders(XWPFRun run, String text, Map<String, Object> param,
            CustomXWPFDocument doc) {
        boolean pendingTextWrite = false;
        boolean replacedByMath = false;
        for (Entry<String, Object> entry : param.entrySet()) {
            String key = entry.getKey();
            if (key == null || key.isEmpty() || !text.contains(key)) {
                continue;
            }
            Object value = entry.getValue();
            if (value instanceof String) {
                // Text replacement: remember that the run still has to be updated.
                text = text.replace(key, (String) value);
                pendingTextWrite = true;
            } else if (value instanceof Map) {
                Map<?, ?> spec = (Map<?, ?>) value;
                Object mapType = spec.get("mapType");
                if ("img".equals(mapType)) {
                    text = text.replace(key, "");
                    insertPicture(spec, run, doc);
                    run.setText(text, 0);
                    pendingTextWrite = false;
                } else if ("math".equals(mapType)) {
                    doc.createMath((String) spec.get("content"), run);
                    replacedByMath = true;
                }
            }
        }
        // Without this write-back, plain text replacements would be computed but never stored.
        // Skipped after a formula, because createMath has already replaced the run's content.
        if (pendingTextWrite && !replacedByMath) {
            run.setText(text, 0);
        }
    }

    /** Registers the picture bytes with the document and adds an inline drawing to the run. */
    private static void insertPicture(Map<?, ?> pic, XWPFRun run, CustomXWPFDocument doc) {
        try {
            byte[] bytes = (byte[]) pic.get("content");
            if (bytes == null) {
                System.err.println("Picture placeholder has no content; picture skipped");
                return;
            }
            int width = Integer.parseInt(String.valueOf(pic.get("width")));
            int height = Integer.parseInt(String.valueOf(pic.get("height")));
            Object typeName = pic.get("type");
            int picType = getPictureType(typeName == null ? null : typeName.toString());
            String blipId = doc.addPictureData(new ByteArrayInputStream(bytes), picType);
            doc.createPicture(blipId, doc.getNextPicNameNumber(picType) + 200, width, height, run);
        } catch (Exception e) {
            // One broken picture must not abort the remaining replacements.
            e.printStackTrace();
        }
    }

    /**
     * Maps an image type name (case-insensitive) to the matching {@code PICTURE_TYPE_*} constant.
     *
     * @param picType type name such as {@code png}, {@code jpg} or {@code gif}; may be {@code null}
     * @return the picture type code; {@code PICTURE_TYPE_PICT} for {@code null} or unknown names
     */
    private static int getPictureType(String picType) {
        if (picType == null) {
            return CustomXWPFDocument.PICTURE_TYPE_PICT;
        }
        if (picType.equalsIgnoreCase("png")) {
            return CustomXWPFDocument.PICTURE_TYPE_PNG;
        }
        if (picType.equalsIgnoreCase("jpg") || picType.equalsIgnoreCase("jpeg")) {
            return CustomXWPFDocument.PICTURE_TYPE_JPEG;
        }
        if (picType.equalsIgnoreCase("gif")) {
            return CustomXWPFDocument.PICTURE_TYPE_GIF;
        }
        if (picType.equalsIgnoreCase("bmp")) {
            return CustomXWPFDocument.PICTURE_TYPE_BMP;
        }
        if (picType.equalsIgnoreCase("tif") || picType.equalsIgnoreCase("tiff")) {
            return CustomXWPFDocument.PICTURE_TYPE_TIFF;
        }
        if (picType.equalsIgnoreCase("dib")) {
            return CustomXWPFDocument.PICTURE_TYPE_DIB;
        }
        if (picType.equalsIgnoreCase("emf")) {
            return CustomXWPFDocument.PICTURE_TYPE_EMF;
        }
        if (picType.equalsIgnoreCase("wmf")) {
            return CustomXWPFDocument.PICTURE_TYPE_WMF;
        }
        return CustomXWPFDocument.PICTURE_TYPE_PICT;
    }

    /**
     * Reads the stream to its end and returns all bytes.
     *
     * <p>The data is read in a loop until end of stream. {@code available()} only reports what can
     * be read without blocking, and a single {@code read} may return fewer bytes than requested.
     *
     * @param in      stream to read; {@code null} yields {@code null}
     * @param isClose whether to close {@code in} afterwards
     * @return the stream content, or {@code null} if {@code in} is {@code null} or reading failed
     */
    public static byte[] inputStream2ByteArray(InputStream in, boolean isClose) {
        if (in == null) {
            return null;
        }
        byte[] byteArray = null;
        try {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                collected.write(chunk, 0, read);
            }
            byteArray = collected.toByteArray();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (isClose) {
                try {
                    in.close();
                } catch (Exception e2) {
                    System.out.println("关闭流失败");
                }
            }
        }
        return byteArray;
    }
}
3.JACOB HTML转Word
需求里是用模板,所以这里用的是Open命令而不是Add
/**
 * Drives Word through COM (JACOB) to turn an HTML file into a .docx.
 *
 * <p>Opens the template {@code d:/MathType/B4H.dotx}, inserts the HTML file at the end of the
 * document and saves the result under {@code wordFile}. Errors are printed, not rethrown.
 *
 * <p>The document is closed without saving in {@code finally}, and Word is told not to save on
 * quit. A failed insert or save therefore cannot leave the hidden Word instance waiting on a
 * "save changes?" prompt.
 *
 * @param html     path of the HTML file to insert
 * @param wordFile path of the .docx file to write
 */
public static void htmlToWord(String html, String wordFile) {
    ActiveXComponent app = new ActiveXComponent("Word.Application"); // start Word
    Dispatch openedDoc = null;
    try {
        app.setProperty("Visible", new Variant(false));
        Dispatch documents = app.getProperty("Documents").toDispatch();
        // Open the template (use "Add" with no arguments instead when no template is needed).
        openedDoc = Dispatch.invoke(documents, "Open", Dispatch.Method,
                new Object[] { "d:/MathType/B4H.dotx", new Variant(true), new Variant(true) }, new int[1])
                .toDispatch();
        Dispatch selection = app.getProperty("Selection").toDispatch();
        // 6 = wdStory: move the selection to the end of the document.
        Dispatch.call(selection, "EndKey", new Variant(6));
        Dispatch.invoke(selection, "InsertFile", Dispatch.Method,
                new Object[] { html, "", new Variant(false), new Variant(false), new Variant(true) }, new int[3]);
        // 16 = wdFormatDocumentDefault (.docx).
        Dispatch.invoke(openedDoc, "SaveAs", Dispatch.Method, new Object[] { wordFile, new Variant(16) },
                new int[1]);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (openedDoc != null) {
            try {
                Dispatch.call(openedDoc, "Close", new Variant(false));
            } catch (Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
        // 0 = wdDoNotSaveChanges: never prompt, even if a document is still open.
        app.invoke("Quit", new Variant[] { new Variant(0) });
    }
}
4.找到HTML里需要替换的元素
这里用到了JSoup
/** Numeric character reference in Wiris encoding, where {@code §} stands in for {@code &}. */
private static final Pattern WIRIS_NUMERIC_ENTITY = Pattern.compile("§#(?:[xX]([0-9a-fA-F]+)|([0-9]+));");

/**
 * Collects the {@code <img>} elements of an HTML fragment that have to be replaced in the Word
 * document.
 *
 * <p>Images carrying a {@code data-mathml} attribute are treated as MathType formulas and returned
 * under {@code "imgmaths"} with keys {@code tag} (serialized element) and {@code content} (decoded
 * MathML). All other images are saved through {@code saveBase64Img} / {@code saveNetImg} and
 * returned under {@code "pics"} with keys {@code img}, {@code img1}, {@code src}, {@code fileName},
 * {@code type} and, when present on the element, {@code width} and {@code height}.
 *
 * @param htmlStr HTML to scan
 * @return map with the two lists {@code "pics"} and {@code "imgmaths"}
 * @throws Exception if saving an image fails
 */
public static HashMap<String, List<HashMap<String, String>>> getReplaceStr(String htmlStr) throws Exception {
    List<HashMap<String, String>> pics = new ArrayList<HashMap<String, String>>();
    List<HashMap<String, String>> imgmaths = new ArrayList<HashMap<String, String>>();

    Document doc = Jsoup.parse(htmlStr);
    for (Element img : doc.select("img")) {
        HashMap<String, String> map = new HashMap<String, String>();
        String tag = img.toString();

        if (img.hasAttr("data-mathml")) {
            // MathType formula: undo the attribute-safe encoding of the MathML markup.
            String mathml = img.attr("data-mathml").replace("«", "<").replace("»", ">").replace("¨", "\"");
            mathml = decodeWirisNumericEntities(mathml)
                    .replace("xmlns=\"http://www.w3.org/1998/Math/MathML\"", "");
            map.put("tag", tag);
            map.put("content", mathml);
            imgmaths.add(map);
            continue;
        }

        // Ordinary picture: store it locally and remember how to find it again.
        String src = img.attr("src");
        String type;
        String fileName;
        if (src.startsWith("data:image")) {
            type = dataUriSubtype(src);
            // Strip the header of any base64 data URI, not only those of a fixed list of types.
            int marker = src.indexOf(";base64,");
            String payload = marker >= 0 ? src.substring(marker + ";base64,".length()) : src;
            fileName = saveBase64Img(payload, type);
        } else {
            type = urlExtension(src);
            fileName = saveNetImg(src);
        }

        String width = img.attr("width");
        if (!"".equals(width)) {
            map.put("width", width);
        }
        String height = img.attr("height");
        if (!"".equals(height)) {
            map.put("height", height);
        }
        // Two spellings of the tag, ending in "/>" and in ">", so that callers can match either.
        map.put("img", tag.substring(0, tag.length() - 1) + "/>");
        map.put("img1", tag);
        map.put("src", src);
        map.put("fileName", fileName);
        map.put("type", type);
        pics.add(map);
    }

    HashMap<String, List<HashMap<String, String>>> replaceParams = new HashMap<String, List<HashMap<String, String>>>();
    replaceParams.put("pics", pics);
    replaceParams.put("imgmaths", imgmaths);
    return replaceParams;
}

/** Returns the part of a data URI between the first "/" and the following ";" or ",". */
private static String dataUriSubtype(String dataUri) {
    int slash = dataUri.indexOf('/');
    if (slash < 0) {
        return "";
    }
    int end = dataUri.length();
    int semicolon = dataUri.indexOf(';', slash);
    int comma = dataUri.indexOf(',', slash);
    if (semicolon >= 0) {
        end = semicolon;
    }
    if (comma >= 0 && comma < end) {
        end = comma;
    }
    return dataUri.substring(slash + 1, end);
}

/** Returns the file extension of a URL without query string or fragment, or "" if there is none. */
private static String urlExtension(String url) {
    String path = url;
    int query = path.indexOf('?');
    if (query >= 0) {
        path = path.substring(0, query);
    }
    int fragment = path.indexOf('#');
    if (fragment >= 0) {
        path = path.substring(0, fragment);
    }
    int dot = path.lastIndexOf('.');
    if (dot < 0 || dot < path.lastIndexOf('/')) {
        return "";
    }
    return path.substring(dot + 1);
}

/**
 * Decodes {@code §#NNN;} / {@code §#xHH;} references into the characters they denote. Characters
 * that are special in XML are emitted as standard {@code &#NNN;} references so the MathML stays
 * well-formed. Anything that cannot be decoded is left as it was.
 */
private static String decodeWirisNumericEntities(String text) {
    Matcher matcher = WIRIS_NUMERIC_ENTITY.matcher(text);
    StringBuffer decoded = new StringBuffer();
    while (matcher.find()) {
        String replacement = matcher.group();
        try {
            int codePoint = matcher.group(1) != null
                    ? Integer.parseInt(matcher.group(1), 16)
                    : Integer.parseInt(matcher.group(2));
            if (codePoint == '<' || codePoint == '>' || codePoint == '&' || codePoint == '"'
                    || codePoint == '\'') {
                replacement = "&#" + codePoint + ";";
            } else if (Character.isValidCodePoint(codePoint) && codePoint > 0) {
                replacement = new String(Character.toChars(codePoint));
            }
        } catch (NumberFormatException ignored) {
            // Value out of int range: keep the original text.
        }
        matcher.appendReplacement(decoded, Matcher.quoteReplacement(replacement));
    }
    matcher.appendTail(decoded);
    return decoded.toString();
}
5.入口调用方法
这里用到了HtmlUtils.htmlUnescape,是spring-web里的一个工具方法,用来把HTML转义字符(例如&lt;、&amp;)还原成原始字符
/**
 * Converts an HTML fragment containing formulas and pictures into a Word document.
 *
 * <p>Steps: unescape the HTML, swap every formula and picture for a {@code ${mathReplaceN}} /
 * {@code ${imgReplaceN}} marker, let Word convert the marked HTML to {@code temp.docx}, then
 * replace the markers in that file and write {@code final.docx}. All files live in
 * {@code D:\MathType}.
 *
 * <p>Conversion errors are printed, not rethrown, so the returned path may point to a file that
 * was not (re)written.
 *
 * @param content HTML to convert (may contain escaped entities)
 * @return path of the generated document
 * @throws Exception if collecting the replaceable elements fails
 */
public static String writeWordFile(String content) throws Exception {
    String path = "D:\\MathType";
    Map<String, Object> param = new HashMap<String, Object>();

    File fileDir = new File(path);
    if (!fileDir.exists()) {
        fileDir.mkdirs();
    }

    content = HtmlUtils.htmlUnescape(content);
    HashMap<String, List<HashMap<String, String>>> replaceMap = getReplaceStr(content);

    // MathType formulas delivered as <img data-mathml="...">.
    // NOTE(review): the replacement is textual and relies on the serialized tag occurring verbatim
    // in content; confirm this against real input.
    int count = 0;
    for (HashMap<String, String> math : replaceMap.get("imgmaths")) {
        count++;
        String key = "${mathReplace" + count + "}";
        content = content.replace(math.get("tag"), key + "\r\n");
        Map<String, Object> header = new HashMap<String, Object>();
        header.put("content", math.get("content"));
        header.put("mapType", "math");
        param.put(key, header);
    }

    // Inline <math>...</math> elements. The matcher keeps scanning the string it was created on,
    // so reassigning content inside the loop does not disturb it.
    Pattern mathPattern = Pattern.compile("(<math)(.*?)(</math>)", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    Matcher mathMatcher = mathPattern.matcher(content);
    while (mathMatcher.find()) {
        count++;
        String key = "${mathReplace" + count + "}";
        String mathMarkup = mathMatcher.group(0);
        content = content.replace(mathMarkup, key);
        Map<String, Object> header = new HashMap<String, Object>();
        header.put("content", mathMarkup);
        header.put("mapType", "math");
        param.put(key, header);
    }

    // Pictures.
    count = 0;
    for (HashMap<String, String> img : replaceMap.get("pics")) {
        count++;
        String key = "${imgReplace" + count + "}";
        // The tag may be written with a "/>" or a ">" ending; replace both spellings.
        content = content.replace(img.get("img"), key);
        content = content.replace(img.get("img1"), key);

        byte[] bytes = null;
        try {
            bytes = OfficeUtil.inputStream2ByteArray(new FileInputStream(resolveImageFile(img)), true);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        if (bytes == null) {
            // No data to embed: leave the marker in the text so the gap is visible in the output.
            continue;
        }

        Map<String, Object> header = new HashMap<String, Object>();
        int width = parseDimension(img.get("width"));
        int height = parseDimension(img.get("height"));
        if (width < 0 || height < 0) {
            // Missing or unparsable size: fall back to 400 x 300.
            width = 400;
            height = 300;
        }
        header.put("width", width);
        header.put("height", height);
        header.put("type", img.get("type"));
        header.put("mapType", "img");
        header.put("content", bytes);
        param.put(key, header);
    }

    try {
        try (FileOutputStream htmlOut = new FileOutputStream("d:/MathType/temp.html")) {
            htmlOut.write(content.getBytes("utf-8"));
        }
        htmlToWord("d:/MathType/temp.html", "d:/MathType/temp.docx");
        // Intermediate file produced by Word; the markers are replaced in it.
        CustomXWPFDocument doc = OfficeUtil.generateWord(param, "D:\\MathType\\temp.docx");
        if (doc == null) {
            throw new IllegalStateException("Could not open D:\\MathType\\temp.docx");
        }
        // Final document with pictures and formulas embedded.
        try (FileOutputStream docOut = new FileOutputStream("D:\\MathType\\final.docx")) {
            doc.write(docOut);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    // Return the file that is actually written above.
    return "D:/MathType/final.docx";
}

/**
 * Locates the locally stored copy of a picture. The recorded {@code fileName} is preferred when it
 * points to an existing file; otherwise the last path segment of {@code src} inside
 * {@code d:/MathType/} is used.
 */
private static File resolveImageFile(Map<String, String> img) {
    String fileName = img.get("fileName");
    if (fileName != null && !fileName.isEmpty()) {
        File saved = new File(fileName);
        if (!saved.isAbsolute()) {
            saved = new File("d:/MathType/", fileName);
        }
        if (saved.isFile()) {
            return saved;
        }
    }
    String src = img.get("src") == null ? "" : img.get("src");
    String[] segments = src.replaceAll("/", "\\\\").split("\\\\");
    String lastSegment = segments.length == 0 ? "" : segments[segments.length - 1];
    return new File("d:/MathType/" + lastSegment);
}

/** Parses a width/height attribute value; returns -1 when it is absent or not a number. */
private static int parseDimension(String raw) {
    if (raw == null || raw.trim().isEmpty()) {
        return -1;
    }
    try {
        return (int) Double.parseDouble(raw.trim());
    } catch (NumberFormatException e) {
        return -1;
    }
}
大致就是这样。欢迎斧正。
附:对下划线做处理。场景是这样,在HTML中下划线是用u标签做的,并且字体不统一。
// Give underlined text a uniform font: every <u> ends up with a <span> that carries a font-family.
Document contentDoc = Jsoup.parse(content);
for (Element underline : contentDoc.select("u")) {
    Element firstSpan = underline.selectFirst("span");
    if (firstSpan == null) {
        // No span inside the <u>: wrap its whole content in a new styled span.
        String inner = underline.html();
        underline.html("");
        underline.append("<span style='font-family: Calibri;font-size:10.5pt'>" + inner + "</span>");
        continue;
    }
    // A span exists: add the font only if its style does not already set one.
    String css = firstSpan.attr("style");
    if (!css.contains("font-family")) {
        firstSpan.attr("style", "font-family: Calibri;" + css);
    }
}
content = contentDoc.outerHtml();
邮箱:yushen6403@163.com