<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>5.2.2</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.2</version>
</dependency>
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
public class HtmlConvertWordUtil {
public static XWPFDocument htmlConvertWord(String html) {
XWPFDocument doc = new XWPFDocument();
Document parse = Jsoup.parse(html);
Elements es = parse.body().getAllElements();
List<Element> tag1 = es.stream().filter(x -> "div".equals(x.tagName()) ||"p".equals(x.tagName()) || "h1".equals(x.tagName()) || "h2".equals(x.tagName()) || "h3".equals(x.tagName()) || "table".equals(x.tagName())).collect(Collectors.toList());
for (Element e : tag1) {
createXWPFParagraph(doc, e);
}
return doc;
}
public static void createXWPFParagraph(XWPFDocument docxDocument, Element e) {
XWPFParagraph paragraph = docxDocument.createParagraph();
List<String> allStyles = new ArrayList<>();
createXWPFRun(docxDocument, paragraph, e, allStyles);
}
public static void createXWPFRun(XWPFDocument docxDocument, XWPFParagraph paragraph, Element e, List<String> allStyles) {
List<String> parentStyle = new ArrayList<>(Arrays.asList(e.attr("style") == null ? new String[0] : e.attr("style").split(";")));
allStyles.addAll(parentStyle);
if (e.tagName().contains("div")||e.tagName().contains("h") || "p".equals(e.tagName())) {
allStyles.add(e.tagName() + ":");
}
List<Node> nodes = e.childNodes();
if (nodes != null && nodes.size() != 0) {
if ("table".equals(e.tagName())) {
XWPFTable table = docxDocument.createTable();
CTTblWidth width = table.getCTTbl().addNewTblPr().addNewTblW();
width.setType(STTblWidth.DXA);
width.setW(BigInteger.valueOf(9072));
nodes = nodes.stream().filter(x -> x instanceof Element).collect(Collectors.toList());
Element tableBody = (Element) nodes.get(0);
table.removeRow(0);
tableBody.childNodes().stream().filter(x -> x instanceof Element).map(x -> (Element) x).forEach(x -> {
XWPFTableRow row = table.createRow();
AtomicInteger i = new AtomicInteger();
x.childNodes().stream().filter(c -> c instanceof Element).map(c -> (Element) c).forEach(c -> {
i.getAndIncrement();
XWPFTableCell cell = row.getCell(i.intValue() - 1);
if (cell == null) {
cell = row.createCell();
}
CTTcPr tcpr = cell.getCTTc().addNewTcPr();
CTVerticalJc va = tcpr.addNewVAlign();
va.setVal(STVerticalJc.CENTER);
List<String> tempStyles = new ArrayList<>();
tempStyles = cellStyle(c, tempStyles);
if (tempStyles.size() != 0) {
allStyles.addAll(tempStyles);
XWPFRun run = cell.addParagraph().createRun();
run.setText(c.text());
setFontStyle(allStyles, run, paragraph, docxDocument, c);
allStyles.removeAll(allStyles);
} else {
cell.setText(c.text());
}
});
});
return;
}
for (Node node : nodes) {
XWPFRun run = paragraph.createRun();
if (node instanceof TextNode) {
TextNode textNode = (TextNode) node;
run.setText(textNode.text().replaceAll(" ", ""));
setFontStyle(allStyles, run, paragraph, docxDocument, null);
} else if (node instanceof Element) {
Element children = (Element) node;
List<String> childrenStyle = new ArrayList<>(Arrays.asList(children.attr("style") == null ? new String[0] : children.attr("style").split(";")));
String tagName = children.tagName();
childrenStyle.add(addTagStyle(tagName, children));
allStyles.addAll(childrenStyle);
List<Node> grandsons = children.childNodes().stream().filter(x -> x instanceof Element).collect(Collectors.toList());
if (grandsons != null && grandsons.size() != 0) {
createXWPFRun(docxDocument, paragraph, children, allStyles);
}
List<Node> childrenNodes = children.childNodes().stream().filter(x -> x instanceof TextNode).collect(Collectors.toList());
if (childrenNodes != null && childrenNodes.size() != 0) {
String text = children.text();
List<String> aStyle = allStyles.stream().filter(x -> x.indexOf("a:") >= 0).collect(Collectors.toList());
if (aStyle == null || aStyle.size() == 0) {
run.setText(text.replaceAll(" ", ""));
}
setFontStyle(allStyles, run, paragraph, docxDocument, children);
}
allStyles.removeAll(childrenStyle);
}
}
} else {
if ("hr".equals(e.tagName())) {
XWPFRun run = paragraph.createRun();
run.setText("———————————————————————————————————————");
}
}
}
public static XWPFRun addPicture(XWPFRun run, String pictureUrl,String fileName) {
if (pictureUrl == null) {
return run;
}
URL url = null;
InputStream inputStream = null;
try {
pictureUrl = URLDecoder.decode(pictureUrl,"UTF-8");
url = new URL(pictureUrl);
inputStream = url.openConnection().getInputStream();
run.addPicture(inputStream, getPictureType(pictureUrl),fileName, Units.toEMU(400), Units.toEMU(256));
} catch (MalformedURLException e) {
throw new RuntimeException("图片url解析异常,url=" + pictureUrl);
} catch (IOException e) {
throw new RuntimeException("获取图片异常,url=" + pictureUrl);
} catch (InvalidFormatException e) {
throw new RuntimeException("添加图片异常,url=" + pictureUrl);
}
return run;
}
private static int getPictureType(String picType){
int res = XWPFDocument.PICTURE_TYPE_PICT;
if(picType != null){
if(picType.equalsIgnoreCase("png")){
res = XWPFDocument.PICTURE_TYPE_PNG;
}else if(picType.equalsIgnoreCase("dib")){
res = XWPFDocument.PICTURE_TYPE_DIB;
}else if(picType.equalsIgnoreCase("emf")){
res = XWPFDocument.PICTURE_TYPE_EMF;
}else if(picType.equalsIgnoreCase("jpg") || picType.equalsIgnoreCase("jpeg")){
res = XWPFDocument.PICTURE_TYPE_JPEG;
}else if(picType.equalsIgnoreCase("wmf")){
res = XWPFDocument.PICTURE_TYPE_WMF;
}
}
return res;
}
private static List<String> cellStyle(Element c, List<String> tempStyles) {
List<Element> collect = c.childNodes().stream().filter(s -> s instanceof Element).map(s -> (Element) s).collect(Collectors.toList());
for (Element element : collect) {
tempStyles.add(addTagStyle(element.tagName(), element));
List<Node> childs = element.childNodes().stream().filter(s -> s instanceof Element).collect(Collectors.toList());
if (childs != null && childs.size() != 0) {
cellStyle(element, tempStyles);
}
}
return tempStyles;
}
public static String addTagStyle(String tagName, Element children) {
String style = "";
switch (tagName) {
case "font":
if (StringUtils.isNotBlank(children.attr("face"))) {
style = "face:" + children.attr("face");
}
if (StringUtils.isNotBlank(children.attr("size"))) {
style = "size:" + children.attr("size");
}
if (StringUtils.isNotBlank(children.attr("color"))) {
style = "color:" + children.attr("color");
}
break;
case "strike":
style = "strike:";
break;
case "br":
style = "br:";
break;
case "u":
style = "u:";
break;
case "i":
style = "i:";
break;
case "b":
style = "b:";
break;
case "a":
if (StringUtils.isNotBlank(children.attr("href"))) {
style = "a:" + children.attr("href");
}
break;
}
return style;
}
public static void setFontStyle(List<String> styles, XWPFRun run, XWPFParagraph paragraph, XWPFDocument docxDocument, Element children) {
Collections.sort(styles, new Comparator<String>() {
@Override
public int compare(String o1, String o2) {
if (o1.contains("h") || "p".equals(o1)||o1.contains("div")) {
return -1;
} else if ("p".equals(o2) || o2.contains("h")||o2.contains("div")) {
return 1;
} else {
return 0;
}
}
});
for (String styleValue : styles) {
if (StringUtils.isBlank(styleValue)) {
continue;
}
String style = styleValue.substring(0, styleValue.indexOf(":")).replaceAll(" ", "");
String value = styleValue.substring(styleValue.indexOf(":") + 1).replaceAll(" ", "");
switch (style) {
case "p":
break;
case "div":
paragraph.setAlignment(ParagraphAlignment.BOTH);
paragraph.setIndentationFirstLine(567);
break;
case "h1":
addCustomHeadingStyle(docxDocument, "标题 1", 1);
paragraph.setStyle("标题 1");
run.setBold(true);
run.setColor("000000");
run.setFontFamily("宋体");
run.setFontSize(20);
break;
case "h2":
addCustomHeadingStyle(docxDocument, "标题 2", 2);
paragraph.setStyle("标题 2");
run.setBold(true);
run.setColor("000000");
run.setFontFamily("宋体");
run.setFontSize(18);
break;
case "h3":
addCustomHeadingStyle(docxDocument, "标题 3", 3);
paragraph.setStyle("标题 3");
run.setBold(true);
run.setColor("000000");
run.setFontFamily("宋体");
run.setFontSize(16);
break;
case "line-height":
run.setTextPosition(Integer.parseInt(value));
break;
case "face":
CTFonts ctFonts = run.getCTR().addNewRPr().addNewRFonts();
ctFonts.setEastAsia(value);
ctFonts.setAscii(value);
break;
case "size":
run.setFontSize(fontSizeConvert(value));
break;
case "color":
run.setColor(value.replaceAll("#", ""));
break;
case "strike":
run.setStrikeThrough(true);
break;
case "br":
run.addCarriageReturn();
break;
case "u":
run.setUnderline(UnderlinePatterns.SINGLE);
break;
case "i":
run.setItalic(true);
break;
case "b":
run.setBold(true);
break;
case "background-color":
run.getCTR().addNewRPr().addNewHighlight().setVal(getBackground(value));
break;
case "a":
XWPFRun hyperRun = paragraph.insertNewHyperlinkRun(0, value);
hyperRun.setText(children.text().replaceAll(" ", ""));
hyperRun.setColor("0563C1");
hyperRun.setUnderline(UnderlinePatterns.SINGLE);
break;
case "TEXT-ALIGN":
if ("center".equals(value)) {
paragraph.setAlignment(ParagraphAlignment.CENTER);
} else if ("left".equals(value)) {
paragraph.setAlignment(ParagraphAlignment.LEFT);
} else if ("right".equals(value)) {
paragraph.setAlignment(ParagraphAlignment.RIGHT);
}
break;
}
}
}
private static void addCustomHeadingStyle(XWPFDocument docxDocument, String strStyleId, int headingLevel) {
CTStyle ctStyle = CTStyle.Factory.newInstance();
ctStyle.setStyleId(strStyleId);
CTString styleName = CTString.Factory.newInstance();
styleName.setVal(strStyleId);
ctStyle.setName(styleName);
CTDecimalNumber indentNumber = CTDecimalNumber.Factory.newInstance();
indentNumber.setVal(BigInteger.valueOf(headingLevel));
ctStyle.setUiPriority(indentNumber);
CTOnOff onoffnull = CTOnOff.Factory.newInstance();
ctStyle.setUnhideWhenUsed(onoffnull);
ctStyle.setQFormat(onoffnull);
CTPPr ppr = CTPPr.Factory.newInstance();
ppr.setOutlineLvl(indentNumber);
ctStyle.setPPr((CTPPrGeneral) ppr);
XWPFStyle style = new XWPFStyle(ctStyle);
XWPFStyles styles = docxDocument.createStyles();
style.setType(STStyleType.PARAGRAPH);
styles.addStyle(style);
}
public static Integer fontSizeConvert(String level) {
Integer fontSize = null;
if (StringUtils.isBlank(level)) {
return fontSize;
}
switch (level) {
case "1":
fontSize = 7;
break;
case "2":
fontSize = 8;
break;
case "3":
fontSize = 9;
break;
case "4":
fontSize = 10;
break;
case "5":
fontSize = 14;
break;
case "6":
fontSize = 18;
break;
case "7":
fontSize = 28;
break;
case "8":
fontSize = 36;
break;
case "9":
fontSize = 48;
break;
case "10":
fontSize = 72;
break;
default:
fontSize = 5;
}
return fontSize;
}
public static STHighlightColor.Enum getBackground(String color) {
color = color.replaceAll(" ", "");
if ("yellow".equals(color) || "rgb(255,255,0)".equals(color) || "#FFFF00".equals(color)) {
return STHighlightColor.YELLOW;
} else if ("lime".equals(color) || "rgb(0,255,0)".equals(color) || "#00FF00".equals(color)) {
return STHighlightColor.GREEN;
} else if ("aqua".equals(color) || "rgb(0,255,255)".equals(color) || "#00FFFF".equals(color)) {
return STHighlightColor.CYAN;
} else if ("fuchsia".equals(color) || "rgb(255,0,255)".equals(color) || "#FF00FF".equals(color)) {
return STHighlightColor.MAGENTA;
} else if ("blue".equals(color) || "rgb(0,0,255)".equals(color) || "#0000FF".equals(color)) {
return STHighlightColor.BLUE;
} else if ("red".equals(color) || "rgb(255,0,0)".equals(color) || "#FF0000".equals(color)) {
return STHighlightColor.RED;
} else if ("navy".equals(color) || "rgb(0,0,128)".equals(color) || "#000080".equals(color)) {
return STHighlightColor.DARK_BLUE;
} else if ("teal".equals(color) || "rgb(0,128,128)".equals(color) || "#008080".equals(color)) {
return STHighlightColor.DARK_CYAN;
} else if ("green".equals(color) || "rgb(0,128,0)".equals(color) || "#008000".equals(color)) {
return STHighlightColor.DARK_GREEN;
} else if ("purple".equals(color) || "rgb(128,0,128)".equals(color) || "#800080".equals(color)) {
return STHighlightColor.DARK_MAGENTA;
} else if ("maroon".equals(color) || "rgb(128,0,0)".equals(color) || "#800000".equals(color)) {
return STHighlightColor.DARK_RED;
} else if ("olive".equals(color) || "rgb(128,128,0)".equals(color) || "#808000".equals(color)) {
return STHighlightColor.DARK_YELLOW;
} else if ("gray".equals(color) || "rgb(128,128,128)".equals(color) || "#808080".equals(color)) {
return STHighlightColor.DARK_GRAY;
} else if ("silver".equals(color) || "rgb(192,192,192)".equals(color) || "#C0C0C0".equals(color)) {
return STHighlightColor.LIGHT_GRAY;
} else if ("black".equals(color) || "rgb(0,0,0)".equals(color) || "#000000".equals(color)) {
return STHighlightColor.BLACK;
} else {
return STHighlightColor.NONE;
}
}
}