一、html替换文本
场景:
前端使用富文本传输内容,如:<p>xxx1</p> <p>xxx2</p> <p>xxx3</p> <p><img src="../1595356463369330689_123.png"></p>
后端word中使用占位符${xxx},代码中获取到占位符所在段落,直接替换后样式为:
解决方案一:
解析常用标签 <p>、<br>、<img> 等:遇到 <p> 时拼接四个空格(“ ”),遇到 </p> 时强制换行;解析 <img> 中的 src、width 等属性,将图片、图片标识写入文件路径位置。
缺陷:
1、图片为一个段落(XWPFParagraph)中的一行(XWPFRun),单run居中无效,并且在导出的word中居中图片则整个段落居中,无法满足需求。
2、预解析所有xml标签,防止内容扩充。
猜想:
递归地根据 <img> 切断内容,匹配到 <img> 时在当前位置创建段落,然后插入图片,并设置居中属性。
解决方案二:
// Find the index of the paragraph that holds the anchor run ("前置内容");
// the HTML content is inserted relative to that paragraph.
List<XWPFParagraph> oldList = document.getParagraphs();
// Stays -1 when no run matches the anchor text.
int flag = -1;
for (int i = 0; i < oldList.size() && flag == -1; i++) {
    for (XWPFRun xwpfRun : oldList.get(i).getRuns()) {
        // Read the run's first text node. The previous code passed
        // getTextPosition() as the index, but that value is the vertical
        // raise/lower offset of the text, not a text-node index, so any run
        // with a non-default offset could fail with an index error.
        String text = xwpfRun.getText(0);
        // A constant-first equals also covers the null and empty cases.
        if ("前置内容".equals(text)) {
            flag = i;
            break;
        }
    }
}
// Insert the HTML content as a new paragraph; at this point the content still carries its tags.
// NOTE(review): ++flag increments flag before the lookup, so the paragraph fetched is the one
// after the matched index; with flag == -1 this selects paragraph 0 — confirm that is intended.
XWPFParagraph paragraph = document.getParagraphArray(++flag);
XmlCursor cursor = paragraph.getCTP().newCursor();
// Move the cursor to the next sibling so the new paragraph is created at that position.
cursor.toNextSibling();
XWPFParagraph newParagraph = document.insertNewParagraph(cursor);
XWPFRun run = newParagraph.createRun();
// The raw (still tagged) content is written as plain run text.
run.setText(content);
// Remove every space that lies outside of a tag; text inside <...> is kept untouched.
// This is done in a single pass instead of the former placeholder swap
// ("term-vince" + String.replaceFirst): replaceFirst treats '$' and '\' in the
// restored tag as regex replacement syntax and could throw or corrupt the tag,
// and text that already contained the placeholder would have been mangled.
Pattern p = Pattern.compile("<(.*?)(>)");
Matcher m = p.matcher(content);
StringBuilder strippedContent = new StringBuilder(content.length());
int lastTagEnd = 0;
while (m.find()) {
    // Text between the previous tag and this one: drop its spaces.
    strippedContent.append(content.substring(lastTagEnd, m.start()).replace(" ", ""));
    // The tag itself is copied verbatim.
    strippedContent.append(m.group(0));
    lastTagEnd = m.end();
}
// Trailing text after the last tag (or the whole content when there is no tag).
strippedContent.append(content.substring(lastTagEnd).replace(" ", ""));
content = strippedContent.toString();
// Align the basic formatting with the Word document the content is inserted into:
// line-height:150% is the line spacing, text-indent:24.0pt the first-line indent.
content = content.replace("<p>", "<p style=\"line-height:150%; text-indent:24.0pt\">");
// Translate the rich-text editor's centering class into an inline style.
content = content.replace("class=\"ql-align-center\"", "style=\"text-align: center;\"");
MyXWPFHtmlDocument myXWPFHtmlDocument = createHtmlDoc(document, "htmlDoc" + ++index);
// Collect every image path referenced by the content.
// Optional.ofNullable is used because Optional.of throws on null, which made the
// former orElse fallback unreachable.
List<String> imgSrc = Optional.ofNullable(getImgSrc(content)).orElseGet(ArrayList::new);
for (String url : imgSrc) {
    // Convert the image to base64 and inline it.
    // A path reference would be resolved when Word opens the file, i.e. on the
    // client machine, where the referenced file cannot be found.
    String picture = url2picture(url);
    content = content.replace(url, "data:image/png;base64," + picture);
}
// Put the prepared content into the body of the HTML part's template.
myXWPFHtmlDocument.setHtml(myXWPFHtmlDocument.getHtml().replace("<body></body>", "<body style=\"font-family:'FangSong';\"> " + content + "</body>"));
// Swap the paragraph containing the original text for an altChunk pointing at the HTML part.
replaceIBodyElementWithAltChunk(document, loop.getProblemDesc() + "", myXWPFHtmlDocument);
/**
 * Custom document part that holds an HTML payload stored inside the package.
 *
 * <p>The part starts with an empty HTML skeleton; callers replace the
 * {@code <body></body>} placeholder through {@link #getHtml()} and
 * {@link #setHtml(String)}. The HTML is written to the underlying package
 * part by {@link #commit()}.
 */
private static class MyXWPFHtmlDocument extends POIXMLDocumentPart {
    /** Current HTML text of this part. */
    private String html;
    /** Identifier of this part; also used as its relationship id by the creator. */
    private String id;

    /**
     * @param part package part that backs this document part
     * @param id   identifier of the part
     */
    private MyXWPFHtmlDocument(PackagePart part, String id) throws Exception {
        super(part);
        this.html = "<!DOCTYPE html><html><head><style></style><title>HTML import</title></head><body></body>";
        this.id = id;
    }

    private String getId() {
        return id;
    }

    private String getHtml() {
        return html;
    }

    private void setHtml(String html) {
        this.html = html;
    }

    /**
     * Writes the HTML text to the package part as UTF-8.
     *
     * @throws IOException if the part cannot be written
     */
    @Override
    protected void commit() throws IOException {
        PackagePart part = getPackagePart();
        // try-with-resources closes the writer and the stream even when write() fails.
        try (OutputStream out = part.getOutputStream();
             Writer writer = new OutputStreamWriter(out, "UTF-8")) {
            writer.write(html);
        }
    }
}
/**
 * Creates an HTML part named {@code /word/<id>.html} in the document's package
 * and registers it on the document under the relationship id {@code id}.
 *
 * @param document document that receives the new part
 * @param id       part identifier, used for both the file name and the relation id
 * @return the newly created HTML document part
 */
private static MyXWPFHtmlDocument createHtmlDoc(XWPFDocument document, String id) throws Exception {
    final String partPath = "/word/" + id + ".html";
    PackagePart htmlPart = document.getPackage()
            .createPart(PackagingURIHelper.createPartName(partPath), "text/html");
    MyXWPFHtmlDocument htmlDocument = new MyXWPFHtmlDocument(htmlPart, id);
    document.addRelation(htmlDocument.getId(), new XWPFHtmlRelation(), htmlDocument);
    return htmlDocument;
}
/**
 * Relation descriptor used when attaching the HTML part to the document:
 * content type {@code text/html}, the {@code aFChunk} relationship type and
 * the default part name template {@code /word/htmlDoc#.html}.
 */
private final static class XWPFHtmlRelation extends POIXMLRelation {
    private static final String CONTENT_TYPE = "text/html";
    private static final String RELATION_TYPE =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk";
    private static final String DEFAULT_PART_NAME = "/word/htmlDoc#.html";

    private XWPFHtmlRelation() {
        super(CONTENT_TYPE, RELATION_TYPE, DEFAULT_PART_NAME);
    }
}
/**
 * Replaces the first body paragraph whose text contains {@code textToFind}
 * with an {@code altChunk} element that references the given HTML part.
 *
 * <p>The altChunk is inserted after the matched paragraph, then the paragraph
 * itself is removed from the document. Nothing happens when no paragraph matches.
 *
 * @param document           document to modify
 * @param textToFind         text identifying the paragraph to replace
 * @param myXWPFHtmlDocument HTML part whose id becomes the altChunk's relationship id
 */
private static void replaceIBodyElementWithAltChunk(XWPFDocument document, String textToFind,
        MyXWPFHtmlDocument myXWPFHtmlDocument) throws Exception {
    int pos = 0;
    for (IBodyElement bodyElement : document.getBodyElements()) {
        if (bodyElement instanceof XWPFParagraph) {
            XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
            String text = paragraph.getText();
            if (text != null && text.contains(textToFind)) {
                XmlCursor cursor = paragraph.getCTP().newCursor();
                cursor.toEndToken();
                // Advance past the paragraph to the insertion point: the start of the
                // next sibling element, or the parent's end token when the paragraph
                // is the last child. The former loop waited for a START token only
                // and never terminated when no element followed the paragraph.
                XmlCursor.TokenType token = cursor.toNextToken();
                while (token != XmlCursor.TokenType.START
                        && token != XmlCursor.TokenType.END
                        && token != XmlCursor.TokenType.NONE) {
                    token = cursor.toNextToken();
                }
                if (token == XmlCursor.TokenType.NONE) {
                    throw new IllegalStateException(
                            "No insertion point found after the paragraph containing: " + textToFind);
                }
                String uri = CTAltChunk.type.getName().getNamespaceURI();
                cursor.beginElement("altChunk", uri);
                // Step back onto the freshly created element to obtain its object.
                cursor.toParent();
                CTAltChunk cTAltChunk = (CTAltChunk) cursor.getObject();
                cTAltChunk.setId(myXWPFHtmlDocument.getId());
                document.removeBodyElement(pos);
                // Stop right after the removal: only the first match is replaced.
                break;
            }
        }
        pos++;
    }
}
/**
 * Reads an image file and returns its content as a Base64 string.
 *
 * <p>If the file cannot be read, the stack trace is printed and the encoder is
 * called with {@code null}, exactly as before.
 * NOTE(review): what the encoder does with {@code null} is not visible here — confirm.
 *
 * @param uri path of the image file
 * @return Base64 string produced by the encoder
 */
private static String url2picture(String uri) {
    byte[] data = null;
    // Read the whole file. The former available()/single read() pair could return
    // a truncated image, and the stream stayed open when reading failed.
    try (InputStream in = new FileInputStream(uri);
         java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream()) {
        byte[] chunk = new byte[8192];
        int read;
        while ((read = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, read);
        }
        data = buffer.toByteArray();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Base64-encode the bytes.
    Base64Encoder encoder = new Base64Encoder();
    return encoder.encode(data);
}
/**
 * Extracts the {@code src} value of every {@code <img>} tag in the content.
 *
 * <p>Tags may end with {@code />}, {@code ></img>} or {@code >}; tag and
 * attribute names are matched case-insensitively. Sources that contain
 * {@code base64}, {@code /static} or {@code /emotion} are left out (already
 * inlined images, static and editor emoticon images).
 *
 * @param content HTML text to scan; {@code null} or empty yields an empty list
 * @return the matching image sources in document order, never {@code null}
 */
public static List<String> getImgSrc(String content){
    List<String> list = new ArrayList<String>();
    if (content == null || content.isEmpty()) {
        return list;
    }
    // Compiled once per call instead of once per matched tag.
    Pattern imgPattern = Pattern.compile("<img\\b(.*?)(/>|></img>|>)",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    // The lookbehind stops "src" from matching inside attributes such as data-src;
    // the backreference requires the closing quote to be the same as the opening one.
    Pattern srcPattern = Pattern.compile("(?<![\\w-])src\\s*=\\s*([\"'])(.*?)\\1",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    Matcher imgMatcher = imgPattern.matcher(content);
    while (imgMatcher.find()) {
        // Attribute section of the current <img> tag.
        Matcher srcMatcher = srcPattern.matcher(imgMatcher.group(1));
        if (!srcMatcher.find()) {
            continue;
        }
        String imageSrc = srcMatcher.group(2);
        // Skip inlined base64 data, static images and editor emoticons.
        if (!imageSrc.contains("base64") && !imageSrc.contains("/static") && !imageSrc.contains("/emotion")) {
            list.add(imageSrc);
        }
    }
    return list;
}
踩坑:
默认样式与原word样式不同,通过字符串替换解决,或许还能解决更多问题。
声明:代码中创建html格式part引用自此