html字符串导出为word

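添加依赖(除下面列出的 jsoup、poi 之外,controller 代码还用到了 commons-lang3 的 StringUtils 以及 Spring Web / Servlet API,需要在项目中自行引入)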

<!-- jsoup依赖 主要是解析图片标签,然后缩放图片大小-->

<dependency>

<groupId>org.jsoup</groupId>

<artifactId>jsoup</artifactId>

<version>1.12.1</version>

</dependency>

<!-- poi依赖-->

<dependency>

<groupId>org.apache.poi</groupId>

<artifactId>poi</artifactId>

<version>4.1.0</version>

</dependency>

<dependency>

<groupId>org.apache.poi</groupId>

<artifactId>poi-ooxml</artifactId>

<version>4.1.0</version>

</dependency>

controller代码

package com.zl.exportword;

import org.apache.commons.lang3.StringUtils;

import org.jsoup.Jsoup;

import org.jsoup.nodes.Document;

import org.jsoup.nodes.Element;

import org.jsoup.select.Elements;

import org.springframework.web.bind.annotation.RequestMapping;

import org.springframework.web.bind.annotation.RestController;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

import java.awt.image.BufferedImage;

import java.io.BufferedInputStream;

import java.io.File;

import java.io.FileInputStream;

import java.util.ArrayList;

import java.util.List;

/**
 * Demo controller that exports rich-text (HTML) content as a Word document.
 *
 * <p>Before exporting, every {@code <img>} whose {@code src} holds Base64 image data is
 * decoded; images wider than {@link #MAX_IMAGE_WIDTH} pixels are scaled down proportionally
 * and re-embedded as PNG so that they fit on the Word page.
 *
 * @author lei
 * @version 1.0
 * @date 2022/11/14 10:25
 */
@RestController
@RequestMapping("/export")
public class ExportController {

    /** Images wider than this many pixels are scaled down to exactly this width. */
    private static final int MAX_IMAGE_WIDTH = 650;

    /** Data-URI header written in front of PNG image data produced or normalised by this class. */
    private static final String PNG_DATA_URI_PREFIX = "data:image/png;base64,";

    /** Matches the header of any Base64 image data URI, e.g. {@code data:image/jpeg;base64,}. */
    private static final String DATA_URI_HEADER_REGEX = "(?i)^data:image/[^;,]+;base64,";

    /** Sample rich-text content up to (and including) the opening quote of the image {@code src}. */
    private static final String DEMO_HTML_HEAD =
            "<h1>如何将富文本内容导出到word文档</h1><p style=\"color:red;font-size:20px;\">采用poi将富文本内容导出到word文档</p><div style=\"background-color:green;\">这是有背景颜色的div内容</div>\n"
                    + "<img src=\"";

    /** Sample rich-text content following the image {@code src} value. */
    private static final String DEMO_HTML_TAIL = "\">这是base64编码后的图片";

    /**
     * Exports the sample rich-text content as a Word download on the HTTP response.
     *
     * @param request  current HTTP request
     * @param response HTTP response the document is written to
     * @throws Exception if image processing or writing the document fails; the failure is
     *                   propagated instead of being swallowed so the client does not receive
     *                   an empty "successful" response
     */
    @RequestMapping(value = "/exportWord")
    public void export(HttpServletRequest request, HttpServletResponse response) throws Exception {
        String tmpContent = DEMO_HTML_HEAD + "这里写base64后的图片编码" + DEMO_HTML_TAIL;
        WordUtil.exportHtmlToWord(request, response, shrinkEmbeddedImages(tmpContent), "富文本内容导出word");
    }

    /**
     * Local test: exports the sample content to a file under {@code /Users/lei/}.
     *
     * <p>The Base64 image text is too long to inline, so it is read from
     * {@code /Users/lei/base.txt}.
     *
     * @throws Exception if the Base64 file cannot be read or the document cannot be written
     */
    @RequestMapping(value = "/export")
    public void export() throws Exception {
        // readAllBytes opens and closes the file itself (the previous stream was never closed).
        // Base64 text is pure ASCII, so UTF-8 decoding is safe; trim() drops a trailing newline.
        String base64Image = new String(
                java.nio.file.Files.readAllBytes(new File("/Users/lei/base.txt").toPath()),
                java.nio.charset.StandardCharsets.UTF_8).trim();
        String tmpContent = DEMO_HTML_HEAD + base64Image + DEMO_HTML_TAIL;

        // The payload written by WordUtil is an OLE2 container, i.e. ".doc" rather than ".docx".
        WordUtil.exportHtmlToWord("/Users/lei/", shrinkEmbeddedImages(tmpContent), "富文本内容导出word.doc");
    }

    /**
     * Rewrites the {@code src} of every Base64-embedded image in {@code html} so that no image
     * is wider than {@link #MAX_IMAGE_WIDTH} pixels.
     *
     * <p>Works in two phases so that a rewritten value can never be mistaken for a source that
     * still has to be processed: first each distinct {@code src} is swapped for a numbered
     * placeholder, then each placeholder is swapped for the final value. All replacements are
     * literal (no regex), so special characters inside the {@code src} are harmless.
     *
     * @param html rich-text content
     * @return the content with image sources rewritten
     * @throws java.io.IOException if an image cannot be resized
     */
    private static String shrinkEmbeddedImages(String html) throws java.io.IOException {
        Elements images = Jsoup.parse(html).getElementsByTag("img");

        List<String> sources = new ArrayList<>();
        String result = html;
        for (Element image : images) {
            String src = image.attr("src");
            // A blank src has nothing to rewrite (and replacing "" would hit every position);
            // a repeated src has already been swapped for its placeholder.
            if (StringUtils.isBlank(src) || sources.contains(src)) {
                continue;
            }
            result = result.replace(src, placeholder(sources.size()));
            sources.add(src);
        }

        for (int i = 0; i < sources.size(); i++) {
            result = result.replace(placeholder(i), rewriteImageSource(sources.get(i)));
        }
        return result;
    }

    /** Returns the temporary marker that stands in for the image source with the given index. */
    private static String placeholder(int index) {
        return "{{image_src" + index + "}}";
    }

    /**
     * Computes the final {@code src} value for one image.
     *
     * @param src original attribute value: a Base64 image data URI or bare Base64 text
     * @return an empty string if the data is not a decodable image; the image unchanged if it
     *         is narrow enough; otherwise a PNG data URI of the proportionally scaled image
     * @throws java.io.IOException if resizing fails
     */
    private static String rewriteImageSource(String src) throws java.io.IOException {
        String base64 = src.replaceFirst(DATA_URI_HEADER_REGEX, "");
        boolean hadDataUriHeader = base64.length() != src.length();

        BufferedImage image;
        try {
            image = ImageUtils.bytesToBufferedImage(ImageUtils.base64ToByte(base64));
        } catch (java.io.IOException | RuntimeException ignored) {
            // Not valid Base64: treat it like any other undecodable image instead of
            // aborting the whole export.
            image = null;
        }
        if (image == null) {
            return "";
        }

        int width = image.getWidth();
        if (width <= MAX_IMAGE_WIDTH) {
            // Keep the original data URI (and its MIME type); bare Base64 gets a PNG header.
            return hadDataUriHeader ? src : PNG_DATA_URI_PREFIX + base64;
        }

        // Scale the height by the same factor as the width; never let it collapse to zero.
        int height = Math.max(1, (int) (image.getHeight() * (double) MAX_IMAGE_WIDTH / width));
        BufferedImage scaled = ImageUtils.resizeImage(image, MAX_IMAGE_WIDTH, height);
        return PNG_DATA_URI_PREFIX + ImageUtils.imageToBase64(ImageUtils.imageToBytes(scaled));
    }
}

WordUtil工具类

package com.zl.exportword;

import org.apache.poi.poifs.filesystem.DirectoryEntry;

import org.apache.poi.poifs.filesystem.DocumentEntry;

import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import javax.servlet.ServletOutputStream;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

import java.io.ByteArrayInputStream;

import java.io.File;

import java.io.FileOutputStream;

/**
 * POI helper that saves HTML content as a Word ({@code .doc}) document.
 *
 * <p>The HTML is wrapped in a Word-specific envelope, encoded as GBK and stored as the
 * {@code WordDocument} entry of an OLE2 (POIFS) container.
 *
 * @author lei
 * @version 1.0
 * @date 2022/11/14 10:23
 */
public class WordUtil {

    /** Opening {@code <html>} tag carrying the Office XML namespaces so Word recognises the markup. */
    private static final String HTML_OPEN = "<html "
            + "xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:w=\"urn:schemas-microsoft-com:office:word\" xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\" xmlns=\"http://www.w3.org/TR/REC-html40\""
            + ">";

    /** {@code <head>} section; {@code <w:View>Print</w:View>} switches Word from web layout to page layout. */
    private static final String HTML_HEAD = "<head>"
            + "<!--[if gte mso 9]><xml><w:WordDocument><w:View>Print</w:View><w:TrackMoves>false</w:TrackMoves><w:TrackFormatting/><w:ValidateAgainstSchemas/><w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid><w:IgnoreMixedContent>false</w:IgnoreMixedContent><w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText><w:DoNotPromoteQF/><w:LidThemeOther>EN-US</w:LidThemeOther><w:LidThemeAsian>ZH-CN</w:LidThemeAsian><w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript><w:Compatibility><w:BreakWrappedTables/><w:SnapToGridInCell/><w:WrapTextWithPunct/><w:UseAsianBreakRules/><w:DontGrowAutofit/><w:SplitPgBreakAndParaMark/><w:DontVertAlignCellWithSp/><w:DontBreakConstrainedForcedTables/><w:DontVertAlignInTxbx/><w:Word11KerningPairs/><w:CachedColBalance/><w:UseFELayout/></w:Compatibility><w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel><m:mathPr><m:mathFont m:val=\"Cambria Math\"/><m:brkBin m:val=\"before\"/><m:brkBinSub m:val=\"--\"/><m:smallFrac m:val=\"off\"/><m:dispDef/><m:lMargin m:val=\"0\"/> <m:rMargin m:val=\"0\"/><m:defJc m:val=\"centerGroup\"/><m:wrapIndent m:val=\"1440\"/><m:intLim m:val=\"subSup\"/><m:naryLim m:val=\"undOvr\"/></m:mathPr></w:WordDocument></xml><![endif]-->"
            + "</head>";

    /**
     * Exports rich-text content as a Word download on the HTTP response.
     *
     * @param request  current HTTP request
     * @param response HTTP response the document is written to
     * @param content  HTML body content to export
     * @param fileName download file name without extension ({@code .doc} is appended)
     * @throws Exception if encoding the content or writing the response fails
     */
    public static void exportHtmlToWord(HttpServletRequest request, HttpServletResponse response, String content, String fileName) throws Exception {
        request.setCharacterEncoding("utf-8");
        response.setContentType("application/msword");
        // Legacy "filename" keeps the original GB2312 byte trick for old clients; "filename*"
        // (RFC 6266 / RFC 5987) carries the UTF-8 name that current browsers prefer.
        response.addHeader("Content-Disposition", "attachment;filename="
                + new String(fileName.getBytes("GB2312"), "iso8859-1") + ".doc"
                + ";filename*=UTF-8''"
                + java.net.URLEncoder.encode(fileName + ".doc", "UTF-8").replace("+", "%20"));

        try (ServletOutputStream ostream = response.getOutputStream()) {
            writeWordDocument(content, ostream);
        }
    }

    /**
     * Exports rich-text content as a Word document on the local file system.
     *
     * @param filepath directory prefix of the target file; it is concatenated with
     *                 {@code fileName} as-is, so it must end with a path separator
     * @param content  HTML body content to export
     * @param fileName target file name including extension
     * @throws Exception if encoding the content or writing the file fails
     */
    public static void exportHtmlToWord(String filepath, String content, String fileName) throws Exception {
        try (FileOutputStream out = new FileOutputStream(new File(filepath + fileName))) {
            writeWordDocument(content, out);
        }
    }

    /** Wraps the body content in the HTML envelope that Word needs to render it as a document. */
    private static String wrapHtml(String content) {
        return new StringBuilder()
                .append(HTML_OPEN)
                .append(HTML_HEAD)
                .append("<body>")
                .append(content)
                .append("</body></html>")
                .toString();
    }

    /** Stores the wrapped HTML as the "WordDocument" entry of a POIFS container and writes it to {@code out}. */
    private static void writeWordDocument(String content, java.io.OutputStream out) throws java.io.IOException {
        // GBK encoding is required here; other encodings produced garbled Chinese text.
        byte[] html = wrapHtml(content).getBytes("GBK");
        try (POIFSFileSystem poifs = new POIFSFileSystem();
             ByteArrayInputStream htmlStream = new ByteArrayInputStream(html)) {
            DirectoryEntry root = poifs.getRoot();
            // The entry must be named "WordDocument", otherwise Word shows garbled output.
            root.createDocument("WordDocument", htmlStream);
            poifs.writeFilesystem(out);
        }
    }
}

图片处理ImageUtils工具类

package com.zl.exportword;

import sun.misc.BASE64Decoder;

import sun.misc.BASE64Encoder;

import javax.imageio.ImageIO;

import java.awt.*;

import java.awt.image.BufferedImage;

import java.io.ByteArrayInputStream;

import java.io.ByteArrayOutputStream;

import java.io.IOException;

/**
 * Image helpers: resizing, PNG encoding/decoding and Base64 conversion.
 *
 * <p>Base64 handling uses {@code java.util.Base64} (referenced by its fully qualified name)
 * instead of the internal {@code sun.misc} codecs, which no longer exist on JDK 9+.
 *
 * @author lei
 * @date 2022/11/14 10:20
 * @version 1.0
 */
public class ImageUtils {

    /**
     * Scales an image to the given size.
     *
     * <p>The image is drawn synchronously through {@link Graphics2D}, so the result is always
     * fully rendered. Transparency of the source is preserved rather than flattened onto a
     * black background.
     *
     * @param originalImage image to scale
     * @param targetWidth   width of the result in pixels, must be positive
     * @param targetHeight  height of the result in pixels, must be positive
     * @return a new image of the requested size
     * @throws IOException kept for compatibility with existing callers
     */
    public static BufferedImage resizeImage(BufferedImage originalImage, int targetWidth, int targetHeight) throws IOException {
        int type = originalImage.getColorModel().hasAlpha()
                ? BufferedImage.TYPE_INT_ARGB
                : BufferedImage.TYPE_INT_RGB;
        BufferedImage outputImage = new BufferedImage(targetWidth, targetHeight, type);

        Graphics2D graphics = outputImage.createGraphics();
        try {
            graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
            graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
            graphics.drawImage(originalImage, 0, 0, targetWidth, targetHeight, null);
        } finally {
            // Release the native resources held by the graphics context.
            graphics.dispose();
        }
        return outputImage;
    }

    /**
     * Encodes bytes as Base64 text.
     *
     * @param data bytes to encode
     * @return Base64 text on a single line (no line breaks), safe to embed in a data URI
     */
    public static String imageToBase64(byte[] data) {
        return java.util.Base64.getEncoder().encodeToString(data);
    }

    /**
     * Decodes Base64 text into bytes.
     *
     * <p>Line breaks and other characters outside the Base64 alphabet are skipped, so
     * line-wrapped input is accepted.
     *
     * @param base64 Base64 text
     * @return the decoded bytes
     * @throws IOException if the text is not valid Base64
     */
    public static byte[] base64ToByte(String base64) throws IOException {
        try {
            return java.util.Base64.getMimeDecoder().decode(base64);
        } catch (IllegalArgumentException e) {
            throw new IOException("Invalid Base64 image data", e);
        }
    }

    /**
     * Encodes an image as PNG.
     *
     * @param bImage image to encode
     * @return the PNG bytes
     * @throws java.io.UncheckedIOException if encoding fails (previously the error was only
     *         printed and a truncated byte array was returned)
     */
    public static byte[] imageToBytes(BufferedImage bImage) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            ImageIO.write(bImage, "png", out);
        } catch (IOException e) {
            throw new java.io.UncheckedIOException("Failed to encode image as PNG", e);
        }
        return out.toByteArray();
    }

    /**
     * Decodes image bytes into a {@link BufferedImage}.
     *
     * @param ImageByte encoded image bytes (any format ImageIO can read); may be {@code null}
     * @return the decoded image, or {@code null} if the input is {@code null}, empty or not a
     *         readable image
     */
    public static BufferedImage bytesToBufferedImage(byte[] ImageByte) {
        if (ImageByte == null || ImageByte.length == 0) {
            return null;
        }
        try {
            return ImageIO.read(new ByteArrayInputStream(ImageByte));
        } catch (IOException ignored) {
            // Corrupt image data: callers treat null as "not an image".
            return null;
        }
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值