在Java中经常会碰到富文本使用场景,有些需求则需要将富文本内容转换为Word,并包含图片,没接触过的同学可能一开始会感觉头大,其实可以使用Apache POI库来进行实现。以下是一个简单的例子。
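引入本次需要的 Maven 依赖包(注意:下文代码除了 jsoup 和 POI 之外,还用到了 Hutool、Lombok、Spring、Servlet API 以及 Apache HttpCore 中的 ContentType,请确保项目中已经引入了这些依赖)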
<!--富文本转word begin-->
<!-- jsoup依赖 主要是解析图片标签,然后缩放图片大小-->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
<!-- poi依赖-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<!--富文本转word end-->
一:创建工具类
1.1、新增图片处理工具类
import cn.hutool.core.util.StrUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import sun.misc.BASE64Decoder;
import sun.misc.BASE64Encoder;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.Base64;
import java.net.URL;
/**
* @ClassName ImageUtils
* @Description: 图片处理工具类
* @Author bigearchart
* @Date 2024/5/20
* @Version V1.0
**/
@Component
@Slf4j
public class ImageUtils {

    /** Widest image, in pixels, that {@link #getImageStr(String)} embeds without shrinking it. */
    private static final int MAX_LOCAL_IMAGE_WIDTH = 500;

    /** Data-URI header used whenever the payload has been re-encoded by {@link #imageToBytes}. */
    private static final String PNG_DATA_URI_PREFIX = "data:image/png;base64,";

    /**
     * Scales an image to the given size.
     *
     * <p>The alpha channel of the source is preserved, so transparent images do not
     * end up with a black background.
     *
     * @param originalImage image to scale
     * @param targetWidth   width of the result in pixels, must be positive
     * @param targetHeight  height of the result in pixels, must be positive
     * @return a new image of exactly {@code targetWidth} x {@code targetHeight}
     * @throws IOException never thrown by this implementation; kept in the signature
     *                     so that existing callers continue to compile
     */
    public static BufferedImage resizeImage(BufferedImage originalImage, int targetWidth, int targetHeight) throws IOException {
        Image scaled = originalImage.getScaledInstance(targetWidth, targetHeight, Image.SCALE_AREA_AVERAGING);
        int type = originalImage.getColorModel().hasAlpha()
                ? BufferedImage.TYPE_INT_ARGB
                : BufferedImage.TYPE_INT_RGB;
        BufferedImage outputImage = new BufferedImage(targetWidth, targetHeight, type);
        Graphics graphics = outputImage.getGraphics();
        try {
            graphics.drawImage(scaled, 0, 0, null);
        } finally {
            // Graphics contexts hold native resources and must be released explicitly.
            graphics.dispose();
        }
        return outputImage;
    }

    /**
     * Encodes bytes as a single-line Base64 string (no line breaks), which is the
     * form a data URI expects.
     *
     * @param data bytes to encode
     * @return the Base64 text
     */
    public static String imageToBase64(byte[] data) {
        return Base64.getEncoder().encodeToString(data);
    }

    /**
     * Decodes Base64 text into bytes.
     *
     * <p>Line breaks and other characters outside the Base64 alphabet are skipped.
     * Text that still cannot be decoded yields an empty array instead of an
     * exception: callers in this file feed the result straight into
     * {@link #bytesToBufferedImage(byte[])} and treat "no image" as the failure signal.
     *
     * @param base64 Base64 text, may be {@code null}
     * @return the decoded bytes; empty for {@code null} or undecodable input
     * @throws IOException never thrown by this implementation; kept in the signature
     *                     so that existing callers continue to compile
     */
    public static byte[] base64ToByte(String base64) throws IOException {
        if (base64 == null) {
            return new byte[0];
        }
        try {
            return Base64.getMimeDecoder().decode(base64);
        } catch (IllegalArgumentException ignored) {
            // Deliberately lenient, see the method documentation.
            return new byte[0];
        }
    }

    /**
     * Encodes an image as PNG.
     *
     * @param bImage image to encode
     * @return the PNG file content
     * @throws UncheckedIOException  if encoding fails
     * @throws IllegalStateException if no PNG writer accepts the image
     */
    public static byte[] imageToBytes(BufferedImage bImage) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            if (!ImageIO.write(bImage, "png", out)) {
                throw new IllegalStateException("No PNG writer available for image type " + bImage.getType());
            }
        } catch (IOException e) {
            // Returning a truncated PNG would only move the failure further downstream.
            throw new UncheckedIOException("Failed to encode image as PNG", e);
        }
        return out.toByteArray();
    }

    /**
     * Decodes image file content into a {@link BufferedImage}.
     *
     * @param ImageByte encoded image (PNG, JPEG, ...), may be {@code null}
     * @return the decoded image, or {@code null} when the bytes are missing, are not
     *         a supported image format, or cannot be read
     */
    public static BufferedImage bytesToBufferedImage(byte[] ImageByte) {
        if (ImageByte == null) {
            return null;
        }
        try {
            return ImageIO.read(new ByteArrayInputStream(ImageByte));
        } catch (IOException ignored) {
            // ImageIO.read already answers null for unknown formats; report corrupt
            // data the same way because callers only check for null.
            return null;
        }
    }

    /**
     * Downloads an image and returns it as a Base64 data URI.
     *
     * <p>The media type is derived from the file extension of the URL path, so a
     * query string or fragment does not affect it.
     *
     * @param imageUrl absolute URL of the image
     * @return {@code data:image/<type>;base64,<payload>}
     * @throws IOException if the URL is malformed or the resource cannot be read
     */
    public static String convertToBase64(String imageUrl) throws IOException {
        URL url = new URL(imageUrl);
        String prefix = dataUriPrefix(url.getPath());
        try (InputStream inputStream = url.openStream()) {
            return prefix + Base64.getEncoder().encodeToString(readFully(inputStream));
        }
    }

    /**
     * Reads an image file and returns it as a Base64 data URI, shrinking it to at
     * most 500 pixels in width (aspect ratio kept).
     *
     * <p>An image that has to be shrunk is re-encoded as PNG and labelled as such;
     * any other file is embedded unchanged, labelled after its file extension.
     *
     * @param imgPath path of the image file
     * @return the data URI, or {@code null} if the path is not an existing regular
     *         file or the file is empty
     * @throws IOException          if the file cannot be read
     * @throws UncheckedIOException if the shrunk image cannot be encoded
     */
    public static String getImageStr(String imgPath) throws IOException {
        File file = new File(imgPath);
        if (!file.isFile()) {
            return null;
        }
        byte[] data;
        try (InputStream in = new FileInputStream(file)) {
            data = readFully(in);
        }
        if (data.length == 0) {
            return null;
        }

        String prefix = dataUriPrefix(imgPath);
        BufferedImage image = bytesToBufferedImage(data);
        if (image != null && image.getWidth() > MAX_LOCAL_IMAGE_WIDTH) {
            // Keep at least one pixel row: extremely wide images would otherwise
            // scale to height 0, which BufferedImage rejects.
            int scaledHeight = Math.max(1,
                    (int) (image.getHeight() * (double) MAX_LOCAL_IMAGE_WIDTH / image.getWidth()));
            data = imageToBytes(resizeImage(image, MAX_LOCAL_IMAGE_WIDTH, scaledHeight));
            prefix = PNG_DATA_URI_PREFIX;
        }
        return prefix + imageToBase64(data);
    }

    /** Builds the {@code data:<media type>;base64,} header from the file extension of a path. */
    private static String dataUriPrefix(String path) {
        int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        int dot = path.lastIndexOf('.');
        if (dot <= lastSeparator || dot == path.length() - 1) {
            // No usable extension: do not pretend to know the image type.
            return "data:application/octet-stream;base64,";
        }
        String subtype = path.substring(dot + 1).toLowerCase(java.util.Locale.ROOT);
        if ("jpg".equals(subtype)) {
            subtype = "jpeg";
        } else if ("svg".equals(subtype)) {
            subtype = "svg+xml";
        }
        return "data:image/" + subtype + ";base64,";
    }

    /** Reads a stream to its end; the caller remains responsible for closing it. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        byte[] buffer = new byte[4096];
        int bytesRead;
        while ((bytesRead = in.read(buffer)) != -1) {
            out.write(buffer, 0, bytesRead);
        }
        return out.toByteArray();
    }
}
1.2、新增Word工具类
import cn.hutool.core.util.StrUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.entity.ContentType;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.jeecg.config.oss.MyMultipartFile;
import org.jeecg.config.oss.TencentCOSUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.Resource;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
 * @ClassName WordUtil
 * @Description: Exports rich-text HTML as Word (.doc) files using Apache POI
 * @Author bigearchart
 * @Date 2024/5/20
 * @Version V1.0
 **/
@Component
@Slf4j
public class WordUtil {

    /** Opening html tag carrying the Office namespaces, so that Word recognises the markup. */
    private static final String HTML_OPEN = "<html " +
            "xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:w=\"urn:schemas-microsoft-com:office:word\" xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\" xmlns=\"http://www.w3.org/TR/REC-html40\"" +
            ">";

    /** Head section whose conditional XML switches Word from web layout to print layout. */
    private static final String HTML_HEAD = "<head>" +
            "<!--[if gte mso 9]><xml><w:WordDocument><w:View>Print</w:View><w:TrackMoves>false</w:TrackMoves><w:TrackFormatting/><w:ValidateAgainstSchemas/><w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid><w:IgnoreMixedContent>false</w:IgnoreMixedContent><w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText><w:DoNotPromoteQF/><w:LidThemeOther>EN-US</w:LidThemeOther><w:LidThemeAsian>ZH-CN</w:LidThemeAsian><w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript><w:Compatibility><w:BreakWrappedTables/><w:SnapToGridInCell/><w:WrapTextWithPunct/><w:UseAsianBreakRules/><w:DontGrowAutofit/><w:SplitPgBreakAndParaMark/><w:DontVertAlignCellWithSp/><w:DontBreakConstrainedForcedTables/><w:DontVertAlignInTxbx/><w:Word11KerningPairs/><w:CachedColBalance/><w:UseFELayout/></w:Compatibility><w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel><m:mathPr><m:mathFont m:val=\"Cambria Math\"/><m:brkBin m:val=\"before\"/><m:brkBinSub m:val=\"--\"/><m:smallFrac m:val=\"off\"/><m:dispDef/><m:lMargin m:val=\"0\"/> <m:rMargin m:val=\"0\"/><m:defJc m:val=\"centerGroup\"/><m:wrapIndent m:val=\"1440\"/><m:intLim m:val=\"subSup\"/><m:naryLim m:val=\"undOvr\"/></m:mathPr></w:WordDocument></xml><![endif]-->" +
            "</head>";

    /** Charset the wrapped HTML is encoded with; the original author notes it is needed to avoid garbled Chinese text. */
    private static final String WORD_CHARSET = "GBK";

    @Resource
    private TencentCOSUtils tencentCOSUtils;

    /**
     * Streams rich-text content to the HTTP response as a Word attachment named
     * {@code <fileName>.doc}.
     *
     * @param request  current request; its character encoding is set to utf-8
     * @param response response that receives the headers and the document
     * @param content  rich-text HTML placed inside the document body
     * @param fileName download file name without extension
     * @throws Exception if the document cannot be built or written
     */
    public static void exportHtmlToWord(HttpServletRequest request, HttpServletResponse response, String content, String fileName) throws Exception {
        // Build the document before touching the response, so that a failure here
        // does not leave download headers on an otherwise empty response.
        try (POIFSFileSystem poifs = buildWordFileSystem(content)) {
            request.setCharacterEncoding("utf-8");
            response.setContentType("application/msword");
            response.addHeader("Content-Disposition", "attachment;filename=" +
                    new String(fileName.getBytes("GB2312"), "iso8859-1") + ".doc");
            try (ServletOutputStream ostream = response.getOutputStream()) {
                poifs.writeFilesystem(ostream);
            }
        }
    }

    /**
     * Writes rich-text content as a Word document to the local file system.
     *
     * @param filepath directory prefix; it is concatenated with {@code fileName}
     *                 as-is, so it must end with a path separator
     * @param content  rich-text HTML placed inside the document body
     * @param fileName name of the file to create, including its extension
     * @throws Exception if the document cannot be built or the file cannot be written
     */
    public static void exportHtmlToWord(String filepath, String content, String fileName) throws Exception {
        try (POIFSFileSystem poifs = buildWordFileSystem(content);
             FileOutputStream out = new FileOutputStream(new File(filepath + fileName))) {
            poifs.writeFilesystem(out);
        }
    }

    /**
     * Uploads rich-text content to Tencent COS as a Word-readable file.
     *
     * <p>Unlike the two {@code exportHtmlToWord} variants, this method uploads the
     * wrapped HTML bytes directly and does not put them into a POI container.
     *
     * @param content  rich-text HTML placed inside the document body
     * @param fileName name used for the uploaded file
     * @return the value returned by {@code TencentCOSUtils.upload}, logged as the file address
     * @throws Exception if encoding or uploading fails
     */
    public String cosHtmlToWord( String content, String fileName) throws Exception {
        byte[] fileBytes = wrapHtml(content).getBytes(WORD_CHARSET);
        InputStream inputStream = new ByteArrayInputStream(fileBytes);
        MultipartFile file = new MyMultipartFile(fileName, fileName, ContentType.APPLICATION_OCTET_STREAM.toString(), inputStream);
        String url = tencentCOSUtils.upload(file, "file");
        log.info("\n\t==========文件地址:{}", url);
        return url;
    }

    /** Wraps the rich-text fragment in the html/head/body skeleton shared by all exports. */
    private static String wrapHtml(String content) {
        StringBuilder html = new StringBuilder();
        html.append(HTML_OPEN);
        html.append(HTML_HEAD);
        html.append("<body>");
        html.append(content);
        html.append("</body></html>");
        return html.toString();
    }

    /** Creates a POI file system whose "WordDocument" entry holds the wrapped HTML; the caller must close it. */
    private static POIFSFileSystem buildWordFileSystem(String content) throws IOException {
        byte[] html = wrapHtml(content).getBytes(WORD_CHARSET);
        POIFSFileSystem poifs = new POIFSFileSystem();
        boolean built = false;
        try (ByteArrayInputStream bais = new ByteArrayInputStream(html)) {
            // The entry has to be named "WordDocument"; the original author notes that
            // leaving this step out produces garbled output.
            poifs.getRoot().createDocument("WordDocument", bais);
            built = true;
            return poifs;
        } finally {
            if (!built) {
                poifs.close();
            }
        }
    }
}
二:业务封装处理
ps:需要注意,如果富文本内容中含有图片,必须先将图片的 URL 转为 Base64 编码(data URI 形式)并写回 img 标签的 src,否则转换为 Word 文档后图片会丢失!
/** Widest embedded image, in pixels, that is kept as-is; wider images are shrunk to this width. */
private static final int MAX_WORD_IMAGE_WIDTH = 650;

/** Marker that terminates the header of a Base64 data URI ({@code data:<type>;base64,<payload>}). */
private static final String BASE64_MARKER = ";base64,";

/**
 * Converts rich-text HTML to a Word file and uploads it, shrinking embedded images
 * on the way.
 *
 * <p>Every {@code img} whose {@code src} is a Base64 data URI is decoded; images
 * wider than 650 pixels are scaled down to that width (aspect ratio kept) and
 * re-embedded as PNG. A {@code src} that is not a Base64 data URI, or that cannot
 * be decoded as an image, is replaced by an empty string.
 *
 * @param content  rich-text HTML
 * @param fileName name of the file to create
 * @return the address returned by {@code wordUtil.cosHtmlToWord}, or {@code null}
 *         if anything fails (the failure is logged)
 */
public String export( String content, String fileName) {
    try {
        String html = content;
        Document document = Jsoup.parse(html);
        Elements imgs = document.getElementsByTag("img");

        // Phase 1: swap every distinct src for a short placeholder. Literal
        // replacement is used because a src may contain regex metacharacters.
        List<String> sources = new ArrayList<>();
        for (Element img : imgs) {
            String src = img.attr("src");
            if (StrUtil.isBlank(src) || sources.contains(src)) {
                continue;
            }
            html = html.replace(src, placeholder(sources.size()));
            sources.add(src);
        }

        // Phase 2: put the (possibly shrunk) image back in place of each placeholder.
        for (int i = 0; i < sources.size(); i++) {
            html = html.replace(placeholder(i), shrinkEmbeddedImage(sources.get(i)));
        }

        return wordUtil.cosHtmlToWord(html, fileName);
    } catch (Exception e) {
        log.error("=========================富文本生成word失败", e);
        return null;
    }
}

/** Returns the placeholder text that temporarily stands in for the image with the given index. */
private static String placeholder(int index) {
    return "{{image_src" + index + "}}";
}

/** Returns the src to embed for one image: unchanged, shrunk and PNG-encoded, or empty when unusable. */
private String shrinkEmbeddedImage(String src) {
    int marker = src.indexOf(BASE64_MARKER);
    if (!src.startsWith("data:") || marker < 0) {
        return "";
    }
    String header = src.substring(0, marker + BASE64_MARKER.length());
    String payload = src.substring(header.length());
    try {
        BufferedImage image = ImageUtils.bytesToBufferedImage(ImageUtils.base64ToByte(payload));
        if (image == null) {
            return "";
        }
        int width = image.getWidth();
        int height = image.getHeight();
        log.debug("----{}-----{}", width, height);
        if (width <= MAX_WORD_IMAGE_WIDTH) {
            // Small enough: keep the original bytes together with their own media type.
            return header + payload;
        }
        // Keep at least one pixel row; very wide, flat images would otherwise scale to height 0.
        int scaledHeight = Math.max(1, (int) (height * (double) MAX_WORD_IMAGE_WIDTH / width));
        BufferedImage scaled = ImageUtils.resizeImage(image, MAX_WORD_IMAGE_WIDTH, scaledHeight);
        // ImageUtils.imageToBytes always writes PNG, so the header has to say PNG as well.
        return "data:image/png;base64," + ImageUtils.imageToBase64(ImageUtils.imageToBytes(scaled));
    } catch (java.io.IOException | IllegalArgumentException e) {
        // One broken image should not abort the export of the whole document.
        log.warn("Skipping embedded image that could not be processed", e);
        return "";
    }
}
后续基于实际业务情况进行封装调用即可,例如在业务接口中调用上面的 export 方法,拿到返回的文件地址后再返回给前端用于下载。
至此本文总结结束,如果对您有帮助 请点个关注,万分感谢