Springboot实现富文本(Html)内容转Word(包含图片)

在Java中经常会碰到富文本使用场景,有些需求则需要将富文本内容转换为Word,并包含图片,没接触过的同学可能一开始会感觉头大,其实可以使用Apache POI库来进行实现。以下是一个简单的例子。

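引入本次需要的Maven依赖包(注意:下文工具类除 jsoup 和 POI 外,还用到了 Hutool 的 StrUtil、Lombok 的 @Slf4j、Apache HttpCore 的 ContentType,以及项目自有的 TencentCOSUtils、MyMultipartFile,这些依赖需要自行引入或按实际项目替换)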


<!--富文本转word begin-->
    <!-- jsoup依赖  主要是解析图片标签,然后缩放图片大小-->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.12.1</version>
    </dependency>
    <!-- poi依赖-->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi</artifactId>
      <version>4.1.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>4.1.0</version>
    </dependency>
<!--富文本转word end-->

一:创建工具类
1.1、新增图片处理工具类

import cn.hutool.core.util.StrUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import sun.misc.BASE64Decoder;
import sun.misc.BASE64Encoder;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.Base64;
import java.net.URL;

/**
 * @ClassName ImageUtils
 * @Description: 图片处理工具类
 * @Author bigearchart
 * @Date 2024/5/20
 * @Version V1.0
 **/
@Component
@Slf4j
public class ImageUtils {

    /** Widest image, in pixels, that {@link #getImageStr(String)} returns without scaling it down. */
    private static final int MAX_WIDTH = 500;

    /** Data-URI prefix for PNG payloads; everything produced by {@link #imageToBytes} is PNG. */
    private static final String PNG_DATA_URI_PREFIX = "data:image/png;base64,";

    /**
     * Scales an image to the given size.
     *
     * <p>The result is a {@code TYPE_INT_RGB} image, so it carries no alpha channel.
     *
     * @param originalImage image to scale
     * @param targetWidth   width of the result in pixels, must be positive
     * @param targetHeight  height of the result in pixels, must be positive
     * @return a new image of exactly {@code targetWidth} x {@code targetHeight}
     * @throws IOException never thrown by this implementation; kept for source compatibility
     */
    public static BufferedImage resizeImage(BufferedImage originalImage, int targetWidth, int targetHeight) throws IOException {
        Image scaled = originalImage.getScaledInstance(targetWidth, targetHeight, Image.SCALE_AREA_AVERAGING);
        BufferedImage outputImage = new BufferedImage(targetWidth, targetHeight, BufferedImage.TYPE_INT_RGB);
        Graphics graphics = outputImage.getGraphics();
        try {
            graphics.drawImage(scaled, 0, 0, null);
        } finally {
            // Release the graphics context instead of waiting for garbage collection.
            graphics.dispose();
        }
        return outputImage;
    }

    /**
     * Encodes bytes as Base64 text.
     *
     * <p>Uses the basic {@link Base64} encoder, so the result is a single line
     * without line separators and can be embedded directly in a data URI.
     *
     * @param data bytes to encode
     * @return the Base64 text
     */
    public static String imageToBase64(byte[] data) {
        return Base64.getEncoder().encodeToString(data);
    }

    /**
     * Decodes Base64 text into bytes.
     *
     * <p>The MIME decoder is used on purpose: it skips line separators, so
     * wrapped Base64 text is accepted as well as single-line text.
     *
     * @param base64 Base64 text, without any {@code data:...;base64,} prefix
     * @return the decoded bytes
     * @throws IOException if the text is not valid Base64
     */
    public static byte[] base64ToByte(String base64) throws IOException {
        try {
            return Base64.getMimeDecoder().decode(base64);
        } catch (IllegalArgumentException e) {
            // Callers of this method handle IOException, so keep that contract.
            throw new IOException("Invalid Base64 input", e);
        }
    }

    /**
     * Serializes an image as PNG.
     *
     * @param bImage image to serialize
     * @return the PNG bytes
     * @throws UncheckedIOException if the image cannot be written; returning a
     *         truncated PNG would silently corrupt the exported document
     */
    public static byte[] imageToBytes(BufferedImage bImage) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            ImageIO.write(bImage, "png", out);
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to encode image as PNG", e);
        }
        return out.toByteArray();
    }

    /**
     * Decodes image bytes.
     *
     * @param ImageByte encoded image bytes, may be {@code null}
     * @return the decoded image, or {@code null} when the input is {@code null},
     *         empty, or cannot be decoded as an image
     */
    public static BufferedImage bytesToBufferedImage(byte[] ImageByte) {
        if (ImageByte == null || ImageByte.length == 0) {
            return null;
        }
        try {
            return ImageIO.read(new ByteArrayInputStream(ImageByte));
        } catch (IOException ignored) {
            // Undecodable data is reported through the null return, the same
            // signal ImageIO.read gives when no reader matches; callers check for it.
            return null;
        }
    }

    /**
     * Downloads an image and returns it as a Base64 data URI.
     *
     * <p>The media type is derived from the extension of the URL path, so a
     * query string does not affect it.
     *
     * @param imageUrl URL of the image
     * @return {@code data:<media type>;base64,<payload>}
     * @throws IOException if the URL is malformed or the content cannot be read
     */
    public static String convertToBase64(String imageUrl) throws IOException {
        URL url = new URL(imageUrl);
        byte[] imageBytes;
        try (InputStream inputStream = url.openStream()) {
            imageBytes = readFully(inputStream);
        }
        return dataUriPrefix(url.getPath()) + Base64.getEncoder().encodeToString(imageBytes);
    }

    /**
     * Reads a local image file and returns it as a Base64 data URI.
     *
     * <p>Images wider than {@value #MAX_WIDTH} pixels are scaled down to that
     * width, keeping the aspect ratio, and re-encoded as PNG. Other files are
     * embedded byte for byte.
     *
     * @param imgPath path of the image file
     * @return the data URI, or {@code null} when the file does not exist or is empty
     * @throws IOException if the file exists but cannot be read
     */
    public static String getImageStr(String imgPath) throws IOException {
        File file = new File(imgPath);
        if (!file.exists()) {
            return null;
        }
        byte[] data;
        try (InputStream in = new FileInputStream(file)) {
            data = readFully(in);
        }
        if (data.length == 0) {
            return null;
        }
        BufferedImage image = bytesToBufferedImage(data);
        if (image != null && image.getWidth() > MAX_WIDTH) {
            // Scale the height by the same factor; never let it reach 0,
            // which BufferedImage rejects.
            int scaledHeight = Math.max(1, (int) (image.getHeight() * (double) MAX_WIDTH / image.getWidth()));
            BufferedImage scaled = resizeImage(image, MAX_WIDTH, scaledHeight);
            // imageToBytes always writes PNG, so label the payload accordingly.
            return PNG_DATA_URI_PREFIX + imageToBase64(imageToBytes(scaled));
        }
        return dataUriPrefix(file.getName()) + imageToBase64(data);
    }

    /** Reads the stream to its end; the caller remains responsible for closing it. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        byte[] buffer = new byte[4096];
        int bytesRead;
        while ((bytesRead = in.read(buffer)) != -1) {
            out.write(buffer, 0, bytesRead);
        }
        return out.toByteArray();
    }

    /** Builds the {@code data:<media type>;base64,} prefix from a file name's extension. */
    private static String dataUriPrefix(String fileName) {
        int dot = fileName.lastIndexOf('.');
        String extension = dot < 0 ? "" : fileName.substring(dot + 1).toLowerCase(java.util.Locale.ROOT);
        String mediaType;
        if (extension.isEmpty() || extension.indexOf('/') >= 0 || extension.indexOf('\\') >= 0) {
            // No usable extension: the dot, if any, belongs to a directory name.
            mediaType = "application/octet-stream";
        } else if ("jpg".equals(extension) || "jpe".equals(extension)) {
            mediaType = "image/jpeg";
        } else if ("svg".equals(extension)) {
            mediaType = "image/svg+xml";
        } else {
            mediaType = "image/" + extension;
        }
        return "data:" + mediaType + ";base64,";
    }
}

1.2、新增Word工具类

import cn.hutool.core.util.StrUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.entity.ContentType;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.jeecg.config.oss.MyMultipartFile;
import org.jeecg.config.oss.TencentCOSUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;

import javax.annotation.Resource;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * @ClassName WordUtil
 * @Description: poi操作word工具类
 * @Author bigearchart
 * @Date 2024/5/20
 * @Version V1.0
 **/
@Component
@Slf4j
public class WordUtil {

    /** Charset the generated HTML is encoded with before it is stored or uploaded. */
    private static final String HTML_CHARSET = "GBK";

    /**
     * Opening part of the HTML wrapper: the Office XML namespaces plus a
     * conditional {@code w:WordDocument} block whose {@code <w:View>Print</w:View>}
     * asks Word for the print (page) view instead of the web layout.
     */
    private static final String HTML_PREFIX = "<html " +
            "xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:w=\"urn:schemas-microsoft-com:office:word\" xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\" xmlns=\"http://www.w3.org/TR/REC-html40\"" +
            ">" +
            "<head>" +
            "<!--[if gte mso 9]><xml><w:WordDocument><w:View>Print</w:View><w:TrackMoves>false</w:TrackMoves><w:TrackFormatting/><w:ValidateAgainstSchemas/><w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid><w:IgnoreMixedContent>false</w:IgnoreMixedContent><w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText><w:DoNotPromoteQF/><w:LidThemeOther>EN-US</w:LidThemeOther><w:LidThemeAsian>ZH-CN</w:LidThemeAsian><w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript><w:Compatibility><w:BreakWrappedTables/><w:SnapToGridInCell/><w:WrapTextWithPunct/><w:UseAsianBreakRules/><w:DontGrowAutofit/><w:SplitPgBreakAndParaMark/><w:DontVertAlignCellWithSp/><w:DontBreakConstrainedForcedTables/><w:DontVertAlignInTxbx/><w:Word11KerningPairs/><w:CachedColBalance/><w:UseFELayout/></w:Compatibility><w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel><m:mathPr><m:mathFont m:val=\"Cambria Math\"/><m:brkBin m:val=\"before\"/><m:brkBinSub m:val=\"--\"/><m:smallFrac m:val=\"off\"/><m:dispDef/><m:lMargin m:val=\"0\"/> <m:rMargin m:val=\"0\"/><m:defJc m:val=\"centerGroup\"/><m:wrapIndent m:val=\"1440\"/><m:intLim m:val=\"subSup\"/><m:naryLim m:val=\"undOvr\"/></m:mathPr></w:WordDocument></xml><![endif]-->" +
            "</head>" +
            "<body>";

    /** Closing part of the HTML wrapper. */
    private static final String HTML_SUFFIX = "</body></html>";

    @Resource
    private TencentCOSUtils tencentCOSUtils;

    /**
     * Streams rich-text content to the HTTP response as a {@code .doc} download.
     *
     * @param request  current request; its character encoding is set to UTF-8
     * @param response response the document is written to; its output stream is closed afterwards
     * @param content  rich-text (HTML) fragment placed inside the document body
     * @param fileName download file name without the {@code .doc} extension
     * @throws Exception if the document cannot be built or written
     */
    public static void exportHtmlToWord(HttpServletRequest request, HttpServletResponse response, String content, String fileName) throws Exception {
        try (POIFSFileSystem poifs = newWordFileSystem(content)) {
            request.setCharacterEncoding("utf-8");
            response.setContentType("application/msword");
            // Re-encode the name byte-wise so a Chinese file name survives the header.
            response.addHeader("Content-Disposition", "attachment;filename=" +
                    new String(fileName.getBytes("GB2312"), "iso8859-1") + ".doc");
            try (ServletOutputStream ostream = response.getOutputStream()) {
                poifs.writeFilesystem(ostream);
            }
        }
    }

    /**
     * Writes rich-text content to a Word file on disk.
     *
     * @param filepath directory part of the target path; it is concatenated with
     *                 {@code fileName} as-is, so it must end with a path separator
     * @param content  rich-text (HTML) fragment placed inside the document body
     * @param fileName file name, including the extension
     * @throws Exception if the document cannot be built or written
     */
    public static void exportHtmlToWord(String filepath, String content, String fileName) throws Exception {
        try (POIFSFileSystem poifs = newWordFileSystem(content);
             FileOutputStream out = new FileOutputStream(new File(filepath + fileName))) {
            poifs.writeFilesystem(out);
        }
    }

    /**
     * Uploads rich-text content to Tencent COS as a Word file.
     *
     * <p>Unlike the {@code exportHtmlToWord} overloads, the wrapped HTML bytes are
     * uploaded directly, without being placed in a POIFS container first.
     *
     * @param content  rich-text (HTML) fragment placed inside the document body
     * @param fileName name passed to the upload, used as both name and original file name
     * @return the value returned by {@code TencentCOSUtils.upload}, logged as the file address
     * @throws Exception if the content cannot be encoded or the upload fails
     */
    public  String cosHtmlToWord( String content, String fileName) throws Exception {
        InputStream inputStream = new ByteArrayInputStream(buildWordHtml(content));
        MultipartFile file = new MyMultipartFile(fileName, fileName, ContentType.APPLICATION_OCTET_STREAM.toString(), inputStream);
        String url = tencentCOSUtils.upload(file, "file");
        log.info("\n\t==========文件地址:{}", url);
        return url;
    }

    /**
     * Wraps the rich-text fragment in the Word-aware HTML skeleton and encodes it.
     * The explicit charset is what keeps Chinese text from being garbled.
     */
    private static byte[] buildWordHtml(String content) throws UnsupportedEncodingException {
        StringBuilder html = new StringBuilder(HTML_PREFIX);
        html.append(content);
        html.append(HTML_SUFFIX);
        return html.toString().getBytes(HTML_CHARSET);
    }

    /**
     * Creates a POIFS container holding the wrapped HTML as its "WordDocument" entry.
     * The caller owns the returned file system and must close it.
     */
    private static POIFSFileSystem newWordFileSystem(String content) throws IOException {
        POIFSFileSystem poifs = new POIFSFileSystem();
        try {
            DirectoryEntry directory = poifs.getRoot();
            // Per the original author, this entry must not be omitted or the export is garbled.
            directory.createDocument("WordDocument", new ByteArrayInputStream(buildWordHtml(content)));
            return poifs;
        } catch (IOException | RuntimeException e) {
            // Do not leak the container when it could not be populated.
            poifs.close();
            throw e;
        }
    }
}

二:业务封装处理

ps:这里的富文本内容需要注意一件事:如果内容中含有图片,那么需要先将图片的URL转为Base64编码(例如使用上文的 ImageUtils.convertToBase64 方法),不然转换为Word文档后图片会丢失!

/**
     * Converts rich-text HTML into a Word file, shrinking embedded images first.
     *
     * <p>Every {@code <img>} whose {@code src} is a Base64 data URI is decoded;
     * images wider than 650 pixels are scaled down to that width, keeping the
     * aspect ratio, and re-embedded as PNG. Data URIs that cannot be decoded as
     * an image get an empty {@code src}. Sources that are not Base64 data URIs
     * (for example plain URLs) and empty sources are left as they are.
     *
     * @param content  rich-text (HTML) content
     * @param fileName file name handed to {@code wordUtil.cosHtmlToWord}
     * @return the value returned by {@code wordUtil.cosHtmlToWord}, or
     *         {@code null} if anything fails
     */
    public String export( String content, String fileName) {
        try {
            String tmpContent = content;
            // Jsoup is only used to find the image sources; the HTML itself is
            // edited as text so the rest of the markup stays exactly as supplied.
            Document document = Jsoup.parse(content);
            Elements imgs = document.getElementsByTag("img");
            for (Element element : imgs) {
                String src = element.attr("src");
                String replacement = shrinkDataUri(src);
                if (replacement != null && !replacement.equals(src)) {
                    // Literal replacement: the source is data, not a regular expression.
                    tmpContent = tmpContent.replace(src, replacement);
                }
            }
            // Hand the adjusted HTML to the Word export.
            return wordUtil.cosHtmlToWord(tmpContent, fileName);
        } catch (Exception e) {
            log.error("=========================富文本生成word失败", e);
            return null;
        }
    }

    /**
     * Returns the {@code src} value to use for one image.
     *
     * @param src original {@code src} attribute value
     * @return {@code null} when {@code src} is not a Base64 data URI and must be
     *         left alone, an empty string when the payload is not a decodable
     *         image, the unchanged URI when the image is narrow enough, otherwise
     *         a PNG data URI of the scaled-down image
     */
    private static String shrinkDataUri(String src) throws java.io.IOException {
        final String marker = ";base64,";
        final int maxWidth = 650;
        if (src == null || !src.startsWith("data:")) {
            return null;
        }
        int markerAt = src.indexOf(marker);
        if (markerAt < 0) {
            return null;
        }
        String payload = src.substring(markerAt + marker.length());
        BufferedImage bufferedImage = ImageUtils.bytesToBufferedImage(ImageUtils.base64ToByte(payload));
        if (bufferedImage == null) {
            return "";
        }
        int width = bufferedImage.getWidth();
        if (width <= maxWidth) {
            return src;
        }
        // Scale the height by the same factor; never let it reach 0,
        // which BufferedImage rejects.
        int height = Math.max(1, (int) (bufferedImage.getHeight() * (double) maxWidth / width));
        BufferedImage imgZoom = ImageUtils.resizeImage(bufferedImage, maxWidth, height);
        String encoded = ImageUtils.imageToBase64(ImageUtils.imageToBytes(imgZoom));
        // ImageUtils.imageToBytes writes PNG, so the prefix must say PNG whatever
        // the source format was. Whitespace is stripped so the URI stays on one line.
        return "data:image/png;base64," + encoded.replaceAll("\\s", "");
    }

后续就是基于业务情况,来进行实际的业务封装操作即可,例如
在这里插入图片描述
至此本文总结结束,如果对您有帮助 请点个关注,万分感谢

  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值