java 富文本html 转 word(带图片处理)

softwareDragon

已于 2023-02-15 14:28:03 修改

阅读量3.1k

点赞数 6

分类专栏：工具 文章标签：java word 富文本

于 2023-02-15 14:24:38 首次发布

本文链接：https://blog.csdn.net/qq_33348135/article/details/129042417

版权

工具专栏收录该内容

52 篇文章 0 订阅

订阅专栏

1.配置依赖
<dependency>
   <groupId>e-iceblue</groupId>
   <artifactId>spire.doc.free</artifactId>
   <version>3.9.0</version>
</dependency>
<dependency>
   <groupId>org.jsoup</groupId>
   <artifactId>jsoup</artifactId>
   <version>1.14.2</version>
</dependency>
		<dependency>
			<groupId>org.apache.httpcomponents</groupId>
			<artifactId>httpclient</artifactId>
			<version>4.5.13</version>
		</dependency>
因e-iceblue在阿里云maven仓库中找不到，需配置特定仓库

<repository>
   <id>com.e-iceblue</id>
   <url>https://repo.e-iceblue.cn/repository/maven-public/</url>
</repository>
2.代码demo
import com.spire.doc.Document;
import com.spire.doc.FileFormat;
import com.spire.doc.Section;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Demo that converts rich-text HTML into a Word (.docx) file.
 *
 * <p>Remote images referenced by {@code <img src="...">} are downloaded and
 * rewritten as base64 {@code data:} URIs before the HTML is handed to
 * Spire.Doc, so the image bytes travel with the HTML instead of being
 * fetched by the converter.
 */
public class TestHtml2Doc {

    /** Referer sent with every image request; some image hosts use hotlink protection. */
    private static final String IMAGE_REFERER = "http://aa.com.cn";

    /** Connect, connection-request and socket timeout for image downloads, in milliseconds. */
    private static final int HTTP_TIMEOUT_MS = 60000;

    /**
     * Reads the rich text from {@code D:\oo.txt}, inlines its images, applies a
     * few style tweaks and exports the result to {@code D:\590.docx}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        String str = "";
        try {
            // The rich text lives in a local file here; replace with a database or
            // network source as needed. readAllBytes reads the complete file, unlike
            // a single read() sized by available(), which may return fewer bytes.
            str = new String(Files.readAllBytes(Paths.get("D:\\oo.txt")), StandardCharsets.UTF_8);
        } catch (IOException e) {
            // Best effort, as before: continue with empty content.
            e.printStackTrace();
        }
        org.jsoup.nodes.Document document = Jsoup.parse(str);

        Element body = document.body();

        // Some image hosts reject requests without a Referer (hotlink protection).
        Map<String, String> headers = new HashMap<>();
        headers.put("Referer", IMAGE_REFERER);

        // Visit every image individually. Selecting per top-level child and calling
        // Elements.attr(..) reads only the first match but writes to all matches,
        // which would copy the first image over its siblings.
        Elements images = body.select("img[src]");
        for (Element img : images) {
            try {
                String src = img.attr("src");
                // Nothing to fetch for empty sources or images that are already inline.
                if (StringUtils.isBlank(src) || src.startsWith("data:")) {
                    continue;
                }
                String string = doGetHeader(src, headers);
                if (string == null) {
                    // Download failed (already reported by doGetHeader); keep the
                    // original URL rather than writing a broken "base64,null" URI.
                    continue;
                }
                // NOTE: the MIME type is fixed to image/png regardless of the real format.
                img.attr("src", "data:image/png;base64," + string);
                System.out.println("src = " + src);
                System.out.println("Basestring = " + string);
            } catch (RuntimeException e) {
                // One bad image must not stop the remaining ones from being processed.
                System.out.println("转换url图片报错了：" + e.getMessage());
            }
        }

        String html = document.html();
        // Style adjustments: map the editor's centering class to an inline style and
        // indent a paragraph that follows a paragraph ending in <br>.
        html = html.replace("class=\"ql-align-center\"", "style=\"text-align:center\"").replace("<br></p>\n" +
                "  <p>", "<br></p>\n" +
                "  <p>&emsp;&emsp;");
        exportWord(html, "590");
        long end = System.currentTimeMillis();
        System.out.println("(end-start) = " + (end - start) / 1000);
    }

    /**
     * Renders HTML into a Word document and writes it to {@code D:\<fileName>.docx}.
     *
     * <p>Failures are printed to stderr and not rethrown.
     *
     * @param content  HTML to render
     * @param fileName output file name without directory or extension
     */
    public static void exportWord(String content, String fileName) {
        try {
            // Create the document with one section and append the HTML as a paragraph.
            Document document = new Document();
            Section sec = document.addSection();
            sec.addParagraph().appendHTML(content);

            // Render into memory first so a failed conversion does not leave a
            // truncated file on disk.
            ByteArrayOutputStream os = new ByteArrayOutputStream();
            document.saveToStream(os, FileFormat.Docx);

            // try-with-resources closes the file even when the write fails.
            try (FileOutputStream no = new FileOutputStream("D:\\" + fileName + ".docx")) {
                os.writeTo(no);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads a resource with an HTTP GET and returns its body base64-encoded.
     *
     * @param url     address of the resource (an image in this demo)
     * @param headers request headers to send, e.g. {@code Referer}; {@code null} means none
     * @return the base64-encoded response body, or {@code null} when the request
     *         fails, the status is not 2xx, or the response has no body
     */
    public static String doGetHeader(String url, Map<String, String> headers) {
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            HttpGet httpGet = new HttpGet(url);
            // Apply caller-supplied headers.
            if (headers != null) {
                for (Map.Entry<String, String> entry : headers.entrySet()) {
                    httpGet.setHeader(entry.getKey(), entry.getValue());
                }
            }
            RequestConfig config = RequestConfig.custom().setConnectTimeout(HTTP_TIMEOUT_MS)
                    .setConnectionRequestTimeout(HTTP_TIMEOUT_MS)
                    .setSocketTimeout(HTTP_TIMEOUT_MS)
                    .build();
            httpGet.setConfig(config);

            CloseableHttpResponse response = client.execute(httpGet);
            try {
                // Without this check an error page (403/404 ...) would be encoded
                // and embedded as if it were image data.
                int status = response.getStatusLine().getStatusCode();
                if (status < 200 || status >= 300) {
                    System.out.println("httpClient请求图片url返回非2xx状态码 " + status + "：" + url);
                    return null;
                }
                HttpEntity entity = response.getEntity();
                if (entity == null) {
                    System.out.println("httpClient请求图片url响应无内容：" + url);
                    return null;
                }
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                entity.writeTo(out);
                // encodeToString avoids decoding the bytes with the platform charset.
                return Base64.getEncoder().encodeToString(out.toByteArray());
            } finally {
                closeResponse(response);
            }
        } catch (Exception e) {
            System.out.println("httpClient请求图片url报错 " + e.getMessage());
            return null;
        }
    }

    /** Closes the response, reporting but not propagating a failure to close. */
    private static void closeResponse(CloseableHttpResponse response) {
        try {
            if (response != null) {
                response.close();
            }
        } catch (Exception e) {
            System.out.println("关闭响应流报错：" + e.getMessage());
        }
    }
}