批量将 HTML 转换为 Word 或 PDF

最新推荐文章于 2025-04-14 10:13:36 发布

程序员小单

最新推荐文章于 2025-04-14 10:13:36 发布

阅读量1.5k

点赞数

分类专栏：技术人生

本文链接：https://blog.csdn.net/sswltt/article/details/101362020

版权

代码中用到了 jacob，首先要下载相应版本的 .dll 文件，并放到 JDK 和 JRE 的 bin 目录下面，可参考 https://www.cnblogs.com/liudaihuablogs/p/9761297.html

maven

		<!-- easypoi -->
		<!-- NOTE(review): easypoi-base 4.1.0 may transitively expect a newer Apache POI than the
		     3.14 artifacts pinned below; confirm with `mvn dependency:tree`. -->
		<dependency>
			<groupId>cn.afterturn</groupId>
			<artifactId>easypoi-base</artifactId>
			<version>4.1.0</version>
		</dependency>
		<!-- Apache POI scratchpad module -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-scratchpad</artifactId>
			<version>3.14</version>
		</dependency>

		<!-- Apache POI OOXML module -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml</artifactId>
			<version>3.14</version>
		</dependency>

		<!-- XDocReport -->
		<dependency>
			<groupId>fr.opensagres.xdocreport</groupId>
			<artifactId>xdocreport</artifactId>
			<version>1.0.6</version>
		</dependency>

		<!-- Apache POI OOXML schemas -->
		<!-- NOTE(review): both poi-ooxml-schemas and ooxml-schemas are declared; they presumably
		     overlap, so verify that both are really needed. -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml-schemas</artifactId>
			<version>3.14</version>
		</dependency>

		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>ooxml-schemas</artifactId>
			<version>1.3</version>
		</dependency>
		<!-- jsoup: used by the service below to select <img> elements from the HTML -->
		<dependency>
			<groupId>org.jsoup</groupId>
			<artifactId>jsoup</artifactId>
			<version>1.11.3</version>
		</dependency>
		<!-- Apache POI core (POIFS file system classes used by the service below) -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi</artifactId>
			<version>3.14</version>
		</dependency>
		<!-- JACOB: requires the matching native .dll in the JDK/JRE bin directory (see the note above) -->
		<dependency>
			<groupId>net.sf.jacob-project</groupId>
			<artifactId>jacob</artifactId>
			<version>1.14.3</version>
		</dependency>

/**
 * Web entry point that triggers report generation.
 */
@Controller
public class TempController {

    @Autowired
    private TempService tempService;

    /**
     * Handles {@code /baogao} by asking the service to build the reports.
     * The list returned by the service is not used by this handler.
     *
     * @throws Exception declared for callers; nothing in this body throws a checked exception
     */
    @RequestMapping("/baogao")
    public void execute() throws Exception {
        // Assemble the HTML (and whatever else the service does) for every report.
        tempService.getReport();
    }
}

package com.meadin.service.front.service.impl;

import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.meadin.common.module.response.InfoDetailResponse;
import com.meadin.service.front.dao.TempDao;
import com.meadin.service.front.service.TempService;
import com.meadin.service.front.util.CustomXWPFDocument;
import com.meadin.service.front.util.MSOfficeGeneratorUtils;
import com.meadin.service.front.util.OfficeUtil;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;
import org.springframework.util.ResourceUtils;
import org.springframework.web.util.HtmlUtils;

import javax.annotation.Resource;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Builds an HTML page for each report record and converts it into a Word document.
 *
 * <p>Created 2019/9/20 16:24.
 */
@Service
public class TempServiceImpl implements TempService {
    @Resource
    private TempDao tempDao;

    /**
     * Loads every report record from the DAO and writes a Word document for each one.
     *
     * <p>Records are processed independently: if rendering or writing one record throws,
     * the stack trace is printed and the loop continues with the next record.
     *
     * @return the list obtained from {@code tempDao.getReport()}
     */
    public List<InfoDetailResponse> getReport() {
        List<InfoDetailResponse> report = tempDao.getReport();
        for (InfoDetailResponse info : report) {
            try {
                String html = getHtml(info.getTitle(), info.getMetaDescription(), info.getText());
                writeWordFile(html, info);
                // PDF conversion is disabled. To enable it, convert
                // "D:\\report\\" + title + ".docx" into "D:\\meadinPdf\\" + title + ".pdf"
                // with docToPdf(docFile, pdf) at this point.
            } catch (Exception e) {
                // Best effort: one broken record must not stop the rest of the batch.
                e.printStackTrace();
            }
        }
        return report;
    }
    /**
     * Wraps a report's title, description and body in a complete HTML page.
     *
     * <p>The three arguments are inserted verbatim (no HTML escaping), so {@code text} may
     * itself contain markup. A {@code null} argument is treated as an empty string so the
     * literal text "null" never appears in the page.
     *
     * @param title heading shown centered at the top of the page
     * @param mets  description shown in gray below the heading
     * @param text  HTML body of the report
     * @return the assembled HTML document
     * @throws Exception never thrown by this body; kept so the signature stays unchanged
     */
    public String getHtml(String title, String mets, String text) throws Exception {
        String html1 = "<!DOCTYPE html>\n" +
                "<html>\n" +
                "  <head>\n" +
                "      <meta charset=\"utf-8\"/>\n" +
                "      <meta name=\"renderer\" content=\"webkit\" />\n" +
                "      <meta http-equiv=\"X-UA-Compatible\" content=\"IE=EDGE,chrome=1\" />\n" +
                "      <meta content=\"user-scalable=no, width=1200, initial-scale=1, maximum-scale=1.0\"  name=\"viewport\"/>\n" +
                "  </head>\n" +
                "  <body>\n" +
                "    <div class=\"box-body view-container\">\n" +
                "\t\t\t <h2 style=\"text-align:center;\"> ";
        String html2 = "</h2>\n" +
                "\t  \t<p  style=\"text-align:center;\" class=\"ext\">\n" +
                "\t  \t\t<font color=\"gray\">迈点研究院</font>\n" +
                "\t  \t</p>\t\t\n" +
                "\t\t<p> <font color=\"gray\">";

        String html3 = "</font></p>\n" +
                "\t  \t<div class=\"text\">";

        // Close both the "text" div and the outer "box-body" div. The previous version
        // emitted an opening <div> here, which left three divs unclosed.
        String html4 = "</div>\n" +
                "    </div>\n" +
                "  </body>\n" +
                "</html>";

        String safeTitle = title == null ? "" : title;
        String safeMets = mets == null ? "" : mets;
        String safeText = text == null ? "" : text;

        return html1 + safeTitle + html2 + safeMets + html3 + safeText + html4;
    }


    /**
     * Converts one report's HTML into a Word document at {@code D:\report\<title>.docx}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Unescape the HTML and replace every {@code <img>} tag with an
     *       {@code ${imgReplaceN}} placeholder; for each placeholder, record the image
     *       bytes (downloaded from the tag's {@code src}) and a display size capped at
     *       500 px wide with the aspect ratio kept.</li>
     *   <li>Store the HTML bytes as the "WordDocument" entry of a POIFS container and save
     *       it as {@code D:\wordFile\<title>temp.doc}.</li>
     *   <li>Call {@code getWord(name)}; presumably it produces
     *       {@code D:\wordFile\<title>mod.doc}, which is read next (its body is not
     *       visible here, so confirm).</li>
     *   <li>Pass the placeholder map and the "mod" file to {@code OfficeUtil.generateWord}
     *       and write the resulting document to the final {@code .docx} path.</li>
     * </ol>
     *
     * <p>Exceptions raised while writing files are caught and printed; the method then
     * still returns the target path.
     *
     * @param content HTML of the report, possibly HTML-escaped
     * @param info    report record; only its title is used, as the file name
     * @return the path of the {@code .docx} file this call writes to
     */
    public String writeWordFile(String content, InfoDetailResponse info) {
        String name = info.getTitle();
        String docFile = "D:\\report\\" + name + ".docx";
        String path = "D:/wordFile";
        Map<String, Object> param = new HashMap<String, Object>();

        File fileDir = new File(path);
        if (!fileDir.exists()) {
            fileDir.mkdirs();
        }
        // The output directory must exist too, otherwise opening the final file fails.
        File reportDir = new File("D:\\report");
        if (!reportDir.exists()) {
            reportDir.mkdirs();
        }

        content = HtmlUtils.htmlUnescape(content);
        List<HashMap<String, String>> imgs = getImgStr(content);
        int count = 0;
        for (HashMap<String, String> img : imgs) {
            count++;
            String placeholder = "${imgReplace" + count + "}";
            // Replace img tags that end with "/>"
            content = content.replace(img.get("img"), placeholder);
            // Replace img tags that end with ">"
            content = content.replace(img.get("img1"), placeholder);
            // Replace img tags that end with " />"
            content = content.replace(img.get("img2"), placeholder);
            Map<String, Object> header = new HashMap<String, Object>();

            try {
                // NOTE(review): the result is unused. The call is kept because it is the only
                // visible statement that can throw the FileNotFoundException caught below;
                // remove both together once the helper signatures are confirmed.
                ResourceUtils.getURL("classpath:");
                String imagePath = img.get("src");
                int[] imgWH = getImgWH(imagePath);
                int w = imgWH[0];
                int h = imgWH[1];
                // Cap the width at 500 and scale the height by the same factor.
                if (w > 500) {
                    double scale = 500d / w;
                    h = (int) (scale * h);
                    w = 500;
                }
                header.put("width", w);
                header.put("height", h);
                header.put("type", "jpg");
                // NOTE(review): getImageStream returns null when the download fails;
                // confirm how OfficeUtil.inputStream2ByteArray handles a null stream.
                header.put("content", OfficeUtil.inputStream2ByteArray(getImageStream(imagePath), true));
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }
            param.put(placeholder, header);
        }

        try {
            // Write the HTML into a .doc-format container first; it is turned into docx later.
            byte[] by = content.getBytes("UTF-8");
            POIFSFileSystem poifs = new POIFSFileSystem();
            String pathTemp = "D:\\wordFile\\" + name + "temp.doc";
            // try-with-resources so the streams are closed even when a write fails.
            try (ByteArrayInputStream bais = new ByteArrayInputStream(by)) {
                DirectoryEntry directory = poifs.getRoot();
                directory.createDocument("WordDocument", bais);
                try (FileOutputStream ostream = new FileOutputStream(pathTemp)) {
                    poifs.writeFilesystem(ostream);
                }
            }
            getWord(name);
            String pathMod = "D:\\wordFile\\" + name + "mod.doc";
            CustomXWPFDocument doc = OfficeUtil.generateWord(param, pathMod);
            // The final Word file, with images embedded.
            try (FileOutputStream fopts = new FileOutputStream(docFile)) {
                doc.write(fopts);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Return the file actually written, not a fixed path that is never created.
        return docFile;
    }

    /**
     * Opens an HTTP(S) GET request for an image and returns its response body stream.
     *
     * <p>Connect and read timeouts are 50 seconds each. The caller owns the returned
     * stream and must close it.
     *
     * @param url absolute image URL
     * @return the response stream when the server answers 200 OK; {@code null} when the
     *         URL is malformed, is not an HTTP(S) URL, the status is not 200, or an I/O
     *         error occurs
     */
    public InputStream getImageStream(String url) {
        HttpURLConnection connection = null;
        try {
            java.net.URLConnection opened = new URL(url).openConnection();
            // A non-HTTP URL (e.g. file:) yields another connection type; casting it
            // blindly would throw ClassCastException instead of returning null.
            if (!(opened instanceof HttpURLConnection)) {
                System.out.println("获取网络图片出现异常，图片路径为：" + url);
                return null;
            }
            connection = (HttpURLConnection) opened;
            connection.setReadTimeout(50000);
            connection.setConnectTimeout(50000);
            connection.setRequestMethod("GET");
            if (connection.getResponseCode() == HttpURLConnection.HTTP_OK) {
                return connection.getInputStream();
            }
            // No stream is handed out, so release the connection here.
            connection.disconnect();
        } catch (IOException e) {
            System.out.println("获取网络图片出现异常，图片路径为：" + url);
            e.printStackTrace();
            if (connection != null) {
                connection.disconnect();
            }
        }
        return null;
    }


    /**
     * Collects information about every {@code <img>} element found in an HTML string.
     *
     * <p>Each returned map holds:
     * <ul>
     *   <li>{@code img1}: the element as serialized by jsoup</li>
     *   <li>{@code img}: the same text with its last character replaced by {@code "/>"}</li>
     *   <li>{@code img2}: the same text with its last character replaced by {@code " />"}</li>
     *   <li>{@code src}: the value of the {@code src} attribute</li>
     *   <li>{@code width} / {@code height}: present only when the attribute is non-empty;
     *       a trailing "px" unit is removed</li>
     * </ul>
     *
     * @param htmlStr HTML to scan
     * @return one map per {@code <img>} element, in document order
     */
    public List<HashMap<String, String>> getImgStr(String htmlStr) {
        List<HashMap<String, String>> pics = new ArrayList<HashMap<String, String>>();

        Document doc = Jsoup.parse(htmlStr);
        Elements imgs = doc.select("img");
        for (Element img : imgs) {
            HashMap<String, String> map = new HashMap<String, String>();

            String width = img.attr("width");
            if (!"".equals(width)) {
                map.put("width", stripPxSuffix(width));
            }
            String height = img.attr("height");
            if (!"".equals(height)) {
                map.put("height", stripPxSuffix(height));
            }

            String tag = img.toString();
            String tagWithoutLastChar = tag.substring(0, tag.length() - 1);
            map.put("img", tagWithoutLastChar + "/>");
            map.put("img2", tagWithoutLastChar + " />");
            map.put("img1", tag);
            map.put("src", img.attr("src"));
            pics.add(map);
        }
        return pics;
    }

    /**
     * Removes a trailing "px" unit, if present, from a size attribute value.
     * Values without the unit (e.g. "400") are returned unchanged apart from trimming,
     * instead of losing their last two characters.
     */
    private static String stripPxSuffix(String value) {
        String trimmed = value.trim();
        int len = trimmed.length();
        if (len >= 2 && trimmed.regionMatches(true, len - 2, "px", 0, 2)) {
            return trimmed.substring(0, len - 2).trim();
        }
        return trimmed;
    }


    public  void getWord(String name) {
        // 复制空白文档-粘贴到临时文档（相当于手动执行copy_paste）
        MS

最低0.47元/天解锁文章