文档转pdf格式，实现在线预览

最新推荐文章于 2024-07-19 03:55:57 发布

nu11_

最新推荐文章于 2024-07-19 03:55:57 发布

阅读量603

点赞数 1

文章标签：在线预览文件格式转换

本文链接：https://blog.csdn.net/nu11_/article/details/78344692

版权

首先要下载并安装 OpenOffice 工具

地址：http://www.openoffice.org/zh-cn/download/

安装好 OpenOffice 后，在 OpenOffice 4\program 文件夹下打开 cmd，执行下面的命令启动服务：

命令：soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard

maven需要引入的jar包

备注：在 Maven 仓库中找不到 jodconverter 2.2.2 版本，而低版本不支持 docx 格式文档的转换。所以把 jar 包放到 webapp/openOffic 文件夹下，并在 pom.xml 中通过 system scope 引用。

<!-- JODConverter 2.2.2 is referenced from a jar checked into the web app (system scope + systemPath)
     instead of being resolved from a Maven repository. -->
<dependency>
      <groupId>com.artofsolving</groupId><!-- groupId: Java package-style namespace of the artifact -->
      <artifactId>jodconverter</artifactId><!-- artifactId: project name -->
      <version>2.2.2</version>
      <scope>system</scope>
      <systemPath>${project.basedir}/src/main/webapp/openOffic/jodconverter-2.2.2.jar</systemPath>
    </dependency>
    <dependency>
      <groupId>commons-cli</groupId>
      <artifactId>commons-cli</artifactId>
      <version>1.2</version>
    </dependency>
    <dependency>
      <groupId>org.openoffice</groupId>
      <artifactId>juh</artifactId>
      <version>3.1.0</version>
    </dependency>
    <dependency>
      <groupId>org.openoffice</groupId>
      <artifactId>jurt</artifactId>
      <version>3.1.0</version>
    </dependency>
    <dependency>
      <groupId>org.openoffice</groupId>
      <artifactId>ridl</artifactId>
      <version>3.0.1</version>
    </dependency>
    <dependency>
      <groupId>org.openoffice</groupId>
      <artifactId>unoil</artifactId>
      <version>3.0.1</version>
    </dependency>
    <dependency>
      <groupId>com.thoughtworks.xstream</groupId>
      <artifactId>xstream</artifactId>
      <version>1.3.1</version>
    </dependency>

controller

/**
     * Online preview: converts the stored file identified by {@code fileid} to PDF
     * (via {@code OpenOfficUtil.doc2pdf}) and returns the path of the resulting PDF.
     *
     * <p>The PDF is written next to the source file, using the same name with the
     * extension replaced by {@code .pdf}.
     *
     * @param fileid id of the stored file record; must be parseable as an integer
     * @return path of the PDF file, or {@code null} if the lookup or the conversion failed
     */
    @RequestMapping(value = "view")
    public String view(String fileid){
        // The id supplied by the caller is used as-is; it must not be overwritten
        // with a hard-coded test value.
        try {
            SysFileVO fileVO = FileService.get(Integer.parseInt(fileid));
            // Source file path, e.g. D://360Downloads/design.doc
            String filepath = fileVO.getNewName();
            // Target path: same location and name, extension replaced by .pdf
            String pdfUrl = filepath.substring(0,filepath.lastIndexOf("."))+".pdf";
            OpenOfficUtil.doc2pdf(filepath,pdfUrl);
            return pdfUrl;
        }catch (Exception e){
            // Any failure (bad id, missing record, file name without extension,
            // conversion error) is reported to the caller as null.
            logger.error("查看异常",e);
        }
        return null;
    }

OpenOfficUtil工具类

该工具类也支持文档转HTML

package com.suneee.utils;

import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;

import java.io.*;
import java.net.ConnectException;
import java.nio.charset.Charset;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Helpers that convert office documents to HTML or PDF through JODConverter,
 * talking to an OpenOffice service over a socket on {@link #port}.
 *
 * <p>NOTE(review): all methods share one mutable static connection and are not
 * synchronized, so concurrent calls can interfere with each other — confirm how
 * callers use this class before relying on it from multiple threads.
 */
public class OpenOfficUtil {

    // OpenOffice installation directory. Not referenced by the methods in this class.
    private static String OpenOffice_HOME="C:\\Program Files (x86)\\OpenOffice 4";
    // Host used to build the command line below. The connection itself is created from the port only.
    private static String host="127.0.0.1";
    // Port the OpenOffice service is expected to listen on.
    private static Integer port=8100;
    // Command line for starting the headless service. Not referenced by the methods in this class.
    private static String commands="program\\soffice -headless -accept=\"socket,host="+host+",port="+port+";urp;\"";
    // Directory in which temporary HTML output is created.
    private static String filepath="/usr/local/ydyl/tomcat8/webapps/uploadimg";
    // Prefix prepended to every image SRC in the cleaned HTML.
    private static String imgUrl="/uploadimg/";
    // Shared connection; replaced by a fresh instance whenever it is found disconnected.
    private static OpenOfficeConnection connection= new SocketOpenOfficeConnection(port);

    // Matches the BODY element. Relies on the HTML being a single line ('.' does not match line breaks).
    private static final Pattern BODY_PATTERN = Pattern.compile("<BODY .*</BODY>");

    // Matches one unwanted attribute inside a tag: $1 is the tag text before it, $2 the tag text after it.
    // The value may be single-quoted, double-quoted, or unquoted (ending at whitespace or '>').
    private static final Pattern UNWANTED_ATTRIBUTE = Pattern.compile(
            "<([^>]*)(?:lang|LANG|class|CLASS|style|STYLE|size|SIZE|face|FACE|[ovwxpOVWXP]:\\w+)="
            + "(?:'[^']*'|\"[^\"]*\"|[^\\s>]+)([^>]*)>");

    /**
     * Converts a document to an HTML file named after the current timestamp.
     *
     * <p>The connection must already be connected. It is disconnected before this
     * method returns, whether or not the conversion succeeded.
     *
     * @param docFile   document to convert
     * @param filepath  directory in which the HTML file is created
     * @param con       connected OpenOffice connection to use
     * @return the generated HTML file
     */
    public static File convert(File docFile, String filepath, OpenOfficeConnection con) {
        // Target HTML file: <filepath>/<current time in ms>.html
        File htmlFile = new File(filepath + "/" + new Date().getTime() + ".html");
        System.out.println("--------zh1--------------"+new Date().getSeconds());
        try {
            DocumentConverter converter = new OpenOfficeDocumentConverter(con);
            converter.convert(docFile, htmlFile);
            System.out.println("--------zhlast1s--------------"+new Date().getSeconds());
        } finally {
            // Release the connection on failure too; skip it when the connection was never opened.
            if (con.isConnected()) {
                con.disconnect();
            }
        }
        return htmlFile;
    }

    /**
     * Converts a document to HTML and returns the cleaned-up markup as a string.
     *
     * <p>The intermediate HTML file is created in the temporary directory and
     * deleted again before this method returns.
     *
     * @param url path of the document to convert
     * @return cleaned HTML produced by {@link #clearFormat(String, String)}
     * @throws IOException if the OpenOffice service cannot be reached or the
     *                     generated HTML file cannot be read
     */
    public static String toHtmlString(String url) throws IOException {
        if (!connection.isConnected()) {
            connection = new SocketOpenOfficeConnection(port);
            // A new connection object is not usable until it has been connected.
            connection.connect();
        }
        File docFile = new File(url);
        File htmlFile = convert(docFile, filepath, connection);

        StringBuilder html = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(htmlFile), Charset.forName("utf-8")))) {
            String line;
            // readLine() returning null is the end-of-stream signal; ready() only says whether a read would block.
            while ((line = reader.readLine()) != null) {
                // Join lines with a space so that tokens wrapped across lines are not fused together,
                // while keeping the markup on one line for the patterns in clearFormat.
                html.append(line).append(' ');
            }
        } finally {
            // Remove the temporary file whether or not reading succeeded.
            htmlFile.delete();
        }
        return clearFormat(html.toString(), filepath);
    }

    /**
     * Strips markup that is not wanted in the preview from converted HTML.
     *
     * <p>Steps: keep only the BODY element (renamed to DIV), prefix image sources,
     * drop all attributes from P tags, remove a fixed set of tags, and remove a
     * fixed set of attributes from the remaining tags.
     *
     * @param htmlStr     HTML as produced by the converter, on a single line
     * @param docImgPath  not used by this method; image sources are prefixed with the
     *                    class-level image URL instead
     * @return the cleaned HTML
     */
    protected static String clearFormat(String htmlStr, String docImgPath) {
        Matcher bodyMatcher = BODY_PATTERN.matcher(htmlStr);
        if (bodyMatcher.find()) {
            // Keep the BODY element only and turn its tags into DIV tags.
            htmlStr = bodyMatcher.group().replaceFirst("<BODY", "<DIV").replaceAll("</BODY>", "</DIV>");
        }
        // Point image sources at the public image URL.
        htmlStr = htmlStr.replaceAll("<IMG SRC=\"", Matcher.quoteReplacement("<IMG SRC=\"" + imgUrl));
        // Rewrite <P ...>content</P> as <p>content</p>, dropping the P attributes.
        htmlStr = htmlStr.replaceAll("(<P)([^>]*)(>.*?)(<\\/P>)", "<p$3</p>");
        // Remove unwanted tags (opening and closing), keeping their content.
        htmlStr = htmlStr.replaceAll("<[/]?(font|FONT|span|SPAN|xml|XML|del|DEL|ins|INS|meta|META|[ovwxpOVWXP]:\\w+)[^>]*?>","");
        // Remove unwanted attributes. One pass removes at most one attribute per tag, so repeat
        // until nothing changes; every replacement shortens the string, so the loop terminates.
        String previous;
        do {
            previous = htmlStr;
            htmlStr = UNWANTED_ATTRIBUTE.matcher(htmlStr).replaceAll("<$1$2>");
        } while (!htmlStr.equals(previous));
        return htmlStr;
    }

    /**
     * Converts a document to PDF unless the PDF already exists.
     *
     * <p>Nothing is converted, and only a message is printed, when the source
     * document does not exist or the target PDF is already present.
     *
     * @param docUrl path of the document to convert
     * @param pdfUrl path of the PDF file to create
     * @throws ConnectException if the OpenOffice service cannot be reached
     * @throws Exception        any other failure raised during conversion (rethrown unchanged)
     */
    public static void doc2pdf(String docUrl,String pdfUrl) throws Exception {
        File docFile = new File(docUrl);
        File pdfFile = new File(pdfUrl);
        if (!docFile.exists()) {
            System.out.println("****swf转换器异常，需要转换的文档不存在，无法转换****");
            return;
        }
        if (pdfFile.exists()) {
            System.out.println("****已经转换为pdf，不需要再进行转化****");
            return;
        }
        try {
            // Connect only when needed; an already-open connection is reused as is.
            if (!connection.isConnected()) {
                connection = new SocketOpenOfficeConnection(port);
                connection.connect();
            }
            DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
            converter.convert(docFile, pdfFile);
            System.out.println("****pdf转换成功，PDF输出：" + pdfFile.getPath()+ "****");
        } catch (ConnectException e) {
            e.printStackTrace();
            System.out.println("****swf转换器异常，openoffice服务未启动！****");
            throw e;
        } catch (com.artofsolving.jodconverter.openoffice.connection.OpenOfficeException e) {
            e.printStackTrace();
            System.out.println("****swf转换器异常，读取转换文件失败****");
            throw e;
        } catch (Exception e) {
            e.printStackTrace();
            throw e;
        } finally {
            // Always release the connection, including when the conversion failed.
            if (connection.isConnected()) {
                connection.disconnect();
            }
        }
    }
}