Java使用openoffice将office系列文档转换为PDF
搭建好 OpenOffice + jodconverter 后,转换doc(97-2003)时正常,但是转换 docx 时报了以下错误:
java.lang.IllegalArgumentException: unknown document format for file: E:\word.docx
jodconverter 2.2.2 是支持转换 docx 的,但 Maven 仓库中没有 2.2.2 版本。
jodconverter 2.2.1 依赖的 jar,以 Maven 为例:
<!--jodconverter2.2.1依赖-->
<dependency>
<groupId>com.artofsolving</groupId>
<artifactId>jodconverter</artifactId>
<version>2.2.1</version>
</dependency>
<dependency>
<groupId>org.openoffice</groupId>
<artifactId>jurt</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>org.openoffice</groupId>
<artifactId>ridl</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>org.openoffice</groupId>
<artifactId>juh</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>org.openoffice</groupId>
<artifactId>unoil</artifactId>
<version>3.0.1</version>
</dependency>
<!--jodconverter 2.2.1 依赖 slf4j-jdk14,且必须使用下面这个版本,否则源码中的日志调用会报错(很 low 的一个问题)-->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-jdk14</artifactId>
<version>1.4.3</version>
</dependency>
jodconverter 在转换 2007 及以后版本的 xxx.docx 文档时会报错,原因大家都明白:2003 版的后缀名是 xxx.doc,而 2007 及以后版本的后缀名是 xxx.docx。
解决方案
重写BasicDocumentFormatRegistry类中public DocumentFormat getFormatByFileExtension(String extension)方法,只要是后缀名包含doc则使用doc的documentFormat文档格式
在项目 java 源码目录的 com 目录下依次新建 artofsolving、jodconverter 这两层包(即包名为 com.artofsolving.jodconverter),并在其中新建 BasicDocumentFormatRegistry 类:
package com.artofsolving.jodconverter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * In-memory {@link DocumentFormatRegistry} that stores formats in registration
 * order and resolves them by file extension or MIME type.
 *
 * <p>This class deliberately lives in {@code com.artofsolving.jodconverter} and
 * carries the same name as the registry class it is meant to override
 * (NOTE(review): per the accompanying notes it is intended to take precedence
 * over the copy shipped in the jodconverter 2.2.1 jar - confirm the classpath
 * ordering in your build). The only functional difference is that any extension
 * containing {@code doc}, {@code ppt} or {@code xls} (for example {@code docx},
 * {@code pptx}, {@code xlsx}) is resolved to the format registered for the
 * plain {@code doc}, {@code ppt} or {@code xls} extension.
 *
 * @author yandongfa
 * @version 1.0
 * @since 2020-03-24
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

    /** Registered formats, in insertion order; elements are {@code DocumentFormat}. */
    private final List/* <DocumentFormat> */ documentFormats = new ArrayList();

    /**
     * Registers a format by appending it to the internal list.
     *
     * @param documentFormat the format to register
     */
    public void addDocumentFormat(DocumentFormat documentFormat) {
        documentFormats.add(documentFormat);
    }

    /**
     * Exposes the live internal list of registered formats to subclasses.
     *
     * @return the backing list (not a copy)
     */
    protected List/* <DocumentFormat> */ getDocumentFormats() {
        return documentFormats;
    }

    /**
     * Looks up a format by file extension, ignoring case.
     *
     * <p>Extensions that contain {@code doc}, {@code ppt} or {@code xls} are
     * first collapsed to exactly that legacy extension, so {@code docx},
     * {@code DOCX}, {@code pptx}, {@code xlsx} and similar resolve to the
     * format registered for {@code doc}, {@code ppt} or {@code xls}.
     *
     * @param extension
     *            the file extension
     * @return the first registered DocumentFormat for this extension, or null
     *         if the extension is null or not mapped
     */
    public DocumentFormat getFormatByFileExtension(String extension) {
        if (extension == null) {
            return null;
        }
        // Lower-case BEFORE the family checks so that upper/mixed-case input
        // such as "DOCX" is normalised too. Locale.ROOT keeps the result
        // independent of the JVM default locale (e.g. Turkish dotless i).
        String lowerExtension = extension.toLowerCase(java.util.Locale.ROOT);
        // Collapse each Office family onto its legacy extension.
        if (lowerExtension.indexOf("doc") >= 0) {
            lowerExtension = "doc";
        }
        if (lowerExtension.indexOf("ppt") >= 0) {
            lowerExtension = "ppt";
        }
        if (lowerExtension.indexOf("xls") >= 0) {
            lowerExtension = "xls";
        }
        for (Iterator it = documentFormats.iterator(); it.hasNext();) {
            DocumentFormat format = (DocumentFormat) it.next();
            // Call equals on the known non-null value so a format without an
            // extension is skipped instead of raising NullPointerException.
            if (lowerExtension.equals(format.getFileExtension())) {
                return format;
            }
        }
        return null;
    }

    /**
     * Looks up a format by exact (case-sensitive) MIME type.
     *
     * @param mimeType
     *            the MIME type to search for
     * @return the first registered DocumentFormat with this MIME type, or null
     *         if mimeType is null or no registered format matches
     */
    public DocumentFormat getFormatByMimeType(String mimeType) {
        if (mimeType == null) {
            return null;
        }
        for (Iterator it = documentFormats.iterator(); it.hasNext();) {
            DocumentFormat format = (DocumentFormat) it.next();
            // Null-safe: a format without a MIME type simply does not match.
            if (mimeType.equals(format.getMimeType())) {
                return format;
            }
        }
        return null;
    }
}