原文:https://blog.csdn.net/make_a_difference/article/details/53771136
前导:
- 开发过程中经常需要使用java将office系列文档转换为PDF,一般使用开源的OpenOffice(现由Apache基金会维护,并非微软产品)+jodconverter实现文档转换。
- openoffice既有windows版本也有linux版。不用担心生产环境是linux系统。
- 关于linux系统安装openoffice软件请参照:待更新...
java使用SWFTools将PDF转成swf并使用flexpaper播放PDF
1、openoffice依赖jar,以maven为例:
-
<dependency>
-
<groupId>com.artofsolving
</groupId>
-
<artifactId>jodconverter
</artifactId>
-
<version>2.2.1
</version>
-
</dependency>
-
<dependency>
-
<groupId>org.openoffice
</groupId>
-
<artifactId>jurt
</artifactId>
-
<version>3.0.1
</version>
-
</dependency>
-
<dependency>
-
<groupId>org.openoffice
</groupId>
-
<artifactId>ridl
</artifactId>
-
<version>3.0.1
</version>
-
</dependency>
-
<dependency>
-
<groupId>org.openoffice
</groupId>
-
<artifactId>juh
</artifactId>
-
<version>3.0.1
</version>
-
</dependency>
-
<dependency>
-
<groupId>org.openoffice
</groupId>
-
<artifactId>unoil
</artifactId>
-
<version>3.0.1
</version>
-
</dependency>
-
-
<!--jodconverter2.2.1必须依赖slf4j-jdk14必须这个版本,不然源码中日志会报错,很low的一个问题-->
-
<dependency>
-
<groupId>org.slf4j
</groupId>
-
<artifactId>slf4j-jdk14
</artifactId>
-
<version>1.4.3
</version>
-
</dependency>
2、直接上转换代码。运行前需要先启动openoffice服务,并让其监听8100端口,代码通过该端口连接openoffice。
-
public void convert(File sourceFile, File targetFile) {
-
-
try {
-
// 1: 打开连接
-
OpenOfficeConnection connection =
new SocketOpenOfficeConnection(
8100);
-
connection.connect();
-
-
DocumentConverter converter =
new OpenOfficeDocumentConverter(connection);
-
// 2:获取Format
-
DocumentFormatRegistry factory =
new BasicDocumentFormatRegistry();
-
DocumentFormat inputDocumentFormat = factory
-
.getFormatByFileExtension(getExtensionName(sourceFile.getAbsolutePath()));
-
DocumentFormat outputDocumentFormat = factory
-
.getFormatByFileExtension(getExtensionName(targetFile.getAbsolutePath()));
-
// 3:执行转换
-
converter.convert(sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat);
-
}
catch (ConnectException e) {
-
log.info(
"文档转换PDF失败");
-
}
-
}
3、需注意:jodconverter在转换2007及以后版本的xxx.docx文档时会报错。原因大家都明白:03版的后缀名是xxx.doc,07及以后版本的后缀名是xxx.docx。
查看jodconverter源码发现,documentFormat不支持xxx.docx格式:BasicDocumentFormatRegistry中的public DocumentFormat getFormatByFileExtension(String extension)按后缀名精确匹配,而默认只注册了doc格式,没有docx。
BasicDocumentFormatRegistry类源码
-
//
-
// JODConverter - Java OpenDocument Converter
-
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
-
//
-
// This library is free software; you can redistribute it and/or
-
// modify it under the terms of the GNU Lesser General Public
-
// License as published by the Free Software Foundation; either
-
// version 2.1 of the License, or (at your option) any later version.
-
//
-
// This library is distributed in the hope that it will be useful,
-
// but WITHOUT ANY WARRANTY; without even the implied warranty of
-
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-
// Lesser General Public License for more details.
-
// http://www.gnu.org/copyleft/lesser.html
-
//
-
package com.artofsolving.jodconverter;
-
-
import java.util.ArrayList;
-
import java.util.Iterator;
-
import java.util.List;
-
-
public
class BasicDocumentFormatRegistry implements DocumentFormatRegistry {
-
-
private List
/*<DocumentFormat>*/ documentFormats =
new ArrayList();
-
-
public void addDocumentFormat(DocumentFormat documentFormat) {
-
documentFormats.add(documentFormat);
-
}
-
-
protected List
/*<DocumentFormat>*/ getDocumentFormats() {
-
return documentFormats;
-
}
-
-
/**
-
* @param extension the file extension
-
* @return the DocumentFormat for this extension, or null if the extension is not mapped
-
*/
-
public DocumentFormat getFormatByFileExtension(String extension) {
-
if (extension ==
null) {
-
return
null;
-
}
-
String lowerExtension = extension.toLowerCase();
-
for (Iterator it = documentFormats.iterator(); it.hasNext();) {
-
DocumentFormat format = (DocumentFormat) it.next();
-
if (format.getFileExtension().equals(lowerExtension)) {
-
return format;
-
}
-
}
-
return
null;
-
}
-
-
public DocumentFormat getFormatByMimeType(String mimeType) {
-
for (Iterator it = documentFormats.iterator(); it.hasNext();) {
-
DocumentFormat format = (DocumentFormat) it.next();
-
if (format.getMimeType().equals(mimeType)) {
-
return format;
-
}
-
}
-
return
null;
-
}
-
}
BasicDocumentFormatRegistry的默认实现类DefaultDocumentFormatRegistry 中支持的文件格式如下
-
-
//
-
// JODConverter - Java OpenDocument Converter
-
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
-
//
-
// This library is free software; you can redistribute it and/or
-
// modify it under the terms of the GNU Lesser General Public
-
// License as published by the Free Software Foundation; either
-
// version 2.1 of the License, or (at your option) any later version.
-
//
-
// This library is distributed in the hope that it will be useful,
-
// but WITHOUT ANY WARRANTY; without even the implied warranty of
-
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-
// Lesser General Public License for more details.
-
// http://www.gnu.org/copyleft/lesser.html
-
//
-
package com.artofsolving.jodconverter;
-
-
public
class DefaultDocumentFormatRegistry extends BasicDocumentFormatRegistry {
-
-
public DefaultDocumentFormatRegistry() {
-
final DocumentFormat pdf =
new DocumentFormat(
"Portable Document Format",
"application/pdf",
"pdf");
-
pdf.setExportFilter(DocumentFamily.DRAWING,
"draw_pdf_Export");
-
pdf.setExportFilter(DocumentFamily.PRESENTATION,
"impress_pdf_Export");
-
pdf.setExportFilter(DocumentFamily.SPREADSHEET,
"calc_pdf_Export");
-
pdf.setExportFilter(DocumentFamily.TEXT,
"writer_pdf_Export");
-
addDocumentFormat(pdf);
-
-
final DocumentFormat swf =
new DocumentFormat(
"Macromedia Flash",
"application/x-shockwave-flash",
"swf");
-
swf.setExportFilter(DocumentFamily.DRAWING,
"draw_flash_Export");
-
swf.setExportFilter(DocumentFamily.PRESENTATION,
"impress_flash_Export");
-
addDocumentFormat(swf);
-
-
final DocumentFormat xhtml =
new DocumentFormat(
"XHTML",
"application/xhtml+xml",
"xhtml");
-
xhtml.setExportFilter(DocumentFamily.PRESENTATION,
"XHTML Impress File");
-
xhtml.setExportFilter(DocumentFamily.SPREADSHEET,
"XHTML Calc File");
-
xhtml.setExportFilter(DocumentFamily.TEXT,
"XHTML Writer File");
-
addDocumentFormat(xhtml);
-
-
// HTML is treated as Text when supplied as input, but as an output it is also
-
// available for exporting Spreadsheet and Presentation formats
-
final DocumentFormat html =
new DocumentFormat(
"HTML", DocumentFamily.TEXT,
"text/html",
"html");
-
html.setExportFilter(DocumentFamily.PRESENTATION,
"impress_html_Export");
-
html.setExportFilter(DocumentFamily.SPREADSHEET,
"HTML (StarCalc)");
-
html.setExportFilter(DocumentFamily.TEXT,
"HTML (StarWriter)");
-
addDocumentFormat(html);
-
-
final DocumentFormat odt =
new DocumentFormat(
"OpenDocument Text", DocumentFamily.TEXT,
"application/vnd.oasis.opendocument.text",
"odt");
-
odt.setExportFilter(DocumentFamily.TEXT,
"writer8");
-
addDocumentFormat(odt);
-
-
final DocumentFormat sxw =
new DocumentFormat(
"OpenOffice.org 1.0 Text Document", DocumentFamily.TEXT,
"application/vnd.sun.xml.writer",
"sxw");
-
sxw.setExportFilter(DocumentFamily.TEXT,
"StarOffice XML (Writer)");
-
addDocumentFormat(sxw);
-
-
final DocumentFormat doc =
new DocumentFormat(
"Microsoft Word", DocumentFamily.TEXT,
"application/msword",
"doc");
-
doc.setExportFilter(DocumentFamily.TEXT,
"MS Word 97");
-
addDocumentFormat(doc);
-
-
final DocumentFormat rtf =
new DocumentFormat(
"Rich Text Format", DocumentFamily.TEXT,
"text/rtf",
"rtf");
-
rtf.setExportFilter(DocumentFamily.TEXT,
"Rich Text Format");
-
addDocumentFormat(rtf);
-
-
final DocumentFormat wpd =
new DocumentFormat(
"WordPerfect", DocumentFamily.TEXT,
"application/wordperfect",
"wpd");
-
addDocumentFormat(wpd);
-
-
final DocumentFormat txt =
new DocumentFormat(
"Plain Text", DocumentFamily.TEXT,
"text/plain",
"txt");
-
// set FilterName to "Text" to prevent OOo from tryign to display the "ASCII Filter Options" dialog
-
// alternatively FilterName could be "Text (encoded)" and FilterOptions used to set encoding if needed
-
txt.setImportOption(
"FilterName",
"Text");
-
txt.setExportFilter(DocumentFamily.TEXT,
"Text");
-
addDocumentFormat(txt);
-
-
final DocumentFormat wikitext =
new DocumentFormat(
"MediaWiki wikitext",
"text/x-wiki",
"wiki");
-
wikitext.setExportFilter(DocumentFamily.TEXT,
"MediaWiki");
-
addDocumentFormat(wikitext);
-
-
final DocumentFormat ods =
new DocumentFormat(
"OpenDocument Spreadsheet", DocumentFamily.SPREADSHEET,
"application/vnd.oasis.opendocument.spreadsheet",
"ods");
-
ods.setExportFilter(DocumentFamily.SPREADSHEET,
"calc8");
-
addDocumentFormat(ods);
-
-
final DocumentFormat sxc =
new DocumentFormat(
"OpenOffice.org 1.0 Spreadsheet", DocumentFamily.SPREADSHEET,
"application/vnd.sun.xml.calc",
"sxc");
-
sxc.setExportFilter(DocumentFamily.SPREADSHEET,
"StarOffice XML (Calc)");
-
addDocumentFormat(sxc);
-
-
final DocumentFormat xls =
new DocumentFormat(
"Microsoft Excel", DocumentFamily.SPREADSHEET,
"application/vnd.ms-excel",
"xls");
-
xls.setExportFilter(DocumentFamily.SPREADSHEET,
"MS Excel 97");
-
addDocumentFormat(xls);
-
-
final DocumentFormat csv =
new DocumentFormat(
"CSV", DocumentFamily.SPREADSHEET,
"text/csv",
"csv");
-
csv.setImportOption(
"FilterName",
"Text - txt - csv (StarCalc)");
-
csv.setImportOption(
"FilterOptions",
"44,34,0");
// Field Separator: ','; Text Delimiter: '"'
-
csv.setExportFilter(DocumentFamily.SPREADSHEET,
"Text - txt - csv (StarCalc)");
-
csv.setExportOption(DocumentFamily.SPREADSHEET,
"FilterOptions",
"44,34,0");
-
addDocumentFormat(csv);
-
-
final DocumentFormat tsv =
new DocumentFormat(
"Tab-separated Values", DocumentFamily.SPREADSHEET,
"text/tab-separated-values",
"tsv");
-
tsv.setImportOption(
"FilterName",
"Text - txt - csv (StarCalc)");
-
tsv.setImportOption(
"FilterOptions",
"9,34,0");
// Field Separator: '\t'; Text Delimiter: '"'
-
tsv.setExportFilter(DocumentFamily.SPREADSHEET,
"Text - txt - csv (StarCalc)");
-
tsv.setExportOption(DocumentFamily.SPREADSHEET,
"FilterOptions",
"9,34,0");
-
addDocumentFormat(tsv);
-
-
final DocumentFormat odp =
new DocumentFormat(
"OpenDocument Presentation", DocumentFamily.PRESENTATION,
"application/vnd.oasis.opendocument.presentation",
"odp");
-
odp.setExportFilter(DocumentFamily.PRESENTATION,
"impress8");
-
addDocumentFormat(odp);
-
-
final DocumentFormat sxi =
new DocumentFormat(
"OpenOffice.org 1.0 Presentation", DocumentFamily.PRESENTATION,
"application/vnd.sun.xml.impress",
"sxi");
-
sxi.setExportFilter(DocumentFamily.PRESENTATION,
"StarOffice XML (Impress)");
-
addDocumentFormat(sxi);
-
-
final DocumentFormat ppt =
new DocumentFormat(
"Microsoft PowerPoint", DocumentFamily.PRESENTATION,
"application/vnd.ms-powerpoint",
"ppt");
-
ppt.setExportFilter(DocumentFamily.PRESENTATION,
"MS PowerPoint 97");
-
addDocumentFormat(ppt);
-
-
final DocumentFormat odg =
new DocumentFormat(
"OpenDocument Drawing", DocumentFamily.DRAWING,
"application/vnd.oasis.opendocument.graphics",
"odg");
-
odg.setExportFilter(DocumentFamily.DRAWING,
"draw8");
-
addDocumentFormat(odg);
-
-
final DocumentFormat svg =
new DocumentFormat(
"Scalable Vector Graphics",
"image/svg+xml",
"svg");
-
svg.setExportFilter(DocumentFamily.DRAWING,
"draw_svg_Export");
-
addDocumentFormat(svg);
-
}
-
}
解决方法:重写BasicDocumentFormatRegistry类中public DocumentFormat getFormatByFileExtension(String extension)方法,只要是后缀名包含doc则使用doc的documentFormat文档格式
-
//
-
// JODConverter - Java OpenDocument Converter
-
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
-
//
-
// This library is free software; you can redistribute it and/or
-
// modify it under the terms of the GNU Lesser General Public
-
// License as published by the Free Software Foundation; either
-
// version 2.1 of the License, or (at your option) any later version.
-
//
-
// This library is distributed in the hope that it will be useful,
-
// but WITHOUT ANY WARRANTY; without even the implied warranty of
-
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-
// Lesser General Public License for more details.
-
// http://www.gnu.org/copyleft/lesser.html
-
//
-
package com.artofsolving.jodconverter;
-
-
import java.util.ArrayList;
-
import java.util.Iterator;
-
import java.util.List;
-
-
/**
-
* 重写 BasicDocumentFormatRegistry 文档格式
-
* @author HuGuangJun
-
*/
-
public
class BasicDocumentFormatRegistry implements DocumentFormatRegistry {
-
-
private List
/* <DocumentFormat> */ documentFormats =
new ArrayList();
-
-
public void addDocumentFormat(DocumentFormat documentFormat) {
-
documentFormats.add(documentFormat);
-
}
-
-
protected List
/* <DocumentFormat> */ getDocumentFormats() {
-
return documentFormats;
-
}
-
-
/**
-
* @param extension
-
* the file extension
-
* @return the DocumentFormat for this extension, or null if the extension
-
* is not mapped
-
*/
-
public DocumentFormat getFormatByFileExtension(String extension) {
-
if (extension ==
null) {
-
return
null;
-
}
-
//将文件名后缀统一转化
-
if (extension.indexOf(
"doc") >=
0) {
-
extension =
"doc";
-
}
-
if (extension.indexOf(
"ppt") >=
0) {
-
extension =
"ppt";
-
}
-
if (extension.indexOf(
"xls") >=
0) {
-
extension =
"xls";
-
}
-
String lowerExtension = extension.toLowerCase();
-
for (Iterator it = documentFormats.iterator(); it.hasNext();) {
-
DocumentFormat format = (DocumentFormat) it.next();
-
if (format.getFileExtension().equals(lowerExtension)) {
-
return format;
-
}
-
}
-
return
null;
-
}
-
-
public DocumentFormat getFormatByMimeType(String mimeType) {
-
for (Iterator it = documentFormats.iterator(); it.hasNext();) {
-
DocumentFormat format = (DocumentFormat) it.next();
-
if (format.getMimeType().equals(mimeType)) {
-
return format;
-
}
-
}
-
return
null;
-
}
-
}