import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.apache.commons.io.FilenameUtils;
import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.DocumentFormat;
import com.artofsolving.jodconverter.DocumentFormatRegistry;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
/**
 * Document conversion helpers.
 *
 * <ul>
 *   <li>{@link #pdf2html(String)} runs the external pdf2htmlEX executable.</li>
 *   <li>{@link #convert(File, File)} and {@link #office2PDF(String, String)} convert
 *       documents through an OpenOffice service reached over a socket on port 8100.</li>
 * </ul>
 */
public class OpenOfficeUtils {

    /** Location of the pdf2htmlEX executable used by {@link #pdf2html(String)}. */
    private static final String PDF2HTMLEX_PATH = "E:\\servers\\pdf2htmlEX-v1.0\\pdf2htmlEX.exe";

    /** OpenOffice installation directory used by {@link #office2PDF(String, String)}. */
    private static final String OPEN_OFFICE_HOME = "C:\\Program Files (x86)\\OpenOffice 4\\";

    /** Host on which the OpenOffice service started by {@link #office2PDF(String, String)} listens. */
    private static final String OPEN_OFFICE_HOST = "127.0.0.1";

    /** Port on which the OpenOffice service is expected to listen. */
    private static final int OPEN_OFFICE_PORT = 8100;

    /**
     * Converts a PDF file to HTML by running pdf2htmlEX and waiting for it to finish.
     * The HTML file is written next to the PDF, with the PDF's extension replaced by
     * {@code .html}.
     *
     * @param pdfFilePath path of the PDF file; the directory part is taken up to the last
     *                    {@code \} (or {@code /}) in the string
     * @return the path of the generated HTML file, or {@code null} when
     *         {@code pdfFilePath} is {@code null} or empty, when the process cannot be
     *         started, or when it exits with a non-zero code
     * @throws InterruptedException if the calling thread is interrupted while waiting for
     *                              pdf2htmlEX; the child process is destroyed first
     */
    public String pdf2html(String pdfFilePath) throws InterruptedException {
        // Check for null BEFORE touching the string; both null and "" mean "nothing to convert".
        if (pdfFilePath == null || pdfFilePath.isEmpty()) {
            System.out.println("要转化的pdf文件不存在");
            return null;
        }

        // Directory that holds the PDF; empty when the path has no separator at all.
        int separatorIndex = Math.max(pdfFilePath.lastIndexOf('\\'), pdfFilePath.lastIndexOf('/'));
        String htmlFilePath = separatorIndex < 0 ? "" : pdfFilePath.substring(0, separatorIndex);
        System.out.println("html文件的路径是 " + htmlFilePath);

        // Output file name: the PDF's name with its extension (if any) swapped for ".html".
        String pdfName = pdfFilePath.substring(separatorIndex + 1);
        int dotIndex = pdfName.lastIndexOf('.');
        String baseName = dotIndex < 0 ? pdfName : pdfName.substring(0, dotIndex);
        String htmlFileName = baseName + ".html";

        // Every argument is passed as its own list element, so paths that contain
        // spaces need no manual quoting.
        ProcessBuilder builder = new ProcessBuilder(PDF2HTMLEX_PATH);
        if (!htmlFilePath.trim().isEmpty()) {
            // Directory in which the generated file is stored.
            appendArguments(builder, "--dest-dir", htmlFilePath);
        }
        appendArguments(builder,
                "--optimize-text", "1",    // minimize the number of HTML elements used for text (default: 0)
                "--zoom", "1.4",
                "--process-outline", "0",  // show links in the html: 0 = false, 1 = true
                "--font-format", "woff",   // suffix of fonts embedded in the html: ttf, otf, woff, svg
                pdfFilePath,
                htmlFileName);

        Process process;
        try {
            process = builder.start();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }

        // Drain stderr and stdout on their own threads while waiting for the process.
        StreamGobbler errorGobbler = new StreamGobbler(process.getErrorStream(), "ERROR");
        errorGobbler.start();
        StreamGobbler outGobbler = new StreamGobbler(process.getInputStream(), "STDOUT");
        outGobbler.start();

        int exitCode;
        try {
            exitCode = process.waitFor();
        } catch (InterruptedException e) {
            // Do not leave the converter running when the caller gives up on it.
            process.destroy();
            throw e;
        }
        if (exitCode != 0) {
            return null;
        }

        System.out.println("转化html文件路径是 " + htmlFilePath);
        if (separatorIndex < 0) {
            return htmlFileName;
        }
        // Re-use the separator found in the input so the result matches the caller's path style.
        return htmlFilePath + pdfFilePath.charAt(separatorIndex) + htmlFileName;
    }

    /** Appends each of {@code arguments} to the command line of {@code builder}. */
    private static void appendArguments(ProcessBuilder builder, String... arguments) {
        for (String argument : arguments) {
            builder.command().add(argument);
        }
    }

    /**
     * Converts {@code sourceFile} into {@code targetFile} through an OpenOffice service
     * listening on port 8100. The input and output formats are looked up from the two
     * files' extensions. Any failure is reported on {@code System.err} and not rethrown.
     *
     * @param sourceFile the document to convert
     * @param targetFile the file to write; its extension selects the output format
     */
    public static void convert(File sourceFile, File targetFile) {
        OpenOfficeConnection connection = null;
        boolean connected = false;
        try {
            // 1: open the connection
            connection = new SocketOpenOfficeConnection(OPEN_OFFICE_PORT);
            connection.connect();
            connected = true;
            DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
            // 2: resolve the formats from the file extensions
            DocumentFormatRegistry factory = new BasicDocumentFormatRegistry();
            DocumentFormat inputDocumentFormat = factory
                    .getFormatByFileExtension(getExtensionName(sourceFile.getAbsolutePath()));
            DocumentFormat outputDocumentFormat = factory
                    .getFormatByFileExtension(getExtensionName(targetFile.getAbsolutePath()));
            // 3: run the conversion
            converter.convert(sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat);
        } catch (Exception e) {
            System.err.println("文档转换PDF失败");
            e.printStackTrace();
        } finally {
            // Release the connection whether or not the conversion succeeded.
            if (connected) {
                connection.disconnect();
            }
        }
    }

    /**
     * Returns the extension of {@code filePath} as computed by
     * {@link FilenameUtils#getExtension(String)}.
     *
     * @param filePath the path to inspect
     * @return the extension reported by {@code FilenameUtils}
     */
    public static String getExtensionName(String filePath) {
        return FilenameUtils.getExtension(filePath);
    }

    /**
     * Starts a headless OpenOffice service, converts {@code sourceFile} into
     * {@code destFile} through it, then disconnects and destroys the service process.
     * Missing parent directories of {@code destFile} are created.
     *
     * @param sourceFile path of the document to convert
     * @param destFile   path of the file to produce
     * @return {@code 0} on success, {@code -1} when the source file does not exist (or a
     *         {@link FileNotFoundException} is raised), {@code 1} on any other
     *         {@link IOException}
     */
    public static int office2PDF(String sourceFile, String destFile) {
        File inputFile = new File(sourceFile);
        if (!inputFile.exists()) {
            // Source file not found.
            return -1;
        }

        // Create the target directory when it does not exist yet. A bare file name has
        // no parent directory, in which case there is nothing to create.
        File outputFile = new File(destFile);
        File outputDirectory = outputFile.getParentFile();
        if (outputDirectory != null && !outputDirectory.exists()) {
            outputDirectory.mkdirs();
        }

        // OpenOffice installation directory; make sure it ends with '\'.
        String openOfficeHome = OPEN_OFFICE_HOME;
        if (!openOfficeHome.endsWith("\\")) {
            openOfficeHome += "\\";
        }

        Process officeProcess = null;
        OpenOfficeConnection connection = null;
        boolean connected = false;
        try {
            // Start the OpenOffice service. Arguments are passed separately so the
            // spaces in the installation path cannot split the executable name.
            officeProcess = new ProcessBuilder(
                    openOfficeHome + "program\\soffice.exe",
                    "-headless",
                    "-accept=socket,host=" + OPEN_OFFICE_HOST + ",port=" + OPEN_OFFICE_PORT + ";urp;")
                    .start();

            // NOTE(review): the connection is attempted immediately after launching the
            // service; confirm the service is reliably accepting connections by then.
            connection = new SocketOpenOfficeConnection(OPEN_OFFICE_HOST, OPEN_OFFICE_PORT);
            connection.connect();
            connected = true;

            DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
            converter.convert(inputFile, outputFile);
            return 0;
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return -1;
        } catch (IOException e) {
            e.printStackTrace();
            return 1;
        } finally {
            // Always close the connection and stop the service process, including on failure.
            if (connected) {
                connection.disconnect();
            }
            if (officeProcess != null) {
                officeProcess.destroy();
            }
        }
    }

    /**
     * Thread that reads an input stream line by line and echoes every line to
     * {@code System.out}, prefixed with a label and {@code >}.
     */
    class StreamGobbler extends Thread {
        InputStream is;
        String type; // label of the stream being echoed, e.g. ERROR or STDOUT

        StreamGobbler(InputStream is, String type) {
            this.is = is;
            this.type = type;
        }

        @Override
        public void run() {
            BufferedReader br = new BufferedReader(new InputStreamReader(is));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(type + ">" + line);
                    System.out.flush();
                }
            } catch (IOException ioe) {
                ioe.printStackTrace();
            } finally {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing an already-drained stream fails.
                }
            }
        }
    }

    /** Manual entry point: converts a sample PDF to HTML. */
    public static void main(String[] args) throws InterruptedException {
        OpenOfficeUtils o = new OpenOfficeUtils();
        o.pdf2html("E:\\zl\\323.pdf");
    }
}