关于 jar:要注意导入的 jar,如果缺少某些包或者导入的版本不对,会在运行时报异常。
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven POM declaring the dependencies used by the doc/docx-to-HTML examples. -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.html</groupId>
<artifactId>html</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<!-- Apache POI artifacts; poi, poi-scratchpad, poi-ooxml and poi-ooxml-schemas are all pinned to 3.14. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<!-- Presumably supplies the org.apache.poi.hwpf classes used for .doc conversion; confirm against the POI component map. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
<!-- Presumably supplies the org.apache.poi.xwpf.converter classes used for .docx conversion; confirm. -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>1.0.6</version>
</dependency>
<!-- NOTE(review): both poi-ooxml-schemas and ooxml-schemas are declared; verify that both are needed and that their versions are compatible. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
<!-- jsoup: used to read the generated HTML file back into a string. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
</dependencies>
</project>
doc转html
package com.html;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
/**
 * Converts a binary Word document (.doc) into an HTML file using POI's
 * {@link WordToHtmlConverter}.
 */
public class DocToHtml {
    /**
     * Converts {@code C:/mb/static/test.doc} into {@code C:/mb/static/test2.html}.
     *
     * <p>Pictures embedded in the document are written to {@code C:/mb/static/image/}
     * and referenced from the HTML through the relative path {@code image/<name>}.
     *
     * @return the absolute path of the generated HTML file
     * @throws Exception if the source document cannot be read or the HTML cannot be written
     */
    public static String docToHtml() throws Exception {
        File path = new File("C:/mb");
        String imagePathStr = path.getAbsolutePath() + "\\static\\image\\";
        String sourceFileName = path.getAbsolutePath() + "\\static\\test.doc";
        String targetFileName = path.getAbsolutePath() + "\\static\\test2.html";

        // Make sure the picture directory exists before the converter writes into it.
        File imageDir = new File(imagePathStr);
        if (!imageDir.exists()) {
            imageDir.mkdirs();
        }

        // Close the source stream as soon as the document has been loaded;
        // the original code never closed it.
        HWPFDocument wordDocument;
        try (FileInputStream sourceStream = new FileInputStream(sourceFileName)) {
            wordDocument = new HWPFDocument(sourceStream);
        }

        org.w3c.dom.Document document =
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);

        // Save each picture to disk and return its path relative to the HTML file.
        wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
            try (FileOutputStream out = new FileOutputStream(imagePathStr + name)) {
                out.write(content);
            } catch (Exception e) {
                // Best effort: a picture that cannot be saved must not abort the conversion.
                e.printStackTrace();
            }
            return "image/" + name;
        });
        wordToHtmlConverter.processDocument(wordDocument);

        // Serialize the resulting DOM as indented UTF-8 HTML.
        org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(new File(targetFileName));
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        return targetFileName;
    }
}
调用
/**
 * Demo entry point: converts the sample .doc file to HTML, prints the whole
 * HTML document, then prints only the content of its body element.
 */
public class DocHtml {
    /**
     * Runs the .doc conversion and prints the full HTML followed by the body content.
     *
     * @param args unused
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        String htmlPath = DocToHtml.docToHtml();
        String html = DocGetHtml.readfile(htmlPath);
        System.out.println(html);
        System.out.println(extractBody(html));
    }

    /**
     * Returns the markup between the opening and closing body tags.
     *
     * <p>The opening tag is located by its {@code <body} prefix so that a tag
     * carrying attributes is still found. Searching for the literal
     * {@code <body>} would miss such a tag and cut the string at a bogus offset.
     * If there is no opening tag the input is returned unchanged; if the
     * closing tag is missing, everything after the opening tag is returned.
     */
    private static String extractBody(String html) {
        int tagStart = html.indexOf("<body");
        if (tagStart < 0) {
            return html;
        }
        int tagEnd = html.indexOf('>', tagStart);
        if (tagEnd < 0) {
            return html;
        }
        int contentStart = tagEnd + 1;
        int contentEnd = html.indexOf("</body>", contentStart);
        if (contentEnd < 0) {
            return html.substring(contentStart);
        }
        return html.substring(contentStart, contentEnd);
    }
}
docx转html
package com.html;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
/**
 * Converts an OOXML Word document (.docx) into an HTML file using the
 * XHTML converter.
 */
public class DocxToHtml {
    /**
     * Converts {@code C:/mb/static/test.docx} into {@code C:/mb/static/test.html}.
     *
     * <p>Pictures are extracted into {@code C:/mb/static/image} and referenced
     * from the HTML through the relative URI prefix {@code image}.
     *
     * @return the absolute path of the generated HTML file
     * @throws Exception if the source document cannot be read or the HTML cannot be written
     */
    public static String docxToHtml() throws Exception {
        File path = new File("C:/mb");
        String imagePath = path.getAbsolutePath() + "\\static\\image";
        String sourceFileName = path.getAbsolutePath() + "\\static\\test.docx";
        String targetFileName = path.getAbsolutePath() + "\\static\\test.html";

        // The original code never closed the source stream; try-with-resources
        // releases it even when the conversion throws.
        try (FileInputStream sourceStream = new FileInputStream(sourceFileName)) {
            XWPFDocument document = new XWPFDocument(sourceStream);
            XHTMLOptions options = XHTMLOptions.create();
            // Folder where extracted pictures are stored.
            options.setExtractor(new FileImageExtractor(new File(imagePath)));
            // Path used for pictures inside the generated HTML.
            options.URIResolver(new BasicURIResolver("image"));

            // Open the target only after the document has loaded, as the original did,
            // so a failed load does not leave an empty HTML file behind.
            try (FileOutputStream targetStream = new FileOutputStream(targetFileName);
                    OutputStreamWriter outputStreamWriter =
                            new OutputStreamWriter(targetStream, "utf-8")) {
                XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
                xhtmlConverter.convert(document, outputStreamWriter, options);
            }
        }
        return targetFileName;
    }
}
调用
package com.html;
/**
 * Demo entry point: converts the sample .docx file to HTML, prints the whole
 * HTML document, then prints only the content of its body element.
 */
public class DocxHtml {
    /**
     * Runs the .docx conversion and prints the full HTML followed by the body content.
     *
     * @param args unused
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        String htmlPath = DocxToHtml.docxToHtml();
        String html = DocGetHtml.readfile(htmlPath);
        System.out.println(html);
        System.out.println(extractBody(html));
    }

    /**
     * Returns the markup between the opening and closing body tags.
     *
     * <p>The opening tag is located by its {@code <body} prefix so that a tag
     * carrying attributes is still found. Searching for the literal
     * {@code <body>} would miss such a tag and cut the string at a bogus offset.
     * If there is no opening tag the input is returned unchanged; if the
     * closing tag is missing, everything after the opening tag is returned.
     */
    private static String extractBody(String html) {
        int tagStart = html.indexOf("<body");
        if (tagStart < 0) {
            return html;
        }
        int tagEnd = html.indexOf('>', tagStart);
        if (tagEnd < 0) {
            return html;
        }
        int contentStart = tagEnd + 1;
        int contentEnd = html.indexOf("</body>", contentStart);
        if (contentEnd < 0) {
            return html.substring(contentStart);
        }
        return html.substring(contentStart, contentEnd);
    }
}
在实际项目里是通过 gradle 引入依赖的。因为是大项目,之前已经导入了一部分包,但没有全部导入,所以一直报找不到包的错误,找了很久才找到原因。
实际项目这样使用
"org.apache.poi:poi:3.11",
"org.apache.poi:poi-excelant:3.11",
"org.apache.poi:poi-ooxml:3.11",
"org.apache.poi:poi-ooxml-schemas:3.11",
"org.apache.poi:poi-scratchpad:3.11",
"org.apache.xmlbeans:xmlbeans:2.6.0",
// Additional Gradle dependencies added for the Word-to-HTML conversion.
compile("fr.opensagres.xdocreport:xdocreport:1.0.6")
compile("org.jsoup:jsoup:1.11.3")
// NOTE(review): poi-scratchpad is declared here as 3.12 but as 3.11 in the list above; confirm which version is intended.
compile("org.apache.poi:poi-scratchpad:3.12")
compile("org.apache.poi:ooxml-schemas:1.1")
在判断是 doc 还是 docx 时,是直接根据文件后缀名来判断的;但实际中可能有人直接改了后缀名(或做了其他操作),这时按 doc 的方式处理会报错,提示要用 docx 的方法。所以在异常处理里改为调用 docx 的转换方法:
// Try to open the input as a binary .doc first; the file extension alone is not trusted.
HWPFDocument wordDocument=null;
try{
wordDocument = new HWPFDocument(fileInputStream);
}catch (Exception e) {
// Any failure to open the file as .doc falls back to the .docx conversion.
// NOTE(review): this catches every Exception, so unrelated failures (e.g. I/O errors)
// are also routed to docxToHtml; consider narrowing the catch.
return docxToHtml(Filepath,SourceFilePath);
}
DocGetHtml.readfile 的部分(下面贴出的类名是 HtmlFotStringUtil,应与上文调用的 DocGetHtml 对应):
package com.ly.mp.pvoa.news.service;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import org.jsoup.Jsoup;
/**
 * Helper for loading an HTML file from disk as a string.
 */
public class HtmlFotStringUtil {
    /**
     * Parses the file at {@code filePath} as UTF-8 HTML with jsoup and returns
     * the parsed document's HTML.
     *
     * @param filePath path of the HTML file to read
     * @return the HTML of the parsed document, or an empty string if the file
     *         cannot be read (the stack trace is printed in that case)
     */
    public static String readfile(String filePath) {
        File htmlFile = new File(filePath);
        try {
            org.jsoup.nodes.Document parsed = Jsoup.parse(htmlFile, "utf-8");
            return parsed.html();
        } catch (IOException readError) {
            // Reading failed: report the error and fall back to an empty result.
            readError.printStackTrace();
            return "";
        }
    }
}