因为需求,公司在原先的项目上增加了几个功能。
有几点要求:
不能降版本,poi使用的是4.1.2版本,如果是3.1.5版本的poi可以跳过这篇文章,打开百度,输入java使用poi转换word。
不能付费,本着白嫖的原则,能不付费就尽量不付费。如果可以付费的话,推荐一下spire.doc,它的功能还挺全的,代码也简单明了(这不是广告帖)。
搜了很多文章,大多写得不是很全,而且更多是针对poi3版本的。写下这篇文章,希望可以帮遇到同样问题的人解决烦恼,快速提高效率。话不多说,上代码。
word07转html
/**
 * Converts a .docx (Word 2007+) document into an HTML file.
 *
 * <p>Images embedded in the document are extracted to the hard-coded folder
 * {@code F:/TestFile/images}, and the generated HTML references them through that same path.
 * Any failure is printed to standard error and is not rethrown.
 *
 * @param inFileName  path of the .docx file to read
 * @param outFileName path of the HTML file to write
 */
public static void docxToHtml(String inFileName, String outFileName){
    // try-with-resources closes the document and both streams (in reverse order) even when the
    // conversion fails or one of the close() calls throws.
    try (FileInputStream source = new FileInputStream(new File(inFileName));
         OutputStream target = new FileOutputStream(outFileName);
         XWPFDocument doc = new XWPFDocument(source)) {
        // Conversion options
        XHTMLOptions options = XHTMLOptions.create();
        // Folder the embedded images are extracted to
        options.setExtractor(new FileImageExtractor(new File("F:/TestFile/images")));
        // Base path used for image references inside the generated HTML
        options.URIResolver(new BasicURIResolver("F:/TestFile/images"));
        // NOTE(review): the original comment said this writes styles inline instead of into a
        // <style> tag (default false); the method name suggests it emits an HTML fragment.
        // Confirm against the XHTMLOptions documentation.
        options.setFragment(true);
        // Omit header/footer pages (default false)
        options.setOmitHeaderFooterPages(true);
        // Keep styles even when they are unused (default is to ignore them)
        options.setIgnoreStylesIfUnused(false);
        XHTMLConverter.getInstance().convert(doc, target, options);
    } catch (Exception e) {
        // Best-effort conversion: report the problem but do not propagate it to the caller
        e.printStackTrace();
    }
}
word03版本转html
/**
 * Converts a .doc (Word 97-2003) document into an HTML file.
 *
 * <p>Pictures found in the document are written into the hard-coded folder
 * {@code F:\TestFile\images} (created on demand), and the path of each saved picture is
 * handed back to the converter. The HTML is serialized as UTF-8 with indentation. Any failure
 * is printed to standard error and is not rethrown.
 *
 * @param inFileName  path of the .doc file to read
 * @param outFileName path of the HTML file to write
 */
public static void docToHtml(String inFileName,String outFileName) {
    // try-with-resources releases the input stream and the Word document on every exit path
    try (FileInputStream source = new FileInputStream(new File(inFileName));
         HWPFDocument wordDocument = new HWPFDocument(source)) {
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        // Decide where each embedded picture is stored and which path the converter gets back
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
                File imgPath = new File("F:\\TestFile\\images");
                if (!imgPath.exists()) {
                    // Create the picture directory when it does not exist yet
                    imgPath.mkdirs();
                }
                // Resolve the file INSIDE the directory; plain string concatenation dropped the
                // path separator and wrote "imagesNAME" next to the folder instead.
                File file = new File(imgPath, suggestedName);
                try (OutputStream os = new FileOutputStream(file)) {
                    os.write(content);
                } catch (IOException e) {
                    // A picture that cannot be saved must not abort the whole conversion
                    e.printStackTrace();
                }
                return file.getPath();
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();
        DOMSource domSource = new DOMSource(htmlDocument);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        // Open the output stream only once the document is converted, and always close it
        try (FileOutputStream fos = new FileOutputStream(new File(outFileName))) {
            serializer.transform(domSource, new StreamResult(fos));
        }
    } catch (Exception e) {
        // Best-effort conversion: report the problem but do not propagate it to the caller
        e.printStackTrace();
    }
}
所需依赖包的Maven坐标
<!-- Apache POI core library -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<!-- POI scratchpad: HWPF support for the binary .doc format (HWPFDocument, WordToHtmlConverter) -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.2</version>
</dependency>
<!-- POI OOXML: XWPF support for the .docx format (XWPFDocument) -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<!-- XDocReport: presumably supplies XHTMLConverter / XHTMLOptions used for docx-to-HTML; verify the artifact resolves as a jar -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>2.0.2</version>
</dependency>
<!-- Full OOXML schema classes; NOTE(review): confirm 1.4 is the version matching POI 4.1.2 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.4</version>
</dependency>