html转换docx,word(doc,docx)转换为HTML

maven          

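<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.12</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.12</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.12</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.12</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>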

代码

/**
 * Converts a Word 2007+ ({@code .docx}) document to HTML.
 *
 * <p>Reads {@code D:/test/test.docx} and writes the converted markup to
 * {@code D:/test/test.html}. The converter is configured with a
 * {@code FileImageExtractor} pointed at {@code D:/test/images} and a
 * {@code BasicURIResolver} using the relative base {@code "images"}.
 * If the source file is missing or does not have a {@code .docx} extension,
 * a message is printed and nothing is converted.
 *
 * @throws IOException if the document cannot be read or the HTML cannot be written
 */
@Test
public void Word2007ToHtml() throws IOException {
    String filepath = "D:/test/";
    String fileName = "test.docx";
    String htmlName = "test.html";

    File f = new File(filepath + fileName);
    if (!f.exists()) {
        System.out.println("Sorry File does not Exists!");
        return;
    }

    // Case-insensitive extension check, so ".Docx" etc. are accepted as well as
    // ".docx" / ".DOCX". regionMatches returns false when the name is shorter
    // than the suffix (negative offset), so no length guard is required.
    String name = f.getName();
    String suffix = ".docx";
    if (!name.regionMatches(true, name.length() - suffix.length(), suffix, 0, suffix.length())) {
        System.out.println("Enter only MS Office 2007+ files");
        return;
    }

    // 1) Load the Word file into an XWPFDocument. try-with-resources guarantees
    //    the input stream is closed even if parsing or conversion fails.
    try (InputStream in = new FileInputStream(f)) {
        XWPFDocument document = new XWPFDocument(in);

        // 2) Configure the XHTML conversion: the URI resolver and the image
        //    extractor together determine where pictures are stored and how
        //    the generated HTML refers to them.
        File imageFolderFile = new File(filepath, "images");
        XHTMLOptions options = XHTMLOptions.create().URIResolver(new BasicURIResolver("images"));
        options.setExtractor(new FileImageExtractor(imageFolderFile));
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);

        // 3) Convert the XWPFDocument to XHTML. The output stream is opened only
        //    after the document was parsed, and is always closed afterwards.
        //    To obtain the markup as a String instead, pass a
        //    ByteArrayOutputStream here and call toString() on it.
        try (OutputStream out = new FileOutputStream(new File(filepath + htmlName))) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }
    }
}

/**
 * Converts a Word 97-2003 ({@code .doc}) document to HTML.
 *
 * <p>Reads {@code D:/test.doc}, saves each embedded picture under
 * {@code D:/image/} (creating the directory if necessary), and serializes
 * the resulting DOM as UTF-8, indented HTML to {@code D:/test.html}.
 *
 * @throws IOException if the document cannot be read or the HTML cannot be written
 * @throws TransformerException if the DOM cannot be serialized
 * @throws ParserConfigurationException if no DOM document builder can be created
 */
@Test
public void Word2003ToHtml() throws IOException, TransformerException, ParserConfigurationException {
    String filepath = "D:/";
    final String imagepath = "D:/image/";
    String fileName = "test.doc";
    String htmlName = "test.html";
    final String file = filepath + fileName;

    // Load the document; the input stream is closed as soon as it has been read.
    HWPFDocument wordDocument;
    try (InputStream input = new FileInputStream(new File(file))) {
        wordDocument = new HWPFDocument(input);
    }

    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
            DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

    // Decide where embedded pictures are stored and which path is handed back
    // to the converter for them.
    wordToHtmlConverter.setPicturesManager(new PicturesManager() {
        @Override
        public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                float widthInches, float heightInches) {
            File imgPath = new File(imagepath);
            if (!imgPath.exists()) {
                // Create the picture directory on first use.
                imgPath.mkdirs();
            }
            File imageFile = new File(imagepath + suggestedName);
            // Best effort: a picture that cannot be written is reported but does
            // not abort the conversion. The stream is closed even if write() fails.
            try (OutputStream os = new FileOutputStream(imageFile)) {
                os.write(content);
            } catch (IOException e) {
                e.printStackTrace();
            }
            return imagepath + suggestedName;
        }
    });

    // Parse the Word document into the converter's DOM.
    wordToHtmlConverter.processDocument(wordDocument);
    Document htmlDocument = wordToHtmlConverter.getDocument();

    File htmlFile = new File(filepath + htmlName);
    // To obtain the markup as a String instead, wrap a ByteArrayOutputStream
    // in the StreamResult and call toString() on it after transform().
    try (OutputStream outStream = new FileOutputStream(htmlFile)) {
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(outStream);

        TransformerFactory factory = TransformerFactory.newInstance();
        Transformer serializer = factory.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值