POI转HTML(仅针对doc):
package com.vito.demo.test;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

/**
 * Demo that converts one binary Word document (.doc) to HTML with Apache POI's
 * {@link WordToHtmlConverter}.
 *
 * <p>The embedded pictures are written next to the source document under the
 * file names POI suggests, and the generated markup is written to
 * {@code 1.html} in the same directory.
 */
public class PoiWordToHtml {

    /**
     * Converts {@code G:\doc\客户需求文档.doc} to {@code G:\doc\1.html}.
     *
     * @param args ignored
     * @throws Throwable on any I/O, parser-configuration or transformation failure
     */
    public static void main(String[] args) throws Throwable {
        final String path = "G:\\doc\\";
        final String file = "客户需求文档.doc";

        InputStream input = new FileInputStream(path + file);
        try {
            HWPFDocument wordDocument = new HWPFDocument(input);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

            // Images are referenced in the HTML by the name POI suggests; the
            // picture files themselves are written by the loop below.
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                public String savePicture(byte[] content, PictureType pictureType,
                        String suggestedName, float widthInches, float heightInches) {
                    return suggestedName;
                }
            });
            wordToHtmlConverter.processDocument(wordDocument);

            List<?> pics = wordDocument.getPicturesTable().getAllPictures();
            if (pics != null) {
                for (Object item : pics) {
                    Picture pic = (Picture) item;
                    try {
                        FileOutputStream imageOut =
                                new FileOutputStream(path + pic.suggestFullFileName());
                        try {
                            pic.writeImageContent(imageOut);
                        } finally {
                            // Release the file handle even when the write fails.
                            imageOut.close();
                        }
                    } catch (FileNotFoundException e) {
                        // Best effort: a picture that cannot be created is skipped.
                        e.printStackTrace();
                    }
                }
            }

            Document htmlDocument = wordToHtmlConverter.getDocument();
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            DOMSource domSource = new DOMSource(htmlDocument);
            StreamResult streamResult = new StreamResult(outStream);

            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer serializer = tf.newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(domSource, streamResult);

            // The serializer was asked for utf-8 output, so the bytes must be
            // decoded as utf-8 as well; the platform default charset would
            // corrupt non-ASCII text wherever that default is not UTF-8.
            String content = new String(outStream.toByteArray(), "utf-8");
            FileUtils.write(new File(path, "1.html"), content, "utf-8");
        } finally {
            // Closed only after all processing so no assumption is made about
            // when POI stops reading from the stream.
            input.close();
        }
    }
}
html转word:
public void htmlToWord2() throws Exception { InputStream bodyIs = new FileInputStream("f:\\1.html"); InputStream cssIs = new FileInputStream("f:\\1.css"); String body = this.getContent(bodyIs); String css = this.getContent(cssIs); //拼一个标准的HTML格式文档 String content = "<html><head><style>" + css + "</style></head><body>" + body + "</body></html>"; InputStream is = new ByteArrayInputStream(content.getBytes("GBK")); OutputStream os = new FileOutputStream("f:\\1.doc"); this.inputStreamToWord(is, os); } /** * 把is写入到对应的word输出流os中 * 不考虑异常的捕获,直接抛出 * @param is * @param os * @throws IOException */ private void inputStreamToWord(InputStream is, OutputStream os) throws IOException { POIFSFileSystem fs = new POIFSFileSystem(); //对应于org.apache.poi.hdf.extractor.WordDocument fs.createDocument(is, "WordDocument"); fs.writeFilesystem(os); os.close(); is.close(); } /** * 把输入流里面的内容以UTF-8编码当文本取出。 * 不考虑异常,直接抛出 * @param ises * @return * @throws IOException */ private String getContent(InputStream... ises) throws IOException { if (ises != null) { StringBuilder result = new StringBuilder(); BufferedReader br; String line; for (InputStream is : ises) { br = new BufferedReader(new InputStreamReader(is, "UTF-8")); while ((line=br.readLine()) != null) { result.append(line); } } return result.toString(); } return null; }