Java:Word 文件转 HTML(转换后可在线预览)

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.w3c.dom.Document;



/**
 * Converts Word documents to HTML so that the result can be previewed in a browser.
 *
 * <p>Both the legacy binary format (.doc, handled through POI's HWPF classes) and the
 * OOXML format (.docx, handled through POI's XWPF classes and the XHTML converter) are
 * supported. Pictures embedded in the document are extracted next to the generated
 * HTML file.
 */
public class WordToHtml {

	/**
	 * Demo entry point: converts two hard-coded sample documents on drive D.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		try {
			wordToHtml("d:\\12.docx", "d:\\", "123.html");
			wordToHtml("d:\\2.doc", "d:\\", "12.html");
		} catch (TransformerException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParserConfigurationException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Converts a Word file to HTML. Thin wrapper around
	 * {@link #convert2Html(String, String, String)}.
	 *
	 * @param wordPath path of the Word file to convert
	 * @param htmlPath output directory prefix; it is concatenated with the file name as-is,
	 *                 so it must end with a path separator
	 * @param newFilename name of the HTML file to create
	 * @throws TransformerException if serializing the HTML DOM fails (.doc only)
	 * @throws IOException if reading the Word file or writing the HTML file fails
	 * @throws ParserConfigurationException if no DOM builder can be created (.doc only)
	 */
	public static void wordToHtml(String wordPath, String htmlPath, String newFilename)
			throws TransformerException, IOException, ParserConfigurationException {
		convert2Html(wordPath, htmlPath, newFilename);
	}

	/**
	 * Writes text to a file, replacing any existing content.
	 *
	 * <p>The text is encoded with the JVM's default charset because no charset is passed
	 * to the {@link OutputStreamWriter}. Failures are reported on standard error and are
	 * not propagated, so this method never throws a checked exception.
	 *
	 * @param content text to write
	 * @param path path of the file to create or overwrite
	 */
	public static void writeFile(String content, String path) {
		FileOutputStream fos = null;
		BufferedWriter bw = null;
		try {
			fos = new FileOutputStream(new File(path));
			bw = new BufferedWriter(new OutputStreamWriter(fos));
			bw.write(content);
		} catch (FileNotFoundException fnfe) {
			fnfe.printStackTrace();
		} catch (IOException ioe) {
			ioe.printStackTrace();
		} finally {
			// Close the writer first so buffered text is flushed. Each stream is closed
			// independently so a failure on the writer cannot leak the file handle.
			closeAndReport(bw);
			closeAndReport(fos);
		}
	}

	/**
	 * Converts a Word file to HTML. Supports .doc and .docx.
	 *
	 * <p>The format is chosen from the file extension (case-insensitive): "docx" uses the
	 * XWPF/XHTML converter, anything else is treated as a legacy .doc file. The bytes
	 * produced by the converter are written to disk unchanged, so the output file keeps
	 * exactly the encoding the converter emitted.
	 *
	 * @param fileName path of the Word file
	 * @param outPutFilePath output directory prefix for the HTML file and extracted pictures;
	 *                       it is concatenated with file names as-is
	 * @param newFileName name of the HTML file
	 * @throws TransformerException if serializing the HTML DOM fails (.doc only)
	 * @throws IOException if reading the Word file or writing the HTML file fails
	 * @throws ParserConfigurationException if no DOM builder can be created (.doc only)
	 */
	public static void convert2Html(String fileName, String outPutFilePath, String newFileName)
			throws TransformerException, IOException, ParserConfigurationException {
		String extension = fileName.substring(fileName.lastIndexOf(".") + 1);
		ByteArrayOutputStream out = new ByteArrayOutputStream();

		// .docx and .doc are different file formats, so POI provides separate
		// implementations for them (XWPF and HWPF respectively).
		if ("docx".equalsIgnoreCase(extension)) {
			convertDocx(fileName, outPutFilePath, out);
		} else {
			convertDoc(fileName, outPutFilePath, out);
		}

		out.close();
		// Write the raw bytes instead of decoding and re-encoding them through a String:
		// that round trip used the platform charset and corrupted non-ASCII text whenever
		// the platform charset differed from the converter's output encoding.
		writeBytes(out.toByteArray(), outPutFilePath + newFileName);
	}

	/** Renders a .docx file as XHTML into {@code out}, extracting images under the output path. */
	private static void convertDocx(String fileName, String outPutFilePath, ByteArrayOutputStream out)
			throws IOException {
		InputStream inputStream = new FileInputStream(new File(fileName));
		try {
			// Step 1: load the DOCX into an XWPFDocument.
			XWPFDocument document = new XWPFDocument(inputStream);

			// Step 2: prepare the XHTML options. Images are extracted relative to the
			// output path and referenced from the HTML with the same relative URI.
			final String imageUrl = "";
			XHTMLOptions options = XHTMLOptions.create();
			options.setExtractor(new FileImageExtractor(new File(outPutFilePath + imageUrl)));
			options.setIgnoreStylesIfUnused(false);
			options.setFragment(true);
			options.URIResolver(new IURIResolver() {
				// @Override is intentionally omitted: the original author reported a compile
				// error with it (presumably a Java 5 source level, which rejects @Override
				// on interface method implementations; confirm before adding it).
				public String resolve(String uri) {
					return imageUrl + uri;
				}
			});

			// Step 3: convert the XWPFDocument to XHTML.
			XHTMLConverter.getInstance().convert(document, out, options);
		} finally {
			closeAndReport(inputStream);
		}
	}

	/** Renders a legacy .doc file as UTF-8 HTML into {@code out} and saves its pictures. */
	private static void convertDoc(String fileName, String outPutFilePath, ByteArrayOutputStream out)
			throws TransformerException, IOException, ParserConfigurationException {
		InputStream inputStream = new FileInputStream(fileName);
		try {
			HWPFDocument wordDocument = new HWPFDocument(inputStream);
			WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
					DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
			// The HTML references each picture by its suggested name; the picture files
			// themselves are written by savePictures below.
			wordToHtmlConverter.setPicturesManager(new PicturesManager() {
				public String savePicture(byte[] content, PictureType pictureType,
						String suggestedName, float widthInches, float heightInches) {
					return suggestedName;
				}
			});
			wordToHtmlConverter.processDocument(wordDocument);

			savePictures(wordDocument, outPutFilePath);

			// Serialize the DOM built by the converter as indented UTF-8 HTML.
			Document htmlDocument = wordToHtmlConverter.getDocument();
			Transformer serializer = TransformerFactory.newInstance().newTransformer();
			serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
			serializer.setOutputProperty(OutputKeys.INDENT, "yes");
			serializer.setOutputProperty(OutputKeys.METHOD, "html");
			serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
		} finally {
			closeAndReport(inputStream);
		}
	}

	/** Writes every picture of the document into the output path under its suggested file name. */
	private static void savePictures(HWPFDocument wordDocument, String outPutFilePath)
			throws IOException {
		List<Picture> pictures = wordDocument.getPicturesTable().getAllPictures();
		if (pictures == null) {
			return;
		}
		for (Picture picture : pictures) {
			FileOutputStream pictureOut = null;
			try {
				pictureOut = new FileOutputStream(outPutFilePath + picture.suggestFullFileName());
				picture.writeImageContent(pictureOut);
			} catch (FileNotFoundException e) {
				// A picture that cannot be created is reported but does not abort the
				// conversion; the remaining pictures and the HTML are still produced.
				e.printStackTrace();
			} finally {
				closeAndReport(pictureOut);
			}
		}
	}

	/** Writes the given bytes to {@code path}, replacing existing content; failures propagate. */
	private static void writeBytes(byte[] data, String path) throws IOException {
		FileOutputStream fos = new FileOutputStream(new File(path));
		boolean written = false;
		try {
			fos.write(data);
			written = true;
		} finally {
			if (written) {
				// On the success path a close failure must reach the caller.
				fos.close();
			} else {
				// A write already failed; do not let a close failure mask that exception.
				closeAndReport(fos);
			}
		}
	}

	/** Closes the resource if it is non-null, reporting (not propagating) any close failure. */
	private static void closeAndReport(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}

  • 6
    点赞
  • 26
    收藏
    觉得还不错? 一键收藏
  • 6
    评论
可以使用 Apache POI 和 Aspose.Words 这两个 Java 库来实现在线预览 Word 文档的功能。

Apache POI 是一个用于读取、创建和修改 Microsoft Office 格式文件(如 Word、Excel 和 PowerPoint)的 Java 库。可以使用它来读取 Word 文档内容并将其转换为 HTML 格式,然后通过浏览器进行在线预览。

Aspose.Words 是一个专门用于处理 Word 文档的 Java 库,它可以将 Word 文档转换为 HTML 格式或 PDF 格式,然后通过浏览器进行在线预览。

以下是使用 Apache POI 和 Aspose.Words 实现在线预览 Word 文档的示例代码:

1. 使用 Apache POI 将 Word 文档转换为 HTML 格式:

```java
import java.io.*;
import org.apache.poi.hwpf.*;
import org.apache.poi.hwpf.extractor.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class WordToHtmlConverter {
    public static void main(String[] args) throws Exception {
        // 读取Word文档内容
        FileInputStream fis = new FileInputStream(new File("test.doc"));
        HWPFDocument doc = new HWPFDocument(fis);
        WordExtractor extractor = new WordExtractor(doc);
        String content = extractor.getText();
        // 将Word文档内容转换为HTML格式
        Document html = Jsoup.parse(content);
        String htmlContent = html.html();
        System.out.println(htmlContent);
    }
}
```

2. 使用 Aspose.Words 将 Word 文档转换为 HTML 格式:

```java
import com.aspose.words.*;

public class WordToHtmlConverter {
    public static void main(String[] args) throws Exception {
        // 加载Word文档
        Document doc = new Document("test.doc");
        // 将Word文档转换为HTML格式
        HtmlSaveOptions options = new HtmlSaveOptions();
        doc.save("test.html", options);
        // 读取HTML文件内容
        FileInputStream fis = new FileInputStream(new File("test.html"));
        byte[] data = new byte[fis.available()];
        fis.read(data);
        String htmlContent = new String(data);
        System.out.println(htmlContent);
    }
}
```

需要注意的是,以上示例代码仅供参考,实际使用时可能需要根据具体情况进行修改。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值