POI实现DOC/DOCX转HTML

最新推荐文章于 2024-03-02 18:29:49 发布

luka2008

最新推荐文章于 2024-03-02 18:29:49 发布

阅读量3.4w

点赞数 2

分类专栏： Java POI 文章标签： poi java

本文链接：https://blog.csdn.net/luka2008/article/details/21168287

版权

Java 同时被 2 个专栏收录

12 篇文章 0 订阅

订阅专栏

POI

2 篇文章 0 订阅

订阅专栏

1.使用HWPF（HWPFDocument + WordToHtmlConverter）处理DOC

public class DocToHtml {  
    
    private static final String encoding = "UTF-8";

    public static String convert2Html(String wordPath)
            throws FileNotFoundException, TransformerException, IOException,
            ParserConfigurationException {
        if( wordPath == null || "".equals(wordPath) ) return "";
        File file = new File(wordPath);
        if( file.exists() && file.isFile() )
            return convert2Html(new FileInputStream(file));
        else
            return "";
    }
    
    public static String convert2Html(String wordPath, String context)
    throws FileNotFoundException, TransformerException, IOException,
    ParserConfigurationException {
        if( wordPath == null || "".equals(wordPath) ) return "";
        File file = new File(wordPath);
        if( file.exists() && file.isFile() )
            return convert2Html(new FileInputStream(file), context);
        else
            return "";
    }
  
    public static String convert2Html(InputStream is)
            throws TransformerException, IOException,
            ParserConfigurationException {
        return convert2Html(is, "");
    }

    public static String convert2Html(InputStream is, HttpServletRequest req) throws TransformerException, IOException,    ParserConfigurationException {
        return convert2Html(is, req.getContextPath());
    }
    
    public static String convert2Html(InputStream is, final String context) throws IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDocument = new HWPFDocument(is);
        WordToHtmlConverter converter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmssSSS");
        final String prefix = sdf.format(new Date());
        final Map<Object, String> suffixMap = new HashMap<Object, String>();
        
        converter.setPicturesManager(new PicturesManager() {
                public String savePicture(byte[] content, PictureType pictureType,
                        String suggestedName, float widthInches, float heightInches) {
                    String prefixContext = context.replace("\\", "").replace("/", "");
                    prefixContext = StringUtils.isNotBlank(prefixContext) ? "/" + prefixContext + "/" : prefixContext;
                    suffixMap.put(new String(content).replace(" ", "").length(), suggestedName);
                    
                    return  prefixContext
                            + UeConstants.VIEW_IMAGE_PATH + "/" + UeConstants.UEDITOR_PATH
                            + "/" + UeConstants.UEDITOR_IMAGE_PATH + "/"
                            + prefix + "_"
                            + suggestedName;
                }
        });
        converter.processDocument(wordDocument);
        
        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for(Picture pic : pics) {
                try {
                    pic.writeImageContent(new FileOutputStream(
                            UeConstants.IMAGE_PATH
                                 + "/" + prefix + "_" + suffixMap.get(new String(pic.getContent()).replace(" ", "").length())));
                } catch (FileNotFoundException e) {
                    e.printStackTrace();
                }
            }
        }
        
        StringWriter writer = new StringWriter();
        
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(
                new DOMSource(converter.getDocument()),
                new StreamResult(writer) );
        writer.close();
        return writer.toString();
    }
}

2.使用XWPFDocument和XHTMLConverter处理DOCX

/**
 * Converts a DOCX classpath resource to XHTML with {@link XHTMLConverter},
 * once to standard output and once to a file under {@code target/}.
 */
public class XHTMLConverterTestCase
    extends AbstractXWPFPOIConverterTest
{

    /**
     * Runs both conversions (console and file) for the given resource.
     *
     * @param fileInName classpath resource name of the DOCX, resolved relative to
     *                   {@code AbstractXWPFPOIConverterTest}
     * @throws IOException if the resource is missing or a conversion fails
     */
    protected void doGenerate( String fileInName )
        throws IOException
    {
        doGenerateSysOut( fileInName );
        doGenerateHTMLFile( fileInName );
    }

    /**
     * Converts the resource to XHTML indented by 4 and prints it to {@code System.out};
     * the elapsed time is reported on {@code System.err}.
     *
     * @param fileInName classpath resource name of the DOCX
     * @throws IOException if the resource is missing or the conversion fails
     */
    protected void doGenerateSysOut( String fileInName )
        throws IOException
    {

        long startTime = System.currentTimeMillis();

        XWPFDocument document = loadDocument( fileInName );

        XHTMLOptions options = XHTMLOptions.create().indent( 4 );
        // System.out belongs to the JVM and must stay open, so it is never closed here.
        OutputStream out = System.out;
        XHTMLConverter.getInstance().convert( document, out, options );

        System.err.println( "Elapsed time=" + ( System.currentTimeMillis() - startTime ) + "(ms)" );
    }

    /**
     * Converts the resource to {@code target/<fileInName>.html}, extracting images
     * into {@code target/images/<fileInName>} and resolving image URIs against
     * that folder.
     *
     * @param fileInName classpath resource name of the DOCX
     * @throws IOException if the resource is missing, the output file cannot be
     *                     opened, or the conversion fails
     */
    protected void doGenerateHTMLFile( String fileInName )
        throws IOException
    {

        String root = "target";
        String fileOutName = root + "/" + fileInName + ".html";

        long startTime = System.currentTimeMillis();

        XWPFDocument document = loadDocument( fileInName );

        XHTMLOptions options = XHTMLOptions.create();// .indent( 4 );
        // Extract image
        File imageFolder = new File( root + "/images/" + fileInName );
        options.setExtractor( new FileImageExtractor( imageFolder ) );
        // URI resolver
        options.URIResolver( new FileURIResolver( imageFolder ) );

        File outFile = new File( fileOutName );
        // fileInName may contain sub-folders; make sure the target directory exists.
        File outDir = outFile.getParentFile();
        if ( outDir != null )
        {
            outDir.mkdirs();
        }

        OutputStream out = new FileOutputStream( outFile );
        try
        {
            XHTMLConverter.getInstance().convert( document, out, options );
        }
        finally
        {
            // Release the file handle even when the conversion throws.
            out.close();
        }

        System.out.println( "Generate " + fileOutName + " with " + ( System.currentTimeMillis() - startTime ) + " ms." );
    }

    /** Loads the DOCX resource into memory and closes the underlying classpath stream. */
    private static XWPFDocument loadDocument( String fileInName )
        throws IOException
    {
        java.io.InputStream in = AbstractXWPFPOIConverterTest.class.getResourceAsStream( fileInName );
        if ( in == null )
        {
            throw new IOException( "Resource not found on classpath: " + fileInName );
        }
        try
        {
            return new XWPFDocument( in );
        }
        finally
        {
            in.close();
        }
    }
}

项目下载地址：http://download.csdn.net/detail/luka2008/7902285

luka2008

关注

2
点赞
踩
16

收藏

觉得还不错? 一键收藏
15
评论
POI实现DOC/DOCX转HTML

1.使用HWPF处理DOC public class DocToHtml { private static final String encoding = "UTF-8"; public static String convert2Html(String wordPath) throws FileNotFoundException, Tra…
复制链接

扫一扫