Android使用POI打开word文档

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/zhaihmj/article/details/73277021

最近使用Apache的POI包解析并打开word文档,遇到了不少问题,各种报错。折腾了两天,发现主要问题出在WordToHtmlConverter.java这个类上(好歹这么大的公司,代码太不严谨了吧,最基本的判断都没有……)。
尝试了各种重写、重新打包jar包,都不好使。最后,自己按照他的方法重新写了这个类,在此做个记录,也希望能帮到其他朋友。
话不多说,上代码。

1、布局,就是一个WebView

<?xml version="1.0" encoding="utf-8"?>
<!-- Layout for DocActivity: a single WebView that fills the whole screen. -->
<android.support.constraint.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context="com.hfga.docview.DocActivity">

    <!-- Looked up in DocActivity through R.id.docview; shows the generated HTML file. -->
    <WebView
        android:id="@+id/docview"
        android:layout_width="match_parent"
        android:layout_height="match_parent"></WebView>

</android.support.constraint.ConstraintLayout>

2、Activity

import android.os.Bundle;
import android.support.v7.app.AppCompatActivity;
import android.webkit.WebSettings;
import android.webkit.WebView;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;


/**
 * Activity that converts a binary Word ({@code .doc}) file to HTML with
 * Apache POI and shows the result in a {@link WebView}.
 * <p>
 * The source document is {@code docPath + docName}. The HTML is written to
 * {@code savePath + <base name> + ".html"} and the embedded pictures to the
 * directory {@code savePath + <base name> + "/"}.
 * <p>
 * NOTE(review): the conversion runs synchronously from {@code onCreate};
 * consider moving it off the UI thread for large documents.
 */
public class DocActivity extends AppCompatActivity {

    private WebView webView;

    private String docPath = "/mnt/sdcard/Document/";
    private String docName = "test.doc";
    private String savePath = "/mnt/sdcard/Document/temp/";

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_doc);
        initView();
    }

    /**
     * Prepares the picture directory, converts the document and loads the
     * generated HTML file into the WebView.
     */
    private void initView() {
        webView = (WebView) findViewById(R.id.docview);
        String name = getBaseName();
        File pictureDir = new File(savePath + name);
        if (!pictureDir.exists()) {
            pictureDir.mkdirs();
        }
        try {
            convert2Html(docPath + docName, savePath + name + ".html");
        } catch (TransformerException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        }
        WebSettings webSettings = webView.getSettings();
        webSettings.setLoadWithOverviewMode(true);
        webSettings.setSupportZoom(true);
        webSettings.setBuiltInZoomControls(true);
        webView.loadUrl("file://" + savePath + name + ".html");
    }

    /**
     * Returns {@code docName} without its extension. A name without a dot
     * (or with only a leading dot) is returned unchanged instead of making
     * {@code substring} throw.
     */
    private String getBaseName() {
        int dot = docName.lastIndexOf('.');
        return dot > 0 ? docName.substring(0, dot) : docName;
    }

    /**
     * Converts a Word document to an HTML file and extracts its pictures.
     *
     * @param fileName   path of the {@code .doc} file to read
     * @param outPutFile path of the HTML file to write
     * @throws TransformerException         if the DOM cannot be serialized
     * @throws IOException                  if the document cannot be read or a picture cannot be written
     * @throws ParserConfigurationException if no DOM builder can be created
     */
    public void convert2Html(String fileName, String outPutFile)
            throws TransformerException, IOException,
            ParserConfigurationException {
        // Close the input stream once the document has been constructed from it.
        HWPFDocument wordDocument;
        FileInputStream in = new FileInputStream(fileName);
        try {
            wordDocument = new HWPFDocument(in);
        } finally {
            in.close();
        }

        final String name = getBaseName();
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

        // Picture links in the HTML are relative to the HTML file: "<base name>/<picture>".
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content,
                                      PictureType pictureType, String suggestedName,
                                      float widthInches, float heightInches) {
                return name + "/" + suggestedName;
            }
        });

        // Write every embedded picture next to the HTML file.
        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture pic : pics) {
                String pictureName = pic.suggestFullFileName();
                System.out.println(pictureName);
                try {
                    FileOutputStream pictureOut =
                            new FileOutputStream(savePath + name + "/" + pictureName);
                    try {
                        pic.writeImageContent(pictureOut);
                    } finally {
                        // Always release the file handle, even when writing fails.
                        pictureOut.close();
                    }
                } catch (FileNotFoundException e) {
                    e.printStackTrace();
                }
            }
        }

        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

        // The bytes were produced as UTF-8, so decode them as UTF-8 rather
        // than with the platform default charset.
        writeFile(out.toString("utf-8"), outPutFile);
    }

    /**
     * Writes {@code content} to {@code path} encoded as UTF-8, replacing any
     * existing file. I/O failures are reported on the error stream and not
     * propagated.
     *
     * @param content text to write
     * @param path    destination file path
     */
    public void writeFile(String content, String path) {
        FileOutputStream fos = null;
        BufferedWriter bw = null;
        try {
            // FileOutputStream creates the file when it does not exist yet.
            fos = new FileOutputStream(new File(path));
            bw = new BufferedWriter(new OutputStreamWriter(fos, "utf-8"));
            bw.write(content);
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            try {
                if (bw != null) {
                    // Closing the writer flushes it and closes the wrapped stream.
                    bw.close();
                } else if (fos != null) {
                    fos.close();
                }
            } catch (IOException ie) {
                // A failed close can mean the buffered content was never flushed.
                ie.printStackTrace();
            }
        }
    }
}

3、最主要的 WordToHtmlConverter类
这个类基本上是把POI包里的WordToHtmlConverter原样拷贝过来,其中一些包内私有、外部无法访问的方法直接写进了这个类里面。导致jar包出问题的是compactChildNodesR(Element parentElement, String childTagName)这个方法,嗯,该吃药了——我给它做了个简单的“手术”(加了几处空值判断),改动的地方在代码里做了标注,大家可以看一看。


import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.AbstractWordConverter;
import org.apache.poi.hwpf.converter.AbstractWordUtils;
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
import org.apache.poi.hwpf.converter.HtmlDocumentFacade;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.OfficeDrawing;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.util.Beta;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.XMLHelper;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

import java.io.File;
import java.io.IOException;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH;

/**
 * Converts Word files (95-2007) into HTML files.
 * <p>
 * This implementation doesn't create images or links to them. This can be
 * changed by overriding {@link #processImage(Element, boolean, Picture)}
 * method.
 */
@Beta
public class WordToHtmlConverter extends AbstractWordConverter {
    private static final POILogger logger = POILogFactory.getLogger(WordToHtmlConverter.class);
    private final Deque<BlockProperies> blocksProperies = new LinkedList<BlockProperies>();
    private final HtmlDocumentFacade htmlDocumentFacade;
    private Element notes;

    /**
     * Creates a new {@link WordToHtmlConverter} that renders into the given
     * DOM document. Can be used to output several {@link HWPFDocument}s into
     * a single HTML document.
     *
     * @param document XML DOM Document used as HTML document
     */
    public WordToHtmlConverter(Document document) {
        this(new HtmlDocumentFacade(document));
    }

    /**
     * Creates a converter that builds its output through the supplied facade.
     *
     * @param htmlDocumentFacade facade used to create every HTML element and style class
     */
    public WordToHtmlConverter(HtmlDocumentFacade htmlDocumentFacade) {
        this.htmlDocumentFacade = htmlDocumentFacade;
    }

    /**
     * Builds the inline CSS for a section: its four margins in inches and,
     * for sections with more than one column, the column count and gap.
     *
     * @param section section whose margins and column layout are read
     * @return CSS declarations, each terminated by a semicolon
     */
    private static String getSectionStyle(Section section) {
        final float left = section.getMarginLeft() / TWIPS_PER_INCH;
        final float right = section.getMarginRight() / TWIPS_PER_INCH;
        final float top = section.getMarginTop() / TWIPS_PER_INCH;
        final float bottom = section.getMarginBottom() / TWIPS_PER_INCH;

        // CSS shorthand order: top, right, bottom, left.
        final StringBuilder css = new StringBuilder();
        css.append("margin: ")
                .append(top).append("in ")
                .append(right).append("in ")
                .append(bottom).append("in ")
                .append(left).append("in;");

        final int columns = section.getNumColumns();
        if (columns <= 1) {
            return css.toString();
        }

        css.append("column-count: ").append(columns).append(";");
        if (section.isColumnsEvenlySpaced()) {
            final float gap = section.getDistanceBetweenColumns() / TWIPS_PER_INCH;
            css.append("column-gap: ").append(gap).append("in;");
        } else {
            // Fixed fallback gap for unevenly spaced columns.
            css.append("column-gap: 0.25in;");
        }
        return css.toString();
    }

    /**
     * Command-line entry point for {@link WordToHtmlConverter}.
     * <p>
     * Usage: WordToHtmlConverter infile outfile
     * <p>
     * {@code infile} is an input .doc file (Word 95-2007) that is rendered as
     * HTML into {@code outfile}. With fewer than two arguments a usage line
     * is printed to the error stream and nothing is converted.
     */
    public static void main(String[] args)
            throws IOException, ParserConfigurationException, TransformerException {
        if (args.length < 2) {
            System.err.println("Usage: WordToHtmlConverter <inputFile.doc> <saveTo.html>");
            return;
        }

        final String inputPath = args[0];
        final String outputPath = args[1];
        System.out.println("Converting " + inputPath);
        System.out.println("Saving output to " + outputPath);

        final Document html = WordToHtmlConverter.process(new File(inputPath));

        final Transformer serializer = TransformerFactory.newInstance().newTransformer();
        // TODO set encoding from a command argument
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(html), new StreamResult(new File(outputPath)));
    }

    /**
     * Loads a Word file and converts it into a new HTML DOM document.
     *
     * @param docFile Word file to load
     * @return the DOM document produced by the converter
     * @throws IOException                  if the file cannot be loaded
     * @throws ParserConfigurationException if no DOM builder can be created
     */
    static Document process(File docFile) throws IOException, ParserConfigurationException {
        final HWPFDocumentCore source = AbstractWordUtils.loadDoc(docFile);
        final Document target = XMLHelper.getDocumentBuilderFactory()
                .newDocumentBuilder()
                .newDocument();

        final WordToHtmlConverter converter = new WordToHtmlConverter(target);
        converter.processDocument(source);
        return converter.getDocument();
    }

    /**
     * Null-safe string comparison.
     *
     * @return {@code true} when both arguments are {@code null} or when
     *         {@code str1.equals(str2)} holds
     */
    static boolean equals(String str1, String str2) {
        if (str1 == null) {
            return str2 == null;
        }
        return str1.equals(str2);
    }

    /**
     * Finishes the HTML document: appends the notes block to the body when
     * one was created, then asks the facade to update its stylesheet.
     */
    @Override
    protected void afterProcess() {
        // notes stays null until processNoteAutonumbered creates it.
        if (notes != null) {
            htmlDocumentFacade.getBody().appendChild(notes);
        }

        htmlDocumentFacade.updateStylesheet();
    }

    /**
     * @return the DOM document held by the HTML facade
     */
    @Override
    public Document getDocument() {
        return htmlDocumentFacade.getDocument();
    }

    /**
     * Emits the text of one character run as a {@code <span>} inside
     * {@code pElement}. Font family and size are written only when they
     * differ from the properties on top of the block-properties stack; bold
     * and italic are always written when set.
     *
     * @param pElement     element that receives the span
     * @param characterRun run whose formatting is translated to CSS
     * @param text         text content of the span
     */
    @Override
    protected void outputCharacters(Element pElement,
                                    CharacterRun characterRun, String text) {
        final Element runSpan = htmlDocumentFacade.getDocument().createElement("span");
        pElement.appendChild(runSpan);

        final StringBuilder css = new StringBuilder();
        final BlockProperies enclosing = this.blocksProperies.peek();
        final Triplet font = getCharacterRunTriplet(characterRun);

        final String fontName = font.fontName;
        final boolean hasFontName = fontName != null && !fontName.equals("");
        if (hasFontName && !fontName.equals(enclosing.pFontName)) {
            css.append("font-family:").append(fontName).append(";");
        }

        // The run's font size is halved before it is compared and emitted in pt.
        final int sizeInPoints = characterRun.getFontSize() / 2;
        if (sizeInPoints != enclosing.pFontSize) {
            css.append("font-size:").append(sizeInPoints).append("pt;");
        }

        if (font.bold) {
            css.append("font-weight:bold;");
        }
        if (font.italic) {
            css.append("font-style:italic;");
        }

        WordToHtmlUtils.addCharactersProperties(characterRun, css);
        if (css.length() != 0) {
            htmlDocumentFacade.addStyleClass(runSpan, "s", css.toString());
        }

        runSpan.appendChild(htmlDocumentFacade.createText(text));
    }

    /**
     * Creates one bookmark element per entry of {@code rangeBookmarks}, each
     * nested inside the previous one, and then processes the characters of
     * {@code range} (when non-null) into the innermost element.
     */
    @Override
    protected void processBookmarks(HWPFDocumentCore wordDocument,
                                    Element currentBlock, Range range, int currentTableLevel,
                                    List<Bookmark> rangeBookmarks) {
        Element innermost = currentBlock;
        for (Bookmark each : rangeBookmarks) {
            final Element anchor = htmlDocumentFacade.createBookmark(each.getName());
            innermost.appendChild(anchor);
            innermost = anchor;
        }

        if (range == null) {
            return;
        }
        processCharacters(wordDocument, currentTableLevel, range, innermost);
    }

    /**
     * Copies the non-empty title, author, keywords and comments of the
     * summary information into the HTML document (comments become the
     * description).
     */
    @Override
    protected void processDocumentInformation(
            SummaryInformation summaryInformation) {
        final String title = summaryInformation.getTitle();
        if (isNotEmpty(title)) {
            htmlDocumentFacade.setTitle(title);
        }

        final String author = summaryInformation.getAuthor();
        if (isNotEmpty(author)) {
            htmlDocumentFacade.addAuthor(author);
        }

        final String keywords = summaryInformation.getKeywords();
        if (isNotEmpty(keywords)) {
            htmlDocumentFacade.addKeywords(keywords);
        }

        final String comments = summaryInformation.getComments();
        if (isNotEmpty(comments)) {
            htmlDocumentFacade.addDescription(comments);
        }
    }

    /**
     * @return {@code true} when {@code s} is neither {@code null} nor the
     *         empty string
     */
    private boolean isNotEmpty(String s) {
        if (s == null) {
            return false;
        }
        return s.length() > 0;
    }

    /**
     * Processes the given range through the superclass implementation and
     * then runs {@link #afterProcess()}.
     */
    @Override
    public void processDocumentPart(HWPFDocumentCore wordDocument, Range range) {
        super.processDocumentPart(wordDocument, range);
        afterProcess();
    }

    /**
     * Renders a drop-down field as a select element with one option per
     * value; the option at {@code defaultIndex} is created as selected.
     */
    @Override
    protected void processDropDownList(Element block,
                                       CharacterRun characterRun, String[] values, int defaultIndex) {
        final Element dropDown = htmlDocumentFacade.createSelect();
        int position = 0;
        for (String value : values) {
            final boolean selected = (position == defaultIndex);
            dropDown.appendChild(htmlDocumentFacade.createOption(value, selected));
            position++;
        }
        block.appendChild(dropDown);
    }

    /**
     * Renders a drawn object as an image element pointing at {@code path}
     * and appends it to {@code block}.
     */
    @Override
    protected void processDrawnObject(HWPFDocument doc,
                                      CharacterRun characterRun, OfficeDrawing officeDrawing,
                                      String path, Element block) {
        block.appendChild(htmlDocumentFacade.createImage(path));
    }

    /**
     * Handles an auto-numbered endnote by delegating to
     * {@link #processNoteAutonumbered} with the note type {@code "end"}.
     */
    @Override
    protected void processEndnoteAutonumbered(HWPFDocument wordDocument,
                                              int noteIndex, Element block, Range endnoteTextRange) {
        processNoteAutonumbered(wordDocument, "end", noteIndex, block,
                endnoteTextRange);
    }

    /**
     * Handles an auto-numbered footnote by delegating to
     * {@link #processNoteAutonumbered} with the note type {@code "foot"}.
     */
    @Override
    protected void processFootnoteAutonumbered(HWPFDocument wordDocument,
                                               int noteIndex, Element block, Range footnoteTextRange) {
        processNoteAutonumbered(wordDocument, "foot", noteIndex, block,
                footnoteTextRange);
    }

    /**
     * Appends a hyperlink element for {@code hyperlink} to the current block
     * and, when a text range is given, processes its characters into the link.
     */
    @Override
    protected void processHyperlink(HWPFDocumentCore wordDocument,
                                    Element currentBlock, Range textRange, int currentTableLevel,
                                    String hyperlink) {
        final Element link = htmlDocumentFacade.createHyperlink(hyperlink);
        currentBlock.appendChild(link);

        if (textRange == null) {
            return;
        }
        processCharacters(wordDocument, currentTableLevel, textRange, link);
    }

    /**
     * Appends an image for {@code picture} to {@code currentBlock}.
     * <p>
     * Width, height and crop values are converted from twips to inches; when
     * a scaling factor is positive the value is additionally multiplied by
     * {@code factor / 1000}. An uncropped picture becomes a single image
     * element with an inline size. A cropped picture is wrapped in two
     * blocks: the inner one has the visible size with hidden overflow, and
     * the image is positioned absolutely inside it, shifted up and left by
     * the crop amounts.
     *
     * @param currentBlock    element that receives the image
     * @param inlined         not used by this implementation
     * @param picture         picture whose size, scaling and cropping are read
     * @param imageSourcePath value passed to the facade when creating the image element
     */
    @Override
    protected void processImage(Element currentBlock, boolean inlined,
                                Picture picture, String imageSourcePath) {
        final int aspectRatioX = picture.getHorizontalScalingFactor();
        final int aspectRatioY = picture.getVerticalScalingFactor();

        final float imageWidth;
        final float imageHeight;

        final float cropTop;
        final float cropBottom;
        final float cropLeft;
        final float cropRight;

        if (aspectRatioX > 0) {
            imageWidth = picture.getDxaGoal() * aspectRatioX / 1000.f
                    / TWIPS_PER_INCH;
            cropRight = picture.getDxaCropRight() * aspectRatioX / 1000.f
                    / TWIPS_PER_INCH;
            cropLeft = picture.getDxaCropLeft() * aspectRatioX / 1000.f
                    / TWIPS_PER_INCH;
        } else {
            imageWidth = picture.getDxaGoal() / TWIPS_PER_INCH;
            cropRight = picture.getDxaCropRight() / TWIPS_PER_INCH;
            cropLeft = picture.getDxaCropLeft() / TWIPS_PER_INCH;
        }

        if (aspectRatioY > 0) {
            imageHeight = picture.getDyaGoal() * aspectRatioY / 1000.f
                    / TWIPS_PER_INCH;
            cropTop = picture.getDyaCropTop() * aspectRatioY / 1000.f
                    / TWIPS_PER_INCH;
            cropBottom = picture.getDyaCropBottom() * aspectRatioY / 1000.f
                    / TWIPS_PER_INCH;
        } else {
            imageHeight = picture.getDyaGoal() / TWIPS_PER_INCH;
            cropTop = picture.getDyaCropTop() / TWIPS_PER_INCH;
            cropBottom = picture.getDyaCropBottom() / TWIPS_PER_INCH;
        }

        Element root;
        if (Math.abs(cropTop) + Math.abs(cropRight) + Math.abs(cropBottom) + Math.abs(cropLeft) > 0) {
            float visibleWidth = Math
                    .max(0, imageWidth - cropLeft - cropRight);
            float visibleHeight = Math.max(0, imageHeight - cropTop
                    - cropBottom);

            root = htmlDocumentFacade.createBlock();
            htmlDocumentFacade.addStyleClass(root, "d",
                    "vertical-align:text-bottom;width:" + visibleWidth
                            + "in;height:" + visibleHeight + "in;");

            // Clipping container with the visible size.
            Element inner = htmlDocumentFacade.createBlock();
            htmlDocumentFacade.addStyleClass(inner, "d",
                    "position:relative;width:" + visibleWidth + "in;height:"
                            + visibleHeight + "in;overflow:hidden;");
            root.appendChild(inner);

            // Shift the full-size image by the crop amounts. The offsets are
            // negated numerically and given a unit: a CSS length without a
            // unit is invalid, and a literal "-" prefix would produce "--x"
            // for a negative crop value.
            Element image = htmlDocumentFacade.createImage(imageSourcePath);
            htmlDocumentFacade.addStyleClass(image, "i",
                    "position:absolute;left:" + (-cropLeft) + "in;top:" + (-cropTop)
                            + "in;width:" + imageWidth + "in;height:"
                            + imageHeight + "in;");
            inner.appendChild(image);
        } else {
            root = htmlDocumentFacade.createImage(imageSourcePath);
            root.setAttribute("style", "width:" + imageWidth + "in;height:"
                    + imageHeight + "in;vertical-align:text-bottom;");
        }

        currentBlock.appendChild(root);
    }

    /**
     * Fallback used when no pictures manager is set: no image is emitted,
     * only an HTML comment naming the picture's suggested file name.
     */
    @Override
    protected void processImageWithoutPicturesManager(Element currentBlock,
                                                      boolean inlined, Picture picture) {
        // no default implementation -- skip
        final String placeholder = "Image link to '"
                + picture.suggestFullFileName() + "' can be here";
        final Document owner = htmlDocumentFacade.getDocument();
        currentBlock.appendChild(owner.createComment(placeholder));
    }

    /**
     * Appends a line-break element created by the facade to {@code block}.
     */
    @Override
    protected void processLineBreak(Element block, CharacterRun characterRun) {
        block.appendChild(htmlDocumentFacade.createLineBreak());
    }

    /**
     * Renders an auto-numbered footnote or endnote.
     * <p>
     * A superscript-styled link showing the 1-based note number is appended
     * to {@code block}; it targets {@code #<type>note_<n>} and is itself
     * named {@code <type>note_back_<n>}. The note is added to the shared
     * notes container, which is created on first use: a bookmark linking
     * back to the reference, a space, and a span holding the note text.
     *
     * @param doc           document being converted
     * @param type          note type prefix used in class names and anchors
     *                      (this class passes {@code "foot"} or {@code "end"})
     * @param noteIndex     zero-based index of the note
     * @param block         element that receives the note reference link
     * @param noteTextRange range holding the note text
     */
    protected void processNoteAutonumbered(HWPFDocument doc, String type,
                                           int noteIndex, Element block, Range noteTextRange) {
        final String textIndex = String.valueOf(noteIndex + 1);
        final String textIndexClass = htmlDocumentFacade.getOrCreateCssClass(
                "a", "vertical-align:super;font-size:smaller;");
        final String forwardNoteLink = type + "note_" + textIndex;
        final String backwardNoteLink = type + "note_back_" + textIndex;

        Element anchor = htmlDocumentFacade.createHyperlink("#"
                + forwardNoteLink);
        anchor.setAttribute("name", backwardNoteLink);
        anchor.setAttribute("class", textIndexClass + " " + type
                + "noteanchor");
        anchor.setTextContent(textIndex);
        block.appendChild(anchor);

        if (notes == null) {
            notes = htmlDocumentFacade.createBlock();
            notes.setAttribute("class", "notes");
        }

        Element note = htmlDocumentFacade.createBlock();
        note.setAttribute("class", type + "note");
        notes.appendChild(note);

        Element bookmark = htmlDocumentFacade.createBookmark(forwardNoteLink);
        bookmark.setAttribute("href", "#" + backwardNoteLink);
        bookmark.setTextContent(textIndex);
        bookmark.setAttribute("class", textIndexClass + " " + type
                + "noteindex");
        note.appendChild(bookmark);
        note.appendChild(htmlDocumentFacade.createText(" "));

        Element span = htmlDocumentFacade.getDocument().createElement("span");
        span.setAttribute("class", type + "notetext");
        note.appendChild(span);

        // push(), not add(): on a Deque, add() appends at the tail while
        // peek()/pop() operate on the head. With add(), the note text would
        // be styled against the enclosing paragraph's properties and the
        // pop() below would remove that paragraph's entry instead of this one.
        this.blocksProperies.push(new BlockProperies("", -1));
        try {
            processCharacters(doc, Integer.MIN_VALUE, noteTextRange, span);
        } finally {
            this.blocksProperies.pop();
        }
    }

    /**
     * Represents a page break by appending a line-break element created by
     * the facade to {@code flow}.
     */
    @Override
    protected void processPageBreak(HWPFDocumentCore wordDocument, Element flow) {
        flow.appendChild(htmlDocumentFacade.createLineBreak());
    }

    /**
     * Appends an in-document link to {@code "#" + pageref} and, when a text
     * range is given, processes its characters into the link.
     */
    @Override
    protected void processPageref(HWPFDocumentCore hwpfDocument,
                                  Element currentBlock, Range textRange, int currentTableLevel,
                                  String pageref) {
        final String target = "#" + pageref;
        final Element link = htmlDocumentFacade.createHyperlink(target);
        currentBlock.appendChild(link);

        if (textRange == null) {
            return;
        }
        processCharacters(hwpfDocument, currentTableLevel, textRange, link);
    }

    /**
     * Renders one paragraph as a paragraph element appended to
     * {@code parentElement}.
     * <p>
     * The font name and size of the first character run become the
     * paragraph-level properties: they are written into the paragraph style
     * and pushed on the block-properties stack while the runs are processed.
     * An optional bullet text is emitted before the runs. Finally, adjacent
     * compatible spans are merged.
     * <p>
     * A paragraph without character runs produces an empty paragraph element
     * and no style class.
     *
     * @param hwpfDocument      document being converted
     * @param parentElement     element that receives the paragraph
     * @param currentTableLevel table nesting level passed on to character processing
     * @param paragraph         paragraph to render
     * @param bulletText        list bullet or number text; may be null or empty
     */
    @Override
    protected void processParagraph(HWPFDocumentCore hwpfDocument,
                                    Element parentElement, int currentTableLevel, Paragraph paragraph,
                                    String bulletText) {
        final Element pElement = htmlDocumentFacade.createParagraph();
        parentElement.appendChild(pElement);

        StringBuilder style = new StringBuilder();
        WordToHtmlUtils.addParagraphProperties(paragraph, style);

        final int charRuns = paragraph.numCharacterRuns();

        if (charRuns == 0) {
            return;
        }

        {
            final String pFontName;
            final int pFontSize;
            final CharacterRun characterRun = paragraph.getCharacterRun(0);
            if (characterRun != null) {
                Triplet triplet = getCharacterRunTriplet(characterRun);
                pFontSize = characterRun.getFontSize() / 2;
                pFontName = triplet.fontName;
                WordToHtmlUtils.addFontFamily(pFontName, style);
                WordToHtmlUtils.addFontSize(pFontSize, style);
            } else {
                pFontSize = -1;
                pFontName = "";
            }
            blocksProperies.push(new BlockProperies(pFontName, pFontSize));
        }
        try {
            if (isNotEmpty(bulletText)) {
                if (bulletText.endsWith("\t")) {
                    /*
                     * We don't know how to handle all cases in HTML, but at
                     * least simplest case shall be handled
                     */
                    final float defaultTab = TWIPS_PER_INCH / 2;
                    // char have some space
                    float firstLinePosition = paragraph.getIndentFromLeft()
                            + paragraph.getFirstLineIndent() + 20f;

                    // Next multiple of the default tab width at or after the
                    // first-line position.
                    float nextStop = (float) (Math.ceil(firstLinePosition
                            / defaultTab) * defaultTab);

                    final float spanMinWidth = nextStop - firstLinePosition;

                    Element span = htmlDocumentFacade.getDocument()
                            .createElement("span");
                    htmlDocumentFacade
                            .addStyleClass(span, "s",
                                    "display: inline-block; text-indent: 0; min-width: "
                                            + (spanMinWidth / TWIPS_PER_INCH)
                                            + "in;");
                    pElement.appendChild(span);

                    // Drop the trailing tab; the span's min-width stands in for it.
                    Text textNode = htmlDocumentFacade.createText(bulletText
                            .substring(0, bulletText.length() - 1)
                            + UNICODECHAR_ZERO_WIDTH_SPACE
                            + UNICODECHAR_NO_BREAK_SPACE);
                    span.appendChild(textNode);
                } else {
                    // No trailing tab to strip here: emit the bullet text
                    // unchanged. Removing the last character would drop
                    // either a real bullet character or the separating space.
                    Text textNode = htmlDocumentFacade.createText(bulletText);
                    pElement.appendChild(textNode);
                }
            }

            processCharacters(hwpfDocument, currentTableLevel, paragraph,
                    pElement);
        } finally {
            blocksProperies.pop();
        }

        if (style.length() > 0) {
            htmlDocumentFacade.addStyleClass(pElement, "p", style.toString());
        }

        compactSpans(pElement);
    }

    /**
     * Merges adjacent compatible {@code span} children below {@code pElement}
     * by calling {@link #compactChildNodesR(Element, String)} with the tag
     * name {@code "span"}.
     */
    private void compactSpans(Element pElement) {
        compactChildNodesR(pElement, "span");
    }


    /**
     * Recursively merges adjacent child elements of {@code parentElement}
     * that {@link #canBeMerged(Node, Node, String)} accepts: the children of
     * the second element are moved to the end of the first, and the emptied
     * second element is removed.
     * <p>
     * The children are walked through sibling links rather than by index
     * into {@code getChildNodes()}. An index loop is only correct when the
     * returned {@link NodeList} is live. With a snapshot list, a removed node
     * is still returned by {@code item(i)}, which fails on its null parent
     * or, once that is guarded, merges the following sibling into the
     * already-detached node and loses its content.
     *
     * @param parentElement element whose descendants are compacted
     * @param childTagName  tag name of the elements that may be merged
     */
    private void compactChildNodesR(Element parentElement, String childTagName) {
        // Pass 1: merge runs of compatible adjacent siblings at this level.
        Node current = parentElement.getFirstChild();
        while (current != null) {
            Node next = current.getNextSibling();
            if (next != null && canBeMerged(current, next, childTagName)) {
                // appendChild moves the node, so next loses one child per iteration.
                while (next.getFirstChild() != null) {
                    current.appendChild(next.getFirstChild());
                }
                parentElement.removeChild(next);
                // Stay on current: its new next sibling may be mergeable too.
            } else {
                current = next;
            }
        }

        // Pass 2: recurse into every remaining child element, including the
        // last one (the index loop this replaces stopped one child short).
        for (Node child = parentElement.getFirstChild(); child != null;
             child = child.getNextSibling()) {
            if (child instanceof Element) {
                compactChildNodesR((Element) child, childTagName);
            }
        }
    }

    /**
     * Decides whether two sibling nodes can be merged into one.
     * <p>
     * Both must be elements with the tag name {@code requiredTagName} and
     * must carry the same number of attributes, and every attribute of the
     * first must exist on the second with equal text content. Attributes
     * with a non-empty namespace URI are looked up by namespace and local
     * name, the others by name.
     *
     * @return {@code true} when all of the conditions above hold
     */
    private boolean canBeMerged(Node node1, Node node2, String requiredTagName) {
        final boolean bothElements = node1.getNodeType() == Node.ELEMENT_NODE
                && node2.getNodeType() == Node.ELEMENT_NODE;
        if (!bothElements) {
            return false;
        }

        final Element first = (Element) node1;
        final Element second = (Element) node2;

        final boolean bothHaveRequiredTag = equals(requiredTagName, first.getTagName())
                && equals(requiredTagName, second.getTagName());
        if (!bothHaveRequiredTag) {
            return false;
        }

        final NamedNodeMap firstAttrs = first.getAttributes();
        final NamedNodeMap secondAttrs = second.getAttributes();
        if (firstAttrs.getLength() != secondAttrs.getLength()) {
            return false;
        }

        for (int index = 0; index < firstAttrs.getLength(); index++) {
            final Attr expected = (Attr) firstAttrs.item(index);
            final String namespace = expected.getNamespaceURI();

            final Attr actual;
            if (isNotEmpty(namespace)) {
                actual = (Attr) secondAttrs.getNamedItemNS(namespace, expected.getLocalName());
            } else {
                actual = (Attr) secondAttrs.getNamedItem(expected.getName());
            }

            if (actual == null) {
                return false;
            }
            if (!equals(expected.getTextContent(), actual.getTextContent())) {
                return false;
            }
        }

        return true;
    }

    /**
     * Renders a section as its own block: the block gets the section style,
     * is appended to the body, and receives the section's paragraphs.
     */
    @Override
    protected void processSection(HWPFDocumentCore wordDocument,
                                  Section section, int sectionCounter) {
        final Element sectionBlock = htmlDocumentFacade.createBlock();
        final String sectionCss = getSectionStyle(section);
        htmlDocumentFacade.addStyleClass(sectionBlock, "d", sectionCss);
        htmlDocumentFacade.getBody().appendChild(sectionBlock);

        processParagraphes(wordDocument, sectionBlock, section, Integer.MIN_VALUE);
    }

    /**
     * Renders the only section of a document directly into the body: the
     * section style is applied to the body element, which then receives the
     * section's paragraphs.
     */
    @Override
    protected void processSingleSection(HWPFDocumentCore wordDocument,
                                        Section section) {
        final String sectionCss = getSectionStyle(section);
        htmlDocumentFacade.addStyleClass(htmlDocumentFacade.getBody(), "b", sectionCss);

        final Element body = htmlDocumentFacade.getBody();
        processParagraphes(wordDocument, body, section, Integer.MIN_VALUE);
    }

    /**
     * Converts a Word table into an HTML table and appends it to {@code flow}.
     *
     * <p>Rows flagged as table headers are collected in the table-header
     * element, all other rows in the table-body element. Cells that continue
     * a vertical merge, and cells whose computed column span is zero, produce
     * no HTML cell. The finished table is appended to {@code flow} only when
     * the body received at least one row; otherwise a warning is logged and
     * nothing is appended.
     *
     * @param hwpfDocument document being converted
     * @param flow         element that receives the generated table
     * @param table        Word table to convert
     */
    @Override
    protected void processTable(HWPFDocumentCore hwpfDocument, Element flow,
                                Table table) {
        Element tableHeader = htmlDocumentFacade.createTableHeader();
        Element tableBody = htmlDocumentFacade.createTableBody();

        // Sorted distinct cell-edge positions, used to derive column spans.
        final int[] tableCellEdges = buildTableCellEdgesArray(table);
        final int tableRows = table.numRows();

        for (int r = 0; r < tableRows; r++) {
            TableRow tableRow = table.getRow(r);

            Element tableRowElement = htmlDocumentFacade.createTableRow();
            StringBuilder tableRowStyle = new StringBuilder();
            WordToHtmlUtils.addTableRowProperties(tableRow, tableRowStyle);

            // index of current element in tableCellEdges[]
            int currentEdgeIndex = 0;
            final int rowCells = tableRow.numCells();
            for (int c = 0; c < rowCells; c++) {
                TableCell tableCell = tableRow.getCell(c);

                // A continuation cell of a vertical merge emits no HTML cell,
                // but the edge index must still move past its columns.
                if (tableCell.isVerticallyMerged()
                        && !tableCell.isFirstVerticallyMerged()) {
                    currentEdgeIndex += getNumberColumnsSpanned(
                            tableCellEdges, currentEdgeIndex, tableCell);
                    continue;
                }

                Element tableCellElement;
                if (tableRow.isTableHeader()) {
                    tableCellElement = htmlDocumentFacade
                            .createTableHeaderCell();
                } else {
                    tableCellElement = htmlDocumentFacade.createTableCell();
                }
                // The four flags tell the helper whether this is the first /
                // last row and the first / last cell of its row.
                StringBuilder tableCellStyle = new StringBuilder();
                WordToHtmlUtils.addTableCellProperties(tableRow, tableCell,
                        r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
                        tableCellStyle);

                int colSpan = getNumberColumnsSpanned(tableCellEdges,
                        currentEdgeIndex, tableCell);
                currentEdgeIndex += colSpan;

                // A cell spanning no columns is dropped from the output.
                if (colSpan == 0) {
                    continue;
                }

                if (colSpan != 1) {
                    tableCellElement.setAttribute("colspan",
                            String.valueOf(colSpan));
                }

                final int rowSpan = getNumberRowsSpanned(table,
                        tableCellEdges, r, c, tableCell);
                if (rowSpan > 1) {
                    tableCellElement.setAttribute("rowspan",
                            String.valueOf(rowSpan));
                }

                processParagraphes(hwpfDocument, tableCellElement, tableCell,
                        table.getTableLevel());

                // Give an otherwise empty cell a single empty paragraph.
                if (!tableCellElement.hasChildNodes()) {
                    tableCellElement.appendChild(htmlDocumentFacade
                            .createParagraph());
                }
                if (tableCellStyle.length() > 0) {
                    htmlDocumentFacade.addStyleClass(tableCellElement,
                            tableCellElement.getTagName(),
                            tableCellStyle.toString());
                }

                tableRowElement.appendChild(tableCellElement);
            }

            if (tableRowStyle.length() > 0) {
                tableRowElement.setAttribute("class", htmlDocumentFacade
                        .getOrCreateCssClass("r", tableRowStyle.toString()));
            }

            if (tableRow.isTableHeader()) {
                tableHeader.appendChild(tableRowElement);
            } else {
                tableBody.appendChild(tableRowElement);
            }
        }

        final Element tableElement = htmlDocumentFacade.createTable();
        tableElement
                .setAttribute(
                        "class",
                        htmlDocumentFacade
                                .getOrCreateCssClass("t",
                                        "table-layout:fixed;border-collapse:collapse;border-spacing:0;"));
        if (tableHeader.hasChildNodes()) {
            tableElement.appendChild(tableHeader);
        }
        if (tableBody.hasChildNodes()) {
            tableElement.appendChild(tableBody);
            flow.appendChild(tableElement);
        } else {
            // Without body rows the table is never attached to flow.
            logger.log(POILogger.WARN, "Table without body starting at [",
                    Integer.valueOf(table.getStartOffset()), "; ",
                    Integer.valueOf(table.getEndOffset()), ")");
        }
    }

    /**
     * Gathers the distinct left and right edge positions of every cell in the
     * table and returns them in ascending order.
     *
     * <p>A cell's right edge is computed as its left edge plus its width.
     *
     * @param table table whose cells are inspected
     * @return ascending array of unique edge positions; empty when the table
     *         has no cells
     */
    private int[] buildTableCellEdgesArray(Table table) {
        // A TreeSet both removes duplicates and keeps the positions sorted.
        final Set<Integer> edgePositions = new TreeSet<Integer>();

        for (int rowIndex = 0; rowIndex < table.numRows(); rowIndex++) {
            final TableRow row = table.getRow(rowIndex);
            for (int cellIndex = 0; cellIndex < row.numCells(); cellIndex++) {
                final TableCell cell = row.getCell(cellIndex);
                final int leftEdge = cell.getLeftEdge();
                edgePositions.add(leftEdge);
                edgePositions.add(leftEdge + cell.getWidth());
            }
        }

        final int[] result = new int[edgePositions.size()];
        int next = 0;
        for (Integer position : edgePositions) {
            result[next++] = position;
        }
        return result;
    }

    /**
     * Immutable holder for the font properties already applied to the current
     * {@code p} element, so that child {@code span} elements do not have to
     * repeat them.
     */
    private static class BlockProperies {
        /** Font name applied to the enclosing {@code p} element. */
        final String pFontName;

        /** Font size applied to the enclosing {@code p} element. */
        final int pFontSize;

        /**
         * @param fontName font name applied to the {@code p} element
         * @param fontSize font size applied to the {@code p} element
         */
        public BlockProperies(String fontName, int fontSize) {
            pFontName = fontName;
            pFontSize = fontSize;
        }
    }

}

有时文档解析出来后会无故丢失部分文字。其实这些文字已经被读出来了,只是在后续处理(合并相邻节点)时,有一个bug导致逻辑不正确。可以对代码做如下修改:

/**
 * Recursively merges adjacent child nodes of {@code parentElement} that
 * {@code canBeMerged} accepts for the given tag name.
 *
 * <p>First pass: for each pair of neighbouring children that can be merged,
 * every child of the second node is moved to the end of the first node and
 * the emptied second node is removed from its parent. Second pass: the same
 * compaction is applied to every child element, including the last one.
 *
 * @param parentElement element whose children are compacted in place
 * @param childTagName  tag name passed through to {@code canBeMerged}
 */
private void compactChildNodesR(Element parentElement, String childTagName) {
    NodeList childNodes = parentElement.getChildNodes();
    // Guard against a null child list.
    if (childNodes != null) {
        for (int i = 0; i < childNodes.getLength() - 1; i++) {
            Node child1 = childNodes.item(i);
            Node child2 = childNodes.item(i + 1);
            if (!canBeMerged(child1, child2, childTagName)) {
                continue;
            }

            // Merge: move child2's children, in order, to the end of child1.
            Node moved;
            while ((moved = child2.getFirstChild()) != null) {
                child1.appendChild(moved);
            }

            Node child2Parent = child2.getParentNode();
            if (child2Parent != null) {
                child2Parent.removeChild(child2);
                // Re-fetch the list after the removal. This works around
                // child2's parent node turning out to be null on a later
                // iteration (presumably the NodeList is a snapshot rather
                // than a live view on this platform -- confirm).
                childNodes = parentElement.getChildNodes();
                // Stay on child1 so it is compared with its new neighbour.
                i--;
            }
        }
    }

    childNodes = parentElement.getChildNodes();
    if (childNodes != null) {
        // Visit every child; a bound of getLength() - 1 would leave the
        // last child element uncompacted.
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node child = childNodes.item(i);
            if (child instanceof Element) {
                compactChildNodesR((Element) child, childTagName);
            }
        }
    }
}
  1. Activity里的代码参照了别人的文章(基本是直接拷过来使用的):http://www.cnblogs.com/esrichina/p/3347454.html
    在此感谢原作者提供的方法。
  2. POI官网 http://poi.apache.org ,我用的最新版,jar包可以在此网站下载

展开阅读全文

没有更多推荐了,返回首页