POI工具类(word转html)

最新推荐文章于 2024-05-13 17:47:15 发布
Z_海瑞_Z
最新推荐文章于 2024-05-13 17:47:15 发布
阅读量700
点赞数 1
分类专栏：工具类 文章标签：poi
ZHRZ
本文链接：https://blog.csdn.net/qq_20286065/article/details/109624472
版权
工具类专栏收录该内容
17 篇文章 1 订阅
订阅专栏
import fr.opensagres.xdocreport.core.io.IOUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Helpers for converting Word documents to HTML with Apache POI / xdocreport,
 * post-processing the generated HTML, and wrapping HTML in an OLE2 container.
 */
public class POIUtil {

    /** Prefix that starts the body element in converted HTML. */
    private static final String BODY_OPEN = "<body";

    /** Prefix that starts the closing body tag in converted HTML. */
    private static final String BODY_CLOSE = "</body";

    public POIUtil() {}

    /**
     * Converts a Word 2003 ({@code .doc}) file to an HTML file.
     *
     * @param sourceFilePath path of the {@code .doc} file to read
     * @param targetFilePath path of the HTML file to write
     * @return {@code targetFilePath}
     * @throws Exception if the source cannot be read or converted, or the target cannot be written
     */
    public static String docToHtml(String sourceFilePath, String targetFilePath) throws Exception {
        // Convert first and only then open the target, so a failed conversion
        // does not create or truncate the output file.
        Document html;
        try (InputStream in = new FileInputStream(sourceFilePath)) {
            html = convertDocToDom(in);
        }
        // The output stream is opened here (rather than handing a File to the
        // transformer) so that it is closed deterministically.
        try (OutputStream out = new FileOutputStream(targetFilePath)) {
            serializeHtml(html, new StreamResult(out));
        }
        return targetFilePath;
    }

    /**
     * Converts a Word 2007 ({@code .docx}) file to an HTML file encoded as UTF-8.
     *
     * @param sourceFilePath path of the {@code .docx} file to read
     * @param targetFilePath path of the HTML file to write
     * @return {@code targetFilePath}
     * @throws Exception if the source cannot be read or converted, or the target cannot be written
     */
    public static String docxToHtml(String sourceFilePath, String targetFilePath) throws Exception {
        try (InputStream in = new FileInputStream(sourceFilePath)) {
            // Load the document before opening the target so an unreadable
            // source does not leave an empty output file behind.
            XWPFDocument document = new XWPFDocument(in);
            try (Writer out = new OutputStreamWriter(
                    new FileOutputStream(targetFilePath), StandardCharsets.UTF_8)) {
                convertDocx(document, out);
            }
        }
        return targetFilePath;
    }

    /**
     * Converts a Word 2003 ({@code .doc}) stream to an HTML string.
     *
     * @param fileInputStream stream positioned at the start of a {@code .doc} document;
     *                        it is not closed by this method
     * @return the generated HTML
     * @throws Exception if the stream cannot be read or converted
     */
    public static String docToHtml(InputStream fileInputStream) throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        serializeHtml(convertDocToDom(fileInputStream), new StreamResult(baos));
        // The serializer is configured for UTF-8 output, so decode with the same charset.
        return baos.toString("UTF-8");
    }

    /**
     * Converts a Word 2007 ({@code .docx}) stream to an HTML string.
     *
     * @param fileInputStream stream positioned at the start of a {@code .docx} document;
     *                        it is not closed by this method
     * @return the generated HTML
     * @throws Exception if the stream cannot be read or converted
     */
    public static String docxToHtml(InputStream fileInputStream) throws Exception {
        // Writing to a character sink avoids an encode/decode round trip whose
        // charset would otherwise depend on the platform default.
        StringWriter html = new StringWriter();
        convertDocx(new XWPFDocument(fileInputStream), html);
        return html.toString();
    }

    /**
     * Reads a UTF-8 HTML file and returns its body as a {@code <div>} fragment
     * with class-based styles inlined (see {@link #changeStyle(StringBuffer)}).
     *
     * @param filePath path of the HTML file to read
     * @return the post-processed HTML fragment
     * @throws UncheckedIOException if the file cannot be opened or read
     */
    public static String readFile(String filePath) {
        ByteArrayOutputStream raw = new ByteArrayOutputStream();
        try (InputStream input = new FileInputStream(filePath)) {
            byte[] chunk = new byte[1024];
            for (int n; (n = input.read(chunk)) != -1;) {
                raw.write(chunk, 0, n);
            }
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to read HTML file: " + filePath, e);
        }
        // Decode once over the complete content: decoding chunk by chunk would
        // corrupt multi-byte characters that straddle a chunk boundary.
        String html = new String(raw.toByteArray(), StandardCharsets.UTF_8);
        return changeStyle(new StringBuffer(html));
    }

    /**
     * Extracts the {@code <body>} element of an HTML document, renames it to
     * {@code <div>}, and copies the declarations of class rules found in the
     * first {@code <style>} block into the {@code style} attribute of every
     * element that carries the matching class.
     *
     * <p>Only rules whose selector is a single class (for example {@code .p1})
     * are inlined. Class names are matched as whole tokens, so {@code p1} does
     * not match {@code p11}. Inlined declarations are placed before any
     * declarations already present in an element's {@code style} attribute.
     * If the document has no {@code <style>} block, nothing is inlined.
     *
     * @param buff the complete HTML document
     * @return the body as a {@code <div>} fragment with inlined styles
     * @throws IllegalArgumentException if the document has no {@code <body>} element
     */
    public static String changeStyle(StringBuffer buff) {
        String html = buff.toString();
        String fragment = extractBodyAsDiv(html);
        Map<String, String> classStyles = parseClassStyles(html);
        if (classStyles.isEmpty()) {
            return fragment;
        }
        return inlineClassStyles(fragment, classStyles);
    }

    /**
     * Replaces the {@code src} value of every {@code <img src="...">} tag in the buffer.
     *
     * <p>NOTE(review): the replacement value is a hard-coded empty placeholder, so as
     * written every matched {@code src} is blanked. It is presumably meant to be
     * replaced with a lookup of the image's location on a file server - confirm.
     *
     * @param buffer HTML to modify in place
     */
    public void changePicture(StringBuffer buffer) {
        final String marker = "<img src=\"";
        int from = 0;
        while (true) {
            int tagStart = buffer.indexOf(marker, from);
            if (tagStart < 0) {
                return;
            }
            int srcStart = tagStart + marker.length();
            // The value ends at its own closing quote, whatever attribute follows.
            int srcEnd = buffer.indexOf("\"", srcStart);
            if (srcEnd < 0) {
                return;
            }
            // Placeholder for the real image location (e.g. on a third-party file server).
            String realSrc = "";
            buffer.replace(srcStart, srcEnd, realSrc);
            from = srcStart + realSrc.length();
        }
    }

    /**
     * Stores the UTF-8 bytes of {@code content} in a new OLE2 container under
     * an entry named {@code WordDocument} and returns the serialized container.
     *
     * <p>NOTE(review): presumably this relies on Word opening such a file and
     * rendering the embedded HTML - confirm with the consumers of the result.
     *
     * @param content HTML to embed
     * @return a stream over the serialized container bytes
     * @throws Exception if the container cannot be built or written
     */
    public static ByteArrayInputStream  html2World(String content) throws Exception {
        // The charset must be given explicitly; otherwise exported Chinese text is garbled.
        byte[] htmlBytes = content.getBytes(StandardCharsets.UTF_8);
        POIFSFileSystem poifs = new POIFSFileSystem();
        poifs.getRoot().createDocument("WordDocument", new ByteArrayInputStream(htmlBytes));

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        poifs.writeFilesystem(baos);

        return new ByteArrayInputStream(baos.toByteArray());
    }

    /** Runs POI's Word-to-HTML conversion on a {@code .doc} stream and returns the resulting DOM. */
    private static Document convertDocToDom(InputStream in) throws Exception {
        Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        WordToHtmlConverter converter = new WordToHtmlConverter(document);
        converter.processDocument(new HWPFDocument(in));
        return converter.getDocument();
    }

    /** Serializes a DOM as indented UTF-8 HTML into the given result. */
    private static void serializeHtml(Document html, StreamResult result) throws Exception {
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(html), result);
    }

    /** Converts a loaded {@code .docx} document to XHTML, writing to the given character sink. */
    private static void convertDocx(XWPFDocument document, Writer out) throws Exception {
        XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
        xhtmlConverter.convert(document, out, XHTMLOptions.create());
    }

    /** Returns the body element of {@code html} with its tag renamed from body to div. */
    private static String extractBodyAsDiv(String html) {
        int open = html.indexOf(BODY_OPEN);
        int close = open < 0 ? -1 : html.indexOf(BODY_CLOSE, open);
        if (close < 0) {
            throw new IllegalArgumentException("HTML has no <body> element");
        }
        // Rename only the two tags; text inside the body is left untouched.
        return "<div" + html.substring(open + BODY_OPEN.length(), close) + "</div>";
    }

    /** Maps class name to declarations for every single-class rule in the first style block. */
    private static Map<String, String> parseClassStyles(String html) {
        Map<String, String> styles = new HashMap<>();
        int tagStart = html.indexOf("<style");
        if (tagStart < 0) {
            return styles;
        }
        int tagEnd = html.indexOf('>', tagStart);
        int contentEnd = tagEnd < 0 ? -1 : html.indexOf("</style>", tagEnd);
        if (contentEnd < 0) {
            return styles;
        }
        for (String rule : html.substring(tagEnd + 1, contentEnd).split("}")) {
            int brace = rule.indexOf('{');
            if (brace < 0) {
                continue;
            }
            // Trim so that newlines or indentation between rules do not end up in the key.
            String selector = rule.substring(0, brace).trim();
            String declarations = rule.substring(brace + 1).trim();
            if (isSimpleClassSelector(selector) && !declarations.isEmpty()) {
                styles.put(selector.substring(1), declarations);
            }
        }
        return styles;
    }

    /** Tells whether {@code selector} is a dot followed by one plain class name. */
    private static boolean isSimpleClassSelector(String selector) {
        if (selector.length() < 2 || selector.charAt(0) != '.') {
            return false;
        }
        for (int i = 1; i < selector.length(); i++) {
            char c = selector.charAt(i);
            if (!Character.isLetterOrDigit(c) && c != '-' && c != '_') {
                return false;
            }
        }
        return true;
    }

    /** Rewrites every tag in {@code fragment} that has a class attribute with known styles. */
    private static String inlineClassStyles(String fragment, Map<String, String> classStyles) {
        StringBuilder out = new StringBuilder(fragment.length() + 256);
        int pos = 0;
        while (true) {
            int open = fragment.indexOf('<', pos);
            int close = open < 0 ? -1 : fragment.indexOf('>', open);
            if (close < 0) {
                break;
            }
            out.append(fragment, pos, open);
            out.append(inlineTagStyles(fragment.substring(open, close + 1), classStyles));
            pos = close + 1;
        }
        out.append(fragment, pos, fragment.length());
        return out.toString();
    }

    /** Returns {@code tag} with the declarations of its known classes added to its style attribute. */
    private static String inlineTagStyles(String tag, Map<String, String> classStyles) {
        int classAttr = indexOfAttribute(tag, "class");
        if (classAttr < 0) {
            return tag;
        }
        int valueStart = classAttr + "class=\"".length();
        int valueEnd = tag.indexOf('"', valueStart);
        if (valueEnd < 0) {
            return tag;
        }
        StringBuilder css = new StringBuilder();
        // Splitting the attribute into tokens gives whole-name matching for free.
        for (String className : tag.substring(valueStart, valueEnd).trim().split("\\s+")) {
            String declarations = classStyles.get(className);
            if (declarations != null) {
                // Double quotes would terminate the style attribute early.
                css.append(declarations.replace('"', '\''));
                if (css.charAt(css.length() - 1) != ';') {
                    css.append(';');
                }
            }
        }
        if (css.length() == 0) {
            return tag;
        }
        int styleAttr = indexOfAttribute(tag, "style");
        if (styleAttr >= 0) {
            // Put class declarations first so existing inline declarations still win.
            int insertAt = styleAttr + "style=\"".length();
            return tag.substring(0, insertAt) + css + tag.substring(insertAt);
        }
        return tag.substring(0, valueEnd + 1) + " style=\"" + css + "\"" + tag.substring(valueEnd + 1);
    }

    /** Finds {@code name="} in {@code tag} where it is preceded by whitespace, or returns -1. */
    private static int indexOfAttribute(String tag, String name) {
        String needle = name + "=\"";
        int from = 0;
        while (true) {
            int at = tag.indexOf(needle, from);
            if (at < 0) {
                return -1;
            }
            if (at > 0 && Character.isWhitespace(tag.charAt(at - 1))) {
                return at;
            }
            from = at + 1;
        }
    }
}
Z_海瑞_Z
关注
1
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
POI工具类(word转html)

import fr.opensagres.xdocreport.core.io.IOUtils;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.WordToHtmlConverter;import org.apache.poi.poifs.filesystem.DirectoryEntry;import org.apache.poi.poifs.filesystem.DocumentEntry;
复制链接

扫一扫