Word(doc、docx)与 XML、PDF、图片互转完整版

1.创建maven项目,引入下面的依赖

    <dependencies>
        <!-- jacob dependency (used by POI_DOC to drive the "Word.Application" COM object) -->
        <dependency>
            <groupId>net.sf.jacob-project</groupId>
            <artifactId>jacob</artifactId>
            <version>1.19</version>
        </dependency>
        <!-- hutool-all dependency -->
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.5.4</version>
        </dependency>
        <!-- fastjson dependency -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.58</version>
        </dependency>
        <!-- jdom dependency -->
        <dependency>
            <groupId>org.jdom</groupId>
            <artifactId>jdom</artifactId>
            <version>2.0.2</version>
        </dependency>
        <!-- Apache POI dependencies (Word text extraction in POI_DOC.readWord) -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>4.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>4.0.1</version>
        </dependency>
        <!-- PDF dependencies (PDFBox renders PDF pages to images in POI_DOC.pdfTopng) -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.21</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>fontbox</artifactId>
            <version>2.0.21</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.5.13</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itext7-core</artifactId>
            <version>7.1.0</version>
            <type>pom</type>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

2.创建JDom类

package com.chang.util;

/**
 * Thin static facade over {@link POI_DOC}: each method delegates one
 * conversion to {@code POI_DOC} and hands back the path of the file that
 * the conversion was asked to produce.
 *
 * @author liao
 */
public class JDom {

    /**
     * Converts a .doc file to .docx.
     *
     * @param docPath  full path of the source .doc file
     * @param docxPath full path of the .docx file to write
     * @return {@code docxPath}
     */
    public static String docToDocx(String docPath, String docxPath) {
        POI_DOC.docToDocx(docPath, docxPath);
        return docxPath;
    }

    /**
     * Converts a Word document to PDF.
     *
     * <p>The boolean success flag returned by {@code POI_DOC.wordToPdf} is not
     * propagated; the target path is returned unconditionally.
     *
     * @param wordPath full path of the source Word file
     * @param pdfPath  full path of the PDF to write
     * @return {@code pdfPath}
     */
    public static String wordToPdf(String wordPath, String pdfPath) {
        POI_DOC.wordToPdf(wordPath, pdfPath);
        return pdfPath;
    }

    /**
     * Renders every page of a PDF to its own image file.
     *
     * @param pdfPath full path of the PDF, e.g. D:\code\21高邮01账户信息表.pdf
     * @param pngPath path prefix for the generated images,
     *                e.g. D:\code\21高邮01账户信息表
     */
    public static void pdfTopng(String pdfPath, String pngPath) {
        POI_DOC.pdfTopng(pdfPath, pngPath);
    }

    /**
     * Converts a .doc or .docx file to XML.
     *
     * @param wordPath absolute path of the source file, e.g. D:\\code\\信息表.doc
     * @param xmlPath  absolute path of the XML file to write, e.g. D:\\code\\信息表.xml
     * @return {@code xmlPath}
     */
    public static String wordToXML(String wordPath, String xmlPath) {
        POI_DOC.wordToXml(wordPath, xmlPath);
        return xmlPath;
    }

    /**
     * Converts an XML file back to a Word document.
     *
     * @param xmlPath  absolute path of the source XML, e.g. D:\\code\\信息表.xml
     * @param wordPath absolute path of the Word file to write, e.g. D:\\code\\信息表.doc
     * @return {@code wordPath}
     */
    public static String xmlToWord(String xmlPath, String wordPath) {
        POI_DOC.xmlToWord(xmlPath, wordPath);
        return wordPath;
    }

    /**
     * Converts a .docx file to .doc.
     *
     * @param docxPath full path of the source .docx file
     * @param docPath  full path of the .doc file to write
     * @return {@code docPath}, the generated file, matching the other
     *         converters in this class (previously the input path was
     *         returned by mistake)
     */
    public static String docxToDoc(String docxPath, String docPath) {
        POI_DOC.docxToDoc(docxPath, docPath);
        return docPath;
    }
}

3.创建POI_DOC类

package com.chang.util;

import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;

public class POI_DOC {
   

    static final int wdDoNotSaveChanges = 0;// Do not save pending changes.
    static final int wdFormatPDF = 17;// Save-format code used for the Word-to-PDF conversion.
    // NOTE(review): despite the PNG name, the value is the ".jpg" extension; pdfTopng appends it to each image file name.
    public static final String PNG_SUFFIX = ".jpg";

    /**
     * Converts a .doc/.docx file to XML by automating Microsoft Word via JACOB.
     *
     * <p>Failures are reported with {@code printStackTrace()} and not rethrown.
     * Word is always asked to quit, even when opening or saving fails, so a
     * failed conversion no longer leaves a Word process behind.
     *
     * @param inputFIle  path of the Word document to open
     * @param outputFile path of the XML file to write
     */
    public static void wordToXml(String inputFIle, String outputFile) {
        ActiveXComponent app = null;
        try {
            // Start Word. The window is made visible, as in the original implementation,
            // so the conversion can be watched.
            app = new ActiveXComponent("Word.Application");
            app.setProperty("Visible", new Variant(true));
            Dispatch docs = app.getProperty("Documents").toDispatch();
            // Documents.Open(FileName, ConfirmConversions=false, ReadOnly=true)
            Dispatch doc = Dispatch.invoke(
                    docs,
                    "Open",
                    Dispatch.Method,
                    new Object[] {inputFIle, new Variant(false), new Variant(true)},
                    new int[1]).toDispatch();
            try {
                // Save-format code 19 (one of the XML codes 11 and 19-22).
                Dispatch.call(doc, "SaveAs", outputFile, 19);
            } finally {
                // Close without saving changes, even if SaveAs failed.
                Dispatch.call(doc, "Close", false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (app != null) {
                // 0 = do not save pending changes on exit.
                app.invoke("Quit", 0);
            }
        }
    }

    /**
     * Converts an XML file to a Word document by automating Microsoft Word via JACOB.
     *
     * <p>The output is always written with save-format code 12 (docx),
     * whatever extension {@code wordPath} carries. Failures are logged with
     * their stack trace and not rethrown; the completion message is printed
     * only when the conversion actually succeeded.
     *
     * @param xmlPath  path of the XML file to open
     * @param wordPath path of the Word file to write
     */
    public static void xmlToWord(String xmlPath,String wordPath) {
        // Start the Word application.
        ActiveXComponent app = new ActiveXComponent("Word.Application");
        File file = new File(xmlPath);
        System.out.println("正在转换     "+file.getAbsolutePath()+"");
        try {
            // Keep the Word window hidden.
            app.setProperty("Visible", new Variant(false));
            // "Documents" is the collection of all documents open in Word.
            Dispatch docs = app.getProperty("Documents").toDispatch();
            // Documents.Open(FileName, ConfirmConversions=false, ReadOnly=<empty variant>)
            Dispatch doc = Dispatch.invoke(
                    docs,
                    "Open",
                    Dispatch.Method,
                    new Object[] {file.getAbsolutePath(), new Variant(false), new Variant()},
                    new int[1]).toDispatch();
            try {
                // Word SaveAs format codes as listed by the original author:
                //   0 doc, 1 dot, 2-5 and 7 txt, 6 rtf, 8 and 10 htm, 9 mht,
                //   11 and 19-22 xml, 12 docx, 13 docm, 14 dotx, 15 dotm,
                //   16 and 24 docx, 17 pdf, 18 xps, 23 odt.
                int type = 12;
                Dispatch.invoke(doc, "SaveAs", Dispatch.Method,
                        new Object[] {wordPath, new Variant(type)}, new int[1]);
            } finally {
                // Close the document without saving, even if SaveAs failed.
                Dispatch.call(doc, "Close", new Variant(false));
            }
            System.out.println("*******转换完毕********");
        } catch (Exception e) {
            System.out.println("*******转换出错********");
            // Surface the cause instead of silently discarding it.
            e.printStackTrace();
        } finally {
            // Shut down the Word application.
            app.invoke("Quit", new Variant[] {});
        }
    }

    //word转pdf
    public static boolean wordToPdf(String wordPath, String pdfPath) {
   
        System.out.println("Word转PDF开始启动...");
        long start = System.currentTimeMillis();
        ActiveXComponent app = null;
        try {
   
            app = new ActiveXComponent("Word.Application");
            app.setProperty("Visible",  new Variant(false));
            Dispatch docs = app.getProperty("Documents").toDispatch();
            //Dispatch   wordFile=Dispatch.invoke(docs, "Open", Dispatch.Method, new Object[]{
   source,new Variant(true),new Variant(false)}, new int[1]).toDispatch();
            System.out.println("打开文档:" + wordPath);
            //Dispatch.call((Dispatch) Dispatch.call(app, "WordBasic").getDispatch(),"FileSaveAs", source);
            Dispatch wordFile = Dispatch.call(docs, "Open", wordPath, false, false).toDispatch();
            Dispatch.put(wordFile,"TrackRevisions",new Variant(false));
            Dispatch.put(wordFile,"PrintRevisions",new Variant(false));
            Dispatch.put(wordFile,"ShowRevisions",new Variant(false));
            Dispatch.call(wordFile, "AcceptAllRevisions");
            //System.out.println("转换文档到PDF:" + target);
            File tofile = new File(pdfPath);
            if (tofile.exists()) {
   
                tofile.delete();
            }
            Dispatch.call(wordFile, "SaveAs", pdfPath, wdFormatPDF);
            Dispatch.call(wordFile, "Close", false);
            long end = System.currentTimeMillis();
            //System.out.println("Word转PDF,用时:" + (end - start) + "ms");
            return true;
        } catch (Exception e) {
   
            e.printStackTrace();
            System.out.println("Word转PDF出错:" + e.getMessage());
            return false;
        } finally {
   
            if (app != null) {
   
                app.invoke("Quit", wdDoNotSaveChanges);
            }
        }
    }

    /**
     * Converts a .doc file to .docx by automating Microsoft Word via JACOB.
     *
     * <p>Failures are logged with their stack trace and not rethrown; the
     * completion message is printed only when the conversion actually succeeded.
     *
     * @param docPath  path of the .doc file to open
     * @param docxPath path of the .docx file to write
     */
    public static void docToDocx(String docPath,String docxPath) {
        // Start the Word application.
        ActiveXComponent app = new ActiveXComponent("Word.Application");
        File file = new File(docPath);
        System.out.println("正在转换     "+file.getAbsolutePath()+"");
        try {
            // Keep the Word window hidden.
            app.setProperty("Visible", new Variant(false));
            // "Documents" is the collection of all documents open in Word.
            Dispatch docs = app.getProperty("Documents").toDispatch();
            // Documents.Open(FileName, ConfirmConversions=false, ReadOnly=true)
            Dispatch doc = Dispatch.invoke(
                    docs,
                    "Open",
                    Dispatch.Method,
                    new Object[] {file.getAbsolutePath(), new Variant(false), new Variant(true)},
                    new int[1]).toDispatch();
            try {
                // Word SaveAs format codes as listed by the original author:
                //   0 doc, 1 dot, 2-5 and 7 txt, 6 rtf, 8 and 10 htm, 9 mht,
                //   11 and 19-22 xml, 12 docx, 13 docm, 14 dotx, 15 dotm,
                //   16 and 24 docx, 17 pdf, 18 xps, 23 odt.
                int type = 12;
                Dispatch.invoke(doc, "SaveAs", Dispatch.Method,
                        new Object[] {docxPath, new Variant(type)}, new int[1]);
            } finally {
                // Close the document without saving, even if SaveAs failed.
                Dispatch.call(doc, "Close", new Variant(false));
            }
            System.out.println("*******转换完毕********");
        } catch (Exception e) {
            System.out.println("*******转换出错********");
            // Surface the cause instead of silently discarding it.
            e.printStackTrace();
        } finally {
            // Shut down the Word application.
            app.invoke("Quit", new Variant[] {});
        }
    }

    /**
     * Converts a .docx file to .doc by automating Microsoft Word via JACOB.
     *
     * <p>Failures are logged with their stack trace and not rethrown; the
     * completion message is printed only when the conversion actually succeeded.
     *
     * @param docxPath path of the .docx file to open
     * @param docPath  path of the .doc file to write
     */
    public static void docxToDoc(String docxPath,String docPath) {
        // Start the Word application.
        ActiveXComponent app = new ActiveXComponent("Word.Application");
        File file = new File(docxPath);
        System.out.println("正在转换     "+file.getAbsolutePath()+"");
        try {
            // Keep the Word window hidden.
            app.setProperty("Visible", new Variant(false));
            // "Documents" is the collection of all documents open in Word.
            Dispatch docs = app.getProperty("Documents").toDispatch();
            // Documents.Open(FileName, ConfirmConversions=false, ReadOnly=true)
            Dispatch doc = Dispatch.invoke(
                    docs,
                    "Open",
                    Dispatch.Method,
                    new Object[] {file.getAbsolutePath(), new Variant(false), new Variant(true)},
                    new int[1]).toDispatch();
            try {
                // Word SaveAs format codes as listed by the original author:
                //   0 doc, 1 dot, 2-5 and 7 txt, 6 rtf, 8 and 10 htm, 9 mht,
                //   11 and 19-22 xml, 12 docx, 13 docm, 14 dotx, 15 dotm,
                //   16 and 24 docx, 17 pdf, 18 xps, 23 odt.
                int type = 0;
                Dispatch.invoke(doc, "SaveAs", Dispatch.Method,
                        new Object[] {docPath, new Variant(type)}, new int[1]);
            } finally {
                // Close the document without saving, even if SaveAs failed.
                Dispatch.call(doc, "Close", new Variant(false));
            }
            System.out.println("*******转换完毕********");
        } catch (Exception e) {
            System.out.println("*******转换出错********");
            // Surface the cause instead of silently discarding it.
            e.printStackTrace();
        } finally {
            // Shut down the Word application.
            app.invoke("Quit", new Variant[] {});
        }
    }

    /**
     * Renders each page of a PDF to its own JPEG image.
     *
     * <p>Page {@code i} (0-based) is written to
     * {@code pngPath + "{" + i + "}" + PNG_SUFFIX}, so {@code pngPath} acts as
     * a file-name prefix. Missing parent directories of the output files are
     * created. I/O failures are reported with {@code printStackTrace()} and
     * not rethrown.
     *
     * @param pdfPath path of the PDF to render
     * @param pngPath path prefix for the generated image files
     */
    public static void pdfTopng(String pdfPath, String pngPath) {
        File file = new File(pdfPath);
        // try-with-resources closes the document on every path and avoids the
        // NullPointerException the old finally block hit when loading failed.
        try (PDDocument doc = PDDocument.load(file)) {
            PDFRenderer renderer = new PDFRenderer(doc);
            int pageCount = doc.getNumberOfPages();
            for (int i = 0; i < pageCount; i++) {
                BufferedImage image = renderer.renderImageWithDPI(i, 72);
                File imageFile = new File(pngPath + "{" + i + "}" + PNG_SUFFIX);
                // Create the containing directory, not a directory named like the image itself.
                File parentDir = imageFile.getAbsoluteFile().getParentFile();
                if (parentDir != null && !parentDir.exists()) {
                    parentDir.mkdirs();
                }
                ImageIO.write(image, "jpg", imageFile);
                image.flush();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }



    /**
     * Extracts the plain text of a Word document using Apache POI.
     *
     * <p>Paths ending in {@code .doc} are read with {@code WordExtractor},
     * paths ending in {@code .docx} with {@code XWPFWordExtractor}. Any other
     * path prints a notice. Exceptions are reported with
     * {@code printStackTrace()} and not rethrown.
     *
     * @param path path of the .doc or .docx file
     * @return the extracted text, or an empty string if the file is not a
     *         Word file or could not be read
     */
    public static String readWord(String path) {
        String buffer = "";
        try {
            if (path.endsWith(".doc")) {
                // Both the stream and the extractor are closed, even when extraction fails;
                // previously the FileInputStream was never closed.
                try (InputStream is = new FileInputStream(new File(path));
                     WordExtractor ex = new WordExtractor(is)) {
                    buffer = ex.getText();
                }
            } else if (path.endsWith(".docx")) {
                OPCPackage opcPackage = POIXMLDocument.openPackage(path);
                // The extractor is closed on every path, including a failing getText().
                try (POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage)) {
                    buffer = extractor.getText();
                }
            } else {
                System.out.println("此文件不是word文件!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return buffer;
    }


        /**
         * Opens a Word document through JACOB, calls {@code ConvertNumbersToText}
         * on it, and saves it with format code 7 (one of the txt codes).
         * Prints whether the transformation succeeded; exceptions are reported
         * with {@code printStackTrace()} and not rethrown.
         *
         * @param inputFIle  path of the Word document to open
         * @param outputFile path of the text file to write
         */
        public static void extractDoc(String inputFIle, String outputFile) {
            // Launch the Word application.
            ActiveXComponent word = new ActiveXComponent("Word.Application");
            boolean succeeded;
            try {
                // Keep the Word window hidden.
                word.setProperty("Visible", new Variant(false));
                Dispatch documents = word.getProperty("Documents").toDispatch();

                // Documents.Open(FileName, ConfirmConversions=false, ReadOnly=true)
                Object[] openArgs = {inputFIle, new Variant(false), new Variant(true)};
                Dispatch document = Dispatch
                        .invoke(documents, "Open", Dispatch.Method, openArgs, new int[1])
                        .toDispatch();
                Dispatch.call(document, "ConvertNumbersToText");

                // Save in txt format.
                Object[] saveArgs = {outputFile, new Variant(7)};
                Dispatch.invoke(document, "SaveAs", Dispatch.Method, saveArgs, new int[1]);

                // Close the document without saving changes.
                Dispatch.call(document, "Close", new Variant(false));
                succeeded = true;
            } catch (Exception e) {
                e.printStackTrace();
                succeeded = false;
            } finally {
                word.invoke("Quit", new Variant[] {});
            }
            System.out.println(succeeded ? "Transformed Successfully" : "Transform Failed");
        }
    public static String readTxtFile(String filePath){
   
        StringBuffe
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值