JAVA将多个word及pdf文件合并至一个word

JAVA将多个word及pdf文件合并至一个word

pom.xml

<dependency>
   <groupId>fr.opensagres.xdocreport</groupId>
   <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
   <version>1.0.6</version>
</dependency>
<dependency>
   <groupId>fr.opensagres.xdocreport</groupId>
   <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
   <version>1.0.6</version>
</dependency>

工具类DocumentMergeUtil.java

package org.admin.util;

import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;

import javax.imageio.ImageIO;
import javax.servlet.http.HttpServletResponse;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.URL;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


public class DocumentMergeUtil {

   static Map<String, String> levelMap = new HashMap<>();

   static {
       levelMap.put("1", "标题 1");
       levelMap.put("2", "标题 2");
       levelMap.put("3", "标题 3");
   }

   /**
    * 文档
    *
    * @param xwpfDocumentList
    * @return
    * @throws Exception
    */
   public static XWPFDocument merge(List<XWPFDocument> xwpfDocumentList) throws Exception {

       XWPFDocument doc = null;
       for (int i = 0; i < xwpfDocumentList.size(); i++) {
           doc = xwpfDocumentList.get(0);
           if (i != 0) {
               xwpfDocumentList.get(i).createParagraph().setPageBreak(true);
               appendBody(doc, xwpfDocumentList.get(i));
           }
       }
       doc.createParagraph().setPageBreak(true);

       return doc;
   }

   public static XWPFDocument createTitleXWPFDocument(String title, String level) throws Exception {

       XWPFDocument document = new XWPFDocument();
       XWPFParagraph paragraph = document.getParagraphs().get(0);

       // 段落的格式,下面及个设置,将使新添加的文字向左对其,无缩进.
       paragraph.setIndentationLeft(0);
       paragraph.setIndentationHanging(0);
       paragraph.setAlignment(ParagraphAlignment.LEFT);
       // paragraph.setWordWrap( true );
       paragraph.setStyle(getLevelValue(level));
       // 在段落中新插入一个run,这里的run我理解就是一个word文档需要显示的个体,里面可以放文字,参数0代表在段落的最前面插入
       XWPFRun run = paragraph.createRun();
       run.addCarriageReturn();
       // 设置run内容
       run.setText(title);
       run.setFontFamily("宋体");
       run.setBold(true);
       run.setFontSize(20);
       run.addBreak(BreakType.TEXT_WRAPPING);
       run.addCarriageReturn();

       return document;
   }

   public static void appendBody(XWPFDocument src, XWPFDocument append) throws Exception {

//        for(XWPFParagraph p : append.getParagraphs())
//        src.getParagraphs().add(p);

       CTBody src1Body = src.getDocument().getBody();
       CTBody src2Body = append.getDocument().getBody();

       List<XWPFPictureData> allPictures = append.getAllPictures();
       // 记录图片合并前及合并后的ID
       Map<String, String> map = new HashMap();

       for (XWPFPictureData picture : allPictures) {
           String before = append.getRelationId(picture);
           //将原文档中的图片加入到目标文档中
           String after = src.addPictureData(picture.getData(), Document.PICTURE_TYPE_PNG);
           map.put(before, after);
       }

       appendBody(src1Body, src2Body, map);

   }

   private static void appendBody(CTBody src, CTBody append, Map<String, String> map) throws Exception {
       XmlOptions optionsOuter = new XmlOptions();
       optionsOuter.setSaveOuter();
       String appendString = append.xmlText(optionsOuter);

       String srcString = src.xmlText();
       String prefix = srcString.substring(0, srcString.indexOf(">") + 1);
       String mainPart = srcString.substring(srcString.indexOf(">") + 1, srcString.lastIndexOf("<"));
       String sufix = srcString.substring(srcString.lastIndexOf("<"));
       String addPart = appendString.substring(appendString.indexOf(">") + 1, appendString.lastIndexOf("<"));

       if (map != null && !map.isEmpty()) {
           //对xml字符串中图片ID进行替换
           for (Map.Entry<String, String> set : map.entrySet()) {
               addPart = addPart.replace(set.getKey(), set.getValue());
           }
       }
       //将两个文档的xml内容进行拼接
       CTBody makeBody = CTBody.Factory.parse(prefix + mainPart + addPart + sufix);

       src.set(makeBody);
   }

   /**
    * 从pdf文档中读取所有的图片列表
    *
    * @return
    * @throws Exception
    */
   public static List<BufferedImage> getImageListFromPDF(PDDocument document, Integer startPage) throws Exception {
       List<BufferedImage> imageList = new ArrayList<BufferedImage>();

       if (null != document) {
           PDPageTree pages = document.getPages();
           startPage = startPage == null ? 0 : startPage;
           int len = pages.getCount();
           System.out.println("页数 " + len);
           if (startPage < len) {
               for (int i = startPage; i < len; i++) {
                   PDPage page = pages.get(i);
                   PDFRenderer renderer = new PDFRenderer(document);
                   BufferedImage image = renderer.renderImage(i);
                   imageList.add(image);

               }
           }
       }
       return imageList;
   }

   /**
    * 些图片到XWPFDocument
    *
    * @param xwpfDocument
    * @param bufferedImage
    * @return
    * @throws Exception
    */
   public static XWPFDocument writeImageXWPFDocument(XWPFDocument xwpfDocument, BufferedImage bufferedImage) throws Exception {

       XWPFParagraph xwpfParagraph = xwpfDocument.createParagraph();
       XWPFRun run = xwpfParagraph.createRun();

       if (null != bufferedImage) {
           //粗略写入到文件系统
           ByteArrayOutputStream os = new ByteArrayOutputStream();

           ImageIO.write(bufferedImage, "jpg", os);
           InputStream is = new ByteArrayInputStream(os.toByteArray());
           //run.addCarriageReturn();
           run.addPicture(is, XWPFDocument.PICTURE_TYPE_PNG, new Date().getTime() + ".jpg",
                   Units.toEMU(400), Units.toEMU(bufferedImage.getHeight() * 400 / bufferedImage.getWidth()));
           run.addCarriageReturn();
       }
       return xwpfDocument;
   }

   /**
    * 将pdf写入文档
    *
    * @param xwpfDocument
    * @return
    * @throws Exception
    */
   public static XWPFDocument writePDFXWPFDocument(XWPFDocument xwpfDocument, InputStream inputStream) throws Exception {

       PDDocument doc = PDDocument.load(inputStream);

       List<BufferedImage> imagelist = getImageListFromPDF(doc, 0);

       for (int i = 0; i < imagelist.size(); i++) {
           writeImageXWPFDocument(xwpfDocument, imagelist.get(i));
       }

       return xwpfDocument;
   }

   public static XWPFDocument addHeadTitle(XWPFDocument xwpfDocument, String title, String level) {
       XWPFParagraph paragraph;
       if (xwpfDocument.getParagraphs().size() > 0)
           paragraph = xwpfDocument.getParagraphs().get(0);
       else
           paragraph = xwpfDocument.createParagraph();

       // 段落的格式,下面及个设置,将使新添加的文字向左对其,无缩进.
       paragraph.setIndentationLeft(0);
       paragraph.setIndentationHanging(0);
       paragraph.setAlignment(ParagraphAlignment.LEFT);
       // paragraph.setWordWrap( true );
       paragraph.setStyle(getLevelValue(level));
       // 在段落中新插入一个run,这里的run我理解就是一个word文档需要显示的个体,里面可以放文字,参数0代表在段落的最前面插入
       XWPFRun run = paragraph.insertNewRun(0);
       // 设置run内容
       run.addCarriageReturn();
       run.setText(title);
       run.setFontFamily("宋体");
       run.setBold(true);
       run.setFontSize(20);
       run.addBreak(BreakType.TEXT_WRAPPING);
       run.addCarriageReturn();
       return xwpfDocument;
   }

   public static XWPFDocument addTailTitle(XWPFDocument xwpfDocument, String title, String level) {


       XWPFParagraph paragraph = xwpfDocument.createParagraph();
       paragraph.setIndentationLeft(0);
       paragraph.setIndentationHanging(0);
       paragraph.setAlignment(ParagraphAlignment.LEFT);
       // paragraph.setWordWrap( true );
       paragraph.setStyle(getLevelValue(level));

       XWPFRun run = paragraph.createRun();
       // 设置run内容
       run.setText(title);
       run.setFontFamily("宋体");
       run.setBold(true);
       run.setFontSize(20);
       run.addBreak(BreakType.TEXT_WRAPPING);
       run.addCarriageReturn();

       return xwpfDocument;
   }

   public static String getLevelValue(String level) {

       String value = levelMap.get(level);
       if (value == null) {
           value = "标题 1";
       }
       return value;
   }

   public static void generateFile(List<Map<String, List<Map<String, List<String>>>>> result, HttpServletResponse response) throws IOException {
       //
       URL url = new URL("http://xxx.docx");


       try (XWPFDocument xwpfDocument = new XWPFDocument(url.openStream());) {
           resultT(result, xwpfDocument);
           xwpfDocument.write(response.getOutputStream());
/*            File newFile = new File("f:\\report\\demo111111111111111111.docx");

           OutputStream dest = new FileOutputStream(newFile);
           xwpfDocument.write(dest);*/
       } catch (Exception e) {
           e.printStackTrace();
       }
   }

   public static void resultT(List<Map<String, List<Map<String, List<String>>>>> result, XWPFDocument xwpfDocument) throws Exception {

       int lev1Num = 0, lev2Num = 0;

       for (Map<String, List<Map<String, List<String>>>> r : result) {
           lev1Num++;
           lev2Num = 0;

           for (Map.Entry<String, List<Map<String, List<String>>>> entry : r.entrySet()) {
               String lev1Title = entry.getKey();
               //TODO   输出一级标题
               DocumentMergeUtil.addTailTitle(xwpfDocument, lev1Title, "1");

               List<Map<String, List<String>>> lev2 = entry.getValue();
               for (Map<String, List<String>> lev2M : lev2) {
                   lev2Num++;
                   for (Map.Entry<String, List<String>> lev2Obj : lev2M.entrySet()) {
                       String lev2Title = lev1Num + "." + lev2Num + lev2Obj.getKey();
                       if (!StringUtils.isNoneEmpty(lev2Obj.getKey())) {
                           //TODO  输出二级标题
                           DocumentMergeUtil.addTailTitle(xwpfDocument, lev2Title, "2");
                       }

                       for (String file : lev2Obj.getValue()) {
                           if (file.indexOf(".docx") != -1) {
                               //TODO  合并word

                               try (
                                       InputStream inputStream = new URL("http://xxx" + file).openStream();
                                       XWPFDocument document = new XWPFDocument(inputStream);
                               ) {
//                                String fileN = new Date().getTime()+"";
//
//                                File newFile = new File("f:\\report\\" + fileN + ".docx");
//                                OutputStream dest = new FileOutputStream(newFile);
//                                document.write(dest);

                                   DocumentMergeUtil.appendBody(xwpfDocument, document);
                               } catch (Exception e) {
                                   e.printStackTrace();
                               }

                           } else if (file.indexOf(".pdf") != -1) {
                               try (InputStream inputStream = new URL("http://xxx" + file).openStream();) {

                                   DocumentMergeUtil.writePDFXWPFDocument(xwpfDocument, inputStream);
                               } catch (Exception e) {
                                   e.printStackTrace();
                               }


                           }
                       }
                   }
               }
           }
       }
   }

}

Units.java

/* ====================================================================
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
==================================================================== */
package org.apache.poi.util;

/**
 * Conversions between the measurement units used in this class: EMUs, points,
 * pixels, master units, fixed-point values, characters and twips.
 *
 * @author Yegor Kozlov
 */
public class Units {
    /**
     * In Escher absolute distances are specified in
     * English Metric Units (EMUs), occasionally referred to as A units;
     * there are 360000 EMUs per centimeter, 914400 EMUs per inch, 12700 EMUs per point.
     */
    public static final int EMU_PER_PIXEL = 9525;
    public static final int EMU_PER_POINT = 12700;
    public static final int EMU_PER_CENTIMETER = 360000;

    /**
     * Master DPI (576 pixels per inch).
     * Used by the reference coordinate system in PowerPoint (HSLF)
     */
    public static final int MASTER_DPI = 576;

    /**
     * Pixels DPI (96 pixels per inch)
     */
    public static final int PIXEL_DPI = 96;

    /**
     * Points DPI (72 pixels per inch)
     */
    public static final int POINT_DPI = 72;

    /**
     * Width of one "standard character" of the default font in pixels. Same for Calibri and Arial.
     * "Standard character" defined as the widest digit character in the given font.
     * Copied from XSSFWorkbook, since that isn't available here.
     * <p>
     * Note this is only valid for workbooks using the default Excel font.
     * <p>
     * Would be nice to eventually support arbitrary document default fonts.
     */
    public static final float DEFAULT_CHARACTER_WIDTH = 7.0017f;

    /**
     * Column widths are in fractional characters, this is the EMU equivalent.
     * One character is defined as the widest value for the integers 0-9 in the
     * default font.
     */
    public static final int EMU_PER_CHARACTER = (int) (EMU_PER_PIXEL * DEFAULT_CHARACTER_WIDTH);

    /**
     * Converts points to EMUs
     * @param points points
     * @return EMUs
     */
    public static int toEMU(double points) {
        return (int) Math.rint(EMU_PER_POINT * points);
    }

    /**
     * Converts pixels to EMUs
     * @param pixels pixels
     * @return EMUs
     */
    public static int pixelToEMU(int pixels) {
        return pixels * EMU_PER_PIXEL;
    }

    /**
     * Converts EMUs to points
     * @param emu emu
     * @return points
     */
    public static double toPoints(long emu) {
        return (double) emu / EMU_PER_POINT;
    }

    /**
     * Converts a value of type FixedPoint to a floating point
     *
     * @param fixedPoint value in fixed point notation
     * @return floating point (double)
     *
     * @see <a href="http://msdn.microsoft.com/en-us/library/dd910765(v=office.12).aspx">[MS-OSHARED] - 2.2.1.6 FixedPoint</a>
     */
    public static double fixedPointToDouble(int fixedPoint) {
        int i = (fixedPoint >> 16);
        int f = fixedPoint & 0xFFFF;
        return (i + f / 65536d);
    }

    /**
     * Converts a value of type floating point to a FixedPoint
     * <p>
     * The result is the value scaled by 65536 and rounded, which is the exact
     * inverse of {@link #fixedPointToDouble(int)}: the upper 16 bits hold the
     * signed integral part and the lower 16 bits the unsigned fraction. This
     * also holds for negative fractional values and for fractions that round
     * up to the next integer.
     *
     * @param floatPoint value in floating point notation
     * @return fixedPoint value in fixed points notation
     *
     * @see <a href="http://msdn.microsoft.com/en-us/library/dd910765(v=office.12).aspx">[MS-OSHARED] - 2.2.1.6 FixedPoint</a>
     */
    public static int doubleToFixedPoint(double floatPoint) {
        return (int) Math.rint(floatPoint * 65536d);
    }

    /**
     * Converts master units to points
     * @param masterDPI value in master units
     * @return points
     */
    public static double masterToPoints(int masterDPI) {
        double points = masterDPI;
        points *= POINT_DPI;
        points /= MASTER_DPI;
        return points;
    }

    /**
     * Converts points to master units
     * @param points points
     * @return master units, rounded to the nearest integer
     */
    public static int pointsToMaster(double points) {
        points *= MASTER_DPI;
        points /= POINT_DPI;
        return (int) Math.rint(points);
    }

    /**
     * Converts points to pixels
     * @param points points
     * @return pixels, rounded to the nearest integer
     */
    public static int pointsToPixel(double points) {
        points *= PIXEL_DPI;
        points /= POINT_DPI;
        return (int) Math.rint(points);
    }

    /**
     * Converts pixels to points
     * @param pixel pixels
     * @return points
     */
    public static double pixelToPoints(int pixel) {
        double points = pixel;
        points *= POINT_DPI;
        points /= PIXEL_DPI;
        return points;
    }

    /**
     * Converts a (possibly fractional) number of characters to EMUs
     * @param characters number of standard characters
     * @return equivalent EMUs, truncated after the multiplication
     */
    public static int charactersToEMU(double characters) {
        // Multiply first: casting 'characters' alone would drop its fractional part.
        return (int) (characters * EMU_PER_CHARACTER);
    }

    /**
     * @param columnWidth specified in 256ths of a standard character
     * @return equivalent EMUs
     */
    public static int columnWidthToEMU(int columnWidth) {
        return charactersToEMU(columnWidth / 256d);
    }

    /**
     * @param twips (1/20th of a point) typically used for row heights
     * @return equivalent EMUs
     */
    public static int TwipsToEMU(short twips) {
        return (int) (twips / 20d * EMU_PER_POINT);
    }
}

/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */
package org.apache.poi.util;

/**
 * Conversions between the measurement units used in this class: EMUs, points,
 * pixels, master units, fixed-point values, characters and twips.
 *
 * @author Yegor Kozlov
 */
public class Units {
    /**
     * In Escher absolute distances are specified in
     * English Metric Units (EMUs), occasionally referred to as A units;
     * there are 360000 EMUs per centimeter, 914400 EMUs per inch, 12700 EMUs per point.
     */
    public static final int EMU_PER_PIXEL = 9525;
    public static final int EMU_PER_POINT = 12700;
    public static final int EMU_PER_CENTIMETER = 360000;

    /**
     * Master DPI (576 pixels per inch).
     * Used by the reference coordinate system in PowerPoint (HSLF)
     */
    public static final int MASTER_DPI = 576;

    /**
     * Pixels DPI (96 pixels per inch)
     */
    public static final int PIXEL_DPI = 96;

    /**
     * Points DPI (72 pixels per inch)
     */
    public static final int POINT_DPI = 72;

    /**
     * Width of one "standard character" of the default font in pixels. Same for Calibri and Arial.
     * "Standard character" defined as the widest digit character in the given font.
     * Copied from XSSFWorkbook, since that isn't available here.
     * <p>
     * Note this is only valid for workbooks using the default Excel font.
     * <p>
     * Would be nice to eventually support arbitrary document default fonts.
     */
    public static final float DEFAULT_CHARACTER_WIDTH = 7.0017f;

    /**
     * Column widths are in fractional characters, this is the EMU equivalent.
     * One character is defined as the widest value for the integers 0-9 in the
     * default font.
     */
    public static final int EMU_PER_CHARACTER = (int) (EMU_PER_PIXEL * DEFAULT_CHARACTER_WIDTH);

    /**
     * Converts points to EMUs
     * @param points points
     * @return EMUs
     */
    public static int toEMU(double points) {
        double emus = EMU_PER_POINT * points;
        return (int) Math.rint(emus);
    }

    /**
     * Converts pixels to EMUs
     * @param pixels pixels
     * @return EMUs
     */
    public static int pixelToEMU(int pixels) {
        return pixels * EMU_PER_PIXEL;
    }

    /**
     * Converts EMUs to points
     * @param emu emu
     * @return points
     */
    public static double toPoints(long emu) {
        return (double) emu / EMU_PER_POINT;
    }

    /**
     * Converts a value of type FixedPoint to a floating point
     *
     * @param fixedPoint value in fixed point notation
     * @return floating point (double)
     *
     * @see <a href="http://msdn.microsoft.com/en-us/library/dd910765(v=office.12).aspx">[MS-OSHARED] - 2.2.1.6 FixedPoint</a>
     */
    public static double fixedPointToDouble(int fixedPoint) {
        int integral = fixedPoint >> 16;
        int fraction = fixedPoint & 0xFFFF;
        return integral + fraction / 65536d;
    }

    /**
     * Converts a value of type floating point to a FixedPoint
     * <p>
     * The value is scaled by 65536 and rounded, so that decoding the result
     * with {@link #fixedPointToDouble(int)} yields the input again. Splitting
     * the value into an integral and a fractional part separately does not
     * achieve that for negative fractional values, nor when the fraction
     * rounds up to a whole unit.
     *
     * @param floatPoint value in floating point notation
     * @return fixedPoint value in fixed points notation
     *
     * @see <a href="http://msdn.microsoft.com/en-us/library/dd910765(v=office.12).aspx">[MS-OSHARED] - 2.2.1.6 FixedPoint</a>
     */
    public static int doubleToFixedPoint(double floatPoint) {
        double scaled = floatPoint * 65536d;
        return (int) Math.rint(scaled);
    }

    /**
     * Converts master units to points
     * @param masterDPI value in master units
     * @return points
     */
    public static double masterToPoints(int masterDPI) {
        double points = masterDPI;
        points *= POINT_DPI;
        points /= MASTER_DPI;
        return points;
    }

    /**
     * Converts points to master units
     * @param points points
     * @return master units, rounded to the nearest integer
     */
    public static int pointsToMaster(double points) {
        points *= MASTER_DPI;
        points /= POINT_DPI;
        return (int) Math.rint(points);
    }

    /**
     * Converts points to pixels
     * @param points points
     * @return pixels, rounded to the nearest integer
     */
    public static int pointsToPixel(double points) {
        points *= PIXEL_DPI;
        points /= POINT_DPI;
        return (int) Math.rint(points);
    }

    /**
     * Converts pixels to points
     * @param pixel pixels
     * @return points
     */
    public static double pixelToPoints(int pixel) {
        double points = pixel;
        points *= POINT_DPI;
        points /= PIXEL_DPI;
        return points;
    }

    /**
     * Converts a (possibly fractional) number of characters to EMUs
     * @param characters number of standard characters
     * @return equivalent EMUs, truncated after the multiplication
     */
    public static int charactersToEMU(double characters) {
        // The cast must cover the whole product; applied to 'characters' alone
        // it would discard the fractional characters before scaling.
        double emus = characters * EMU_PER_CHARACTER;
        return (int) emus;
    }

    /**
     * @param columnWidth specified in 256ths of a standard character
     * @return equivalent EMUs
     */
    public static int columnWidthToEMU(int columnWidth) {
        return charactersToEMU(columnWidth / 256d);
    }

    /**
     * @param twips (1/20th of a point) typically used for row heights
     * @return equivalent EMUs
     */
    public static int TwipsToEMU(short twips) {
        return (int) (twips / 20d * EMU_PER_POINT);
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

一亩尘埃

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值