Java 将多个 Word 及 PDF 文件合并至一个 Word 文档
pom.xml
<!-- XDocReport XWPF converter: core module. -->
<!-- NOTE(review): DocumentMergeUtil also imports org.apache.pdfbox, org.apache.commons.lang3 and
     javax.servlet; none of them is declared in this snippet, so they presumably have to be declared
     elsewhere in the pom (or arrive transitively) - confirm. -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.core</artifactId>
<version>1.0.6</version>
</dependency>
<!-- XDocReport XWPF converter: PDF module, pinned to the same version as the core module. -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
<version>1.0.6</version>
</dependency>
工具类DocumentMergeUtil.java
package org.admin.util;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
import javax.imageio.ImageIO;
import javax.servlet.http.HttpServletResponse;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.URL;
import java.util.*;
public class DocumentMergeUtil {
/**
 * Maps a heading level key ("1", "2", "3") to the style identifier that the title helpers
 * pass to {@code XWPFParagraph.setStyle}.
 */
static Map<String, String> levelMap = new HashMap<>();

static {
    // Index 0 holds the style for level "1", index 1 the style for level "2", and so on.
    final String[] headingStyles = {"标题 1", "标题 2", "标题 3"};
    for (int idx = 0; idx < headingStyles.length; idx++) {
        levelMap.put(String.valueOf(idx + 1), headingStyles[idx]);
    }
}
/**
 * Merges several Word documents into the first document of the list.
 *
 * <p>The body of every document after the first is appended, in list order, to the first
 * document. Before being appended, each of those documents receives a trailing page-break
 * paragraph (so the appended documents themselves are modified). One more page-break
 * paragraph is added to the merged document at the very end.
 *
 * <p>NOTE(review): because the page break is added to the END of each appended document,
 * no page break is inserted between the first and the second document - confirm that this
 * is the intended layout.
 *
 * @param xwpfDocumentList documents to merge; must contain at least one element. The first
 *                         element is modified in place and returned.
 * @return the first document of the list, now holding the content of all documents
 * @throws IllegalArgumentException if the list is {@code null} or empty
 * @throws Exception if a document body cannot be appended
 */
public static XWPFDocument merge(List<XWPFDocument> xwpfDocumentList) throws Exception {
    if (xwpfDocumentList == null || xwpfDocumentList.isEmpty()) {
        throw new IllegalArgumentException("xwpfDocumentList must contain at least one document");
    }
    // The first document is the merge target; look it up once instead of on every iteration.
    XWPFDocument target = xwpfDocumentList.get(0);
    for (XWPFDocument next : xwpfDocumentList.subList(1, xwpfDocumentList.size())) {
        next.createParagraph().setPageBreak(true);
        appendBody(target, next);
    }
    target.createParagraph().setPageBreak(true);
    return target;
}
/**
 * Creates a new document that contains a single title paragraph.
 *
 * <p>The paragraph is left-aligned without indentation and uses the style returned by
 * {@link #getLevelValue(String)}. The title run is bold, 20pt, font family "宋体", and is
 * surrounded by line breaks (one carriage return before the text; a text-wrapping break
 * and a carriage return after it).
 *
 * @param title text of the title
 * @param level heading level key, resolved through {@link #getLevelValue(String)}
 * @return the newly created document
 * @throws Exception declared for compatibility with existing callers
 */
public static XWPFDocument createTitleXWPFDocument(String title, String level) throws Exception {
    XWPFDocument document = new XWPFDocument();
    // A freshly created document has no paragraphs, so getParagraphs().get(0) would throw
    // IndexOutOfBoundsException; the title paragraph has to be created explicitly.
    XWPFParagraph paragraph = document.createParagraph();
    // Paragraph format: left-aligned, no indentation.
    paragraph.setIndentationLeft(0);
    paragraph.setIndentationHanging(0);
    paragraph.setAlignment(ParagraphAlignment.LEFT);
    paragraph.setStyle(getLevelValue(level));
    // A run is one unit of displayed content inside the paragraph; it carries the title text.
    XWPFRun run = paragraph.createRun();
    run.addCarriageReturn();
    run.setText(title);
    run.setFontFamily("宋体");
    run.setBold(true);
    run.setFontSize(20);
    run.addBreak(BreakType.TEXT_WRAPPING);
    run.addCarriageReturn();
    return document;
}
/**
 * Appends the body of {@code append} to the body of {@code src}, copying pictures along.
 *
 * <p>Every picture of {@code append} is added to {@code src} with its original picture
 * type. The relation id it had in {@code append} is mapped to the id assigned by
 * {@code src} so that picture references in the appended XML can be rewritten.
 *
 * @param src    target document; modified in place
 * @param append document whose body is appended to {@code src}
 * @throws Exception if a picture cannot be added or the merged body XML cannot be parsed
 */
public static void appendBody(XWPFDocument src, XWPFDocument append) throws Exception {
    CTBody targetBody = src.getDocument().getBody();
    CTBody appendedBody = append.getDocument().getBody();
    // Relation id in the appended document -> relation id in the target document.
    Map<String, String> relationIds = new HashMap<>();
    for (XWPFPictureData picture : append.getAllPictures()) {
        String before = append.getRelationId(picture);
        if (before == null) {
            // No relation id to rewrite (presumably the picture is not related to the
            // document part itself); a null key would break the id replacement.
            continue;
        }
        // Keep the real picture type instead of registering every image as PNG.
        String after = src.addPictureData(picture.getData(), picture.getPictureType());
        relationIds.put(before, after);
    }
    appendBody(targetBody, appendedBody, relationIds);
}
/**
 * Appends the children of {@code append} to {@code src} by concatenating their XML text.
 *
 * <p>The inner XML of {@code append} (everything between its outer start and end tag) is
 * inserted just before the closing tag of {@code src}. Relation ids listed in {@code map}
 * are rewritten in the appended XML first.
 *
 * @param src    body that receives the content; replaced with the merged body
 * @param append body whose content is appended
 * @param map    old relation id to new relation id; may be {@code null} or empty
 * @throws Exception if the concatenated XML cannot be parsed
 */
private static void appendBody(CTBody src, CTBody append, Map<String, String> map) throws Exception {
    XmlOptions optionsOuter = new XmlOptions();
    optionsOuter.setSaveOuter();
    String appendString = append.xmlText(optionsOuter);
    int appendInnerStart = appendString.indexOf(">") + 1;
    int appendInnerEnd = appendString.lastIndexOf("<");
    if (appendInnerEnd < appendInnerStart) {
        // Only a single (self-closing) tag: there is no inner content to append.
        return;
    }
    String srcString = src.xmlText();
    int srcInnerStart = srcString.indexOf(">") + 1;
    int srcInnerEnd = srcString.lastIndexOf("<");
    String prefix = srcString.substring(0, srcInnerStart);
    String mainPart = srcString.substring(srcInnerStart, srcInnerEnd);
    String suffix = srcString.substring(srcInnerEnd);
    String addPart = remapRelationIds(appendString.substring(appendInnerStart, appendInnerEnd), map);
    // Concatenate the XML of both bodies and parse it back into a body.
    CTBody makeBody = CTBody.Factory.parse(prefix + mainPart + addPart + suffix);
    src.set(makeBody);
}

/**
 * Rewrites quoted attribute values that exactly equal a key of {@code idMap}.
 *
 * <p>All ids are replaced in a single pass and only when the whole quoted value matches, so
 * {@code rId1} never alters {@code rId10} and a freshly written id is never replaced again.
 */
private static String remapRelationIds(String xml, Map<String, String> idMap) {
    if (idMap == null || idMap.isEmpty()) {
        return xml;
    }
    StringBuilder alternatives = new StringBuilder();
    for (String oldId : idMap.keySet()) {
        if (oldId == null || oldId.isEmpty()) {
            continue;
        }
        if (alternatives.length() > 0) {
            alternatives.append('|');
        }
        alternatives.append(java.util.regex.Pattern.quote(oldId));
    }
    if (alternatives.length() == 0) {
        return xml;
    }
    // Group 1 is the quote character, group 2 the old id; \1 requires the matching closing quote.
    java.util.regex.Matcher matcher = java.util.regex.Pattern
            .compile("([\"'])(" + alternatives + ")\\1")
            .matcher(xml);
    StringBuffer rewritten = new StringBuffer(xml.length());
    while (matcher.find()) {
        String quote = matcher.group(1);
        String newId = idMap.get(matcher.group(2));
        String replacement = newId == null ? matcher.group() : quote + newId + quote;
        matcher.appendReplacement(rewritten, java.util.regex.Matcher.quoteReplacement(replacement));
    }
    matcher.appendTail(rewritten);
    return rewritten.toString();
}
/**
 * Renders the pages of a PDF document as images.
 *
 * <p>Every page from {@code startPage} (0-based) up to the last page is rendered, in page
 * order. The page count is printed to standard output.
 *
 * @param document  PDF document to render; {@code null} yields an empty list
 * @param startPage index of the first page to render; {@code null} means 0
 * @return one image per rendered page; empty if {@code document} is {@code null} or
 *         {@code startPage} is not below the page count
 * @throws IllegalArgumentException if {@code startPage} is negative
 * @throws Exception if a page cannot be rendered
 */
public static List<BufferedImage> getImageListFromPDF(PDDocument document, Integer startPage) throws Exception {
    List<BufferedImage> imageList = new ArrayList<BufferedImage>();
    if (document == null) {
        return imageList;
    }
    int firstPage = startPage == null ? 0 : startPage;
    if (firstPage < 0) {
        throw new IllegalArgumentException("startPage must not be negative: " + firstPage);
    }
    int len = document.getPages().getCount();
    System.out.println("页数 " + len);
    // One renderer serves all pages; creating it inside the loop was redundant work.
    PDFRenderer renderer = new PDFRenderer(document);
    for (int i = firstPage; i < len; i++) {
        imageList.add(renderer.renderImage(i));
    }
    return imageList;
}
/**
 * Appends a new paragraph holding the given image to the document.
 *
 * <p>A paragraph with one run is always created. When {@code bufferedImage} is not
 * {@code null}, the image is encoded in memory and added to that run, 400 points wide and
 * with the height scaled by the image's aspect ratio (integer arithmetic), followed by a
 * carriage return.
 *
 * @param xwpfDocument  document to append to; modified in place
 * @param bufferedImage image to embed; {@code null} leaves the new paragraph empty
 * @return the same {@code xwpfDocument}
 * @throws IOException if the image cannot be encoded as JPEG or PNG
 * @throws Exception if the picture cannot be added to the document
 */
public static XWPFDocument writeImageXWPFDocument(XWPFDocument xwpfDocument, BufferedImage bufferedImage) throws Exception {
    XWPFParagraph xwpfParagraph = xwpfDocument.createParagraph();
    XWPFRun run = xwpfParagraph.createRun();
    if (null != bufferedImage) {
        // The declared picture type and file extension must match the encoded bytes:
        // JPEG data used to be registered as PNG.
        String format = "jpg";
        int pictureType = XWPFDocument.PICTURE_TYPE_JPEG;
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        if (!ImageIO.write(bufferedImage, format, os)) {
            // ImageIO.write returns false when no writer accepts the image (for example
            // some image types cannot be written as JPEG); fall back to PNG rather than
            // embedding an empty picture.
            os.reset();
            format = "png";
            pictureType = XWPFDocument.PICTURE_TYPE_PNG;
            if (!ImageIO.write(bufferedImage, format, os)) {
                throw new IOException("No image writer available to encode the image");
            }
        }
        try (InputStream is = new ByteArrayInputStream(os.toByteArray())) {
            run.addPicture(is, pictureType, System.currentTimeMillis() + "." + format,
                    Units.toEMU(400), Units.toEMU(bufferedImage.getHeight() * 400 / bufferedImage.getWidth()));
        }
        run.addCarriageReturn();
    }
    return xwpfDocument;
}
/**
 * Appends the pages of a PDF to the document, one rendered image per page.
 *
 * <p>The PDF is loaded from {@code inputStream}, each page is rendered via
 * {@link #getImageListFromPDF(PDDocument, Integer)} and written with
 * {@link #writeImageXWPFDocument(XWPFDocument, BufferedImage)}. The loaded PDF is closed
 * before returning; {@code inputStream} itself is left for the caller to close.
 *
 * @param xwpfDocument document to append to; modified in place
 * @param inputStream  stream providing the PDF content
 * @return the same {@code xwpfDocument}
 * @throws Exception if the PDF cannot be loaded or rendered, or an image cannot be written
 */
public static XWPFDocument writePDFXWPFDocument(XWPFDocument xwpfDocument, InputStream inputStream) throws Exception {
    // try-with-resources releases the PDF even when rendering or writing fails.
    try (PDDocument doc = PDDocument.load(inputStream)) {
        for (BufferedImage pageImage : getImageListFromPDF(doc, 0)) {
            writeImageXWPFDocument(xwpfDocument, pageImage);
        }
    }
    return xwpfDocument;
}
/**
 * Inserts a title at the very beginning of the document.
 *
 * <p>The first paragraph is reused when the document has one; otherwise a paragraph is
 * created. That paragraph is made left-aligned without indentation and is given the style
 * returned by {@link #getLevelValue(String)}. The title goes into a new run inserted at
 * position 0 of the paragraph.
 *
 * @param xwpfDocument document to modify in place
 * @param title        text of the title
 * @param level        heading level key, resolved through {@link #getLevelValue(String)}
 * @return the same {@code xwpfDocument}
 */
public static XWPFDocument addHeadTitle(XWPFDocument xwpfDocument, String title, String level) {
    final XWPFParagraph heading = xwpfDocument.getParagraphs().size() > 0
            ? xwpfDocument.getParagraphs().get(0)
            : xwpfDocument.createParagraph();

    // Left-aligned, no indentation, heading style for the requested level.
    heading.setIndentationLeft(0);
    heading.setIndentationHanging(0);
    heading.setAlignment(ParagraphAlignment.LEFT);
    heading.setStyle(getLevelValue(level));

    // Index 0 places the new run in front of whatever the paragraph already contains.
    final XWPFRun titleRun = heading.insertNewRun(0);
    titleRun.addCarriageReturn();
    titleRun.setText(title);
    titleRun.setFontFamily("宋体");
    titleRun.setBold(true);
    titleRun.setFontSize(20);
    titleRun.addBreak(BreakType.TEXT_WRAPPING);
    titleRun.addCarriageReturn();

    return xwpfDocument;
}
/**
 * Appends a title paragraph at the end of the document.
 *
 * <p>The new paragraph is left-aligned without indentation and uses the style returned by
 * {@link #getLevelValue(String)}. The title run is bold, 20pt, font family "宋体", and is
 * followed by a text-wrapping break and a carriage return.
 *
 * @param xwpfDocument document to modify in place
 * @param title        text of the title
 * @param level        heading level key, resolved through {@link #getLevelValue(String)}
 * @return the same {@code xwpfDocument}
 */
public static XWPFDocument addTailTitle(XWPFDocument xwpfDocument, String title, String level) {
    final XWPFParagraph heading = xwpfDocument.createParagraph();
    heading.setIndentationLeft(0);
    heading.setIndentationHanging(0);
    heading.setAlignment(ParagraphAlignment.LEFT);
    heading.setStyle(getLevelValue(level));

    // The run carries the title text and its character formatting.
    final XWPFRun titleRun = heading.createRun();
    titleRun.setText(title);
    titleRun.setFontFamily("宋体");
    titleRun.setBold(true);
    titleRun.setFontSize(20);
    titleRun.addBreak(BreakType.TEXT_WRAPPING);
    titleRun.addCarriageReturn();

    return xwpfDocument;
}
/**
 * Resolves a heading level key to its paragraph style.
 *
 * @param level heading level key looked up in {@code levelMap}
 * @return the mapped style, or {@code "标题 1"} when the key has no (non-null) mapping
 */
public static String getLevelValue(String level) {
    final String mappedStyle = levelMap.get(level);
    return mappedStyle != null ? mappedStyle : "标题 1";
}
/**
 * Builds the merged document from a template and writes it to the HTTP response.
 *
 * <p>The template is downloaded from a fixed URL, filled through
 * {@link #resultT(List, XWPFDocument)} and written to the response output stream.
 * Failures are reported to the caller instead of being printed and swallowed, so a broken
 * or partial response no longer looks like a success.
 *
 * @param result   nested title/file structure, passed unchanged to {@code resultT}
 * @param response HTTP response whose output stream receives the document
 * @throws IOException if the template cannot be read, the content cannot be merged, or the
 *                     document cannot be written; the original failure is kept as the cause
 */
public static void generateFile(List<Map<String, List<Map<String, List<String>>>>> result, HttpServletResponse response) throws IOException {
    // NOTE(review): "http://xxx.docx" looks like a placeholder template location - confirm.
    URL url = new URL("http://xxx.docx");
    // The template stream is a resource of its own and must be closed as well.
    try (InputStream templateStream = url.openStream();
         XWPFDocument xwpfDocument = new XWPFDocument(templateStream)) {
        resultT(result, xwpfDocument);
        xwpfDocument.write(response.getOutputStream());
    } catch (IOException e) {
        throw e;
    } catch (Exception e) {
        throw new IOException("Failed to generate the merged document", e);
    }
}
/**
 * Writes the nested title/file structure into the document.
 *
 * <p>For every element of {@code result}, each map entry contributes a level-1 title (the
 * entry key). Below it, each level-2 entry contributes a level-2 title of the form
 * {@code <level1 index>.<level2 index><key>} when its key is not empty, followed by the
 * content of its files. File names containing ".docx" are appended as Word bodies, file
 * names containing ".pdf" are appended as rendered page images; other names are ignored.
 *
 * <p>A file that cannot be fetched or merged is skipped (its stack trace is printed) so
 * that one broken attachment does not abort the whole document.
 *
 * @param result       nested structure: level-1 title -> list of (level-2 title -> files)
 * @param xwpfDocument document to append to; modified in place
 * @throws Exception declared for compatibility with existing callers
 */
public static void resultT(List<Map<String, List<Map<String, List<String>>>>> result, XWPFDocument xwpfDocument) throws Exception {
    int lev1Num = 0;
    for (Map<String, List<Map<String, List<String>>>> r : result) {
        lev1Num++;
        int lev2Num = 0;
        for (Map.Entry<String, List<Map<String, List<String>>>> entry : r.entrySet()) {
            addTailTitle(xwpfDocument, entry.getKey(), "1");
            for (Map<String, List<String>> lev2M : entry.getValue()) {
                lev2Num++;
                for (Map.Entry<String, List<String>> lev2Obj : lev2M.entrySet()) {
                    // The heading is written when a title exists. The previous check,
                    // !StringUtils.isNoneEmpty(key), was inverted and only fired for empty titles.
                    if (StringUtils.isNotEmpty(lev2Obj.getKey())) {
                        addTailTitle(xwpfDocument, lev1Num + "." + lev2Num + lev2Obj.getKey(), "2");
                    }
                    for (String file : lev2Obj.getValue()) {
                        appendRemoteFile(xwpfDocument, file);
                    }
                }
            }
        }
    }
}

/** Downloads one attachment and appends it to {@code target}; failures are printed and skipped. */
private static void appendRemoteFile(XWPFDocument target, String file) {
    if (file == null) {
        return;
    }
    // NOTE(review): "http://xxx" looks like a placeholder base URL - confirm.
    if (file.contains(".docx")) {
        try (InputStream inputStream = new URL("http://xxx" + file).openStream();
             XWPFDocument document = new XWPFDocument(inputStream)) {
            appendBody(target, document);
        } catch (Exception e) {
            e.printStackTrace();
        }
    } else if (file.contains(".pdf")) {
        try (InputStream inputStream = new URL("http://xxx" + file).openStream()) {
            writePDFXWPFDocument(target, inputStream);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
}
Units.java
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.util;
/**
 * Conversions between the length units used by Office file formats: EMUs, points, pixels,
 * master units, fixed-point values, character widths and twips.
 *
 * @author Yegor Kozlov
 */
public class Units {
    /**
     * In Escher absolute distances are specified in
     * English Metric Units (EMUs), occasionally referred to as A units;
     * there are 360000 EMUs per centimeter, 914400 EMUs per inch, 12700 EMUs per point.
     */
    public static final int EMU_PER_PIXEL = 9525;
    public static final int EMU_PER_POINT = 12700;
    public static final int EMU_PER_CENTIMETER = 360000;

    /**
     * Master DPI (576 pixels per inch).
     * Used by the reference coordinate system in PowerPoint (HSLF)
     */
    public static final int MASTER_DPI = 576;

    /**
     * Pixels DPI (96 pixels per inch)
     */
    public static final int PIXEL_DPI = 96;

    /**
     * Points DPI (72 pixels per inch)
     */
    public static final int POINT_DPI = 72;

    /**
     * Width of one "standard character" of the default font in pixels. Same for Calibri and Arial.
     * "Standard character" defined as the widest digit character in the given font.
     * Copied from XSSFWorkbook, since that isn't available here.
     * <p>
     * Note this is only valid for workbooks using the default Excel font.
     * <p>
     * Would be nice to eventually support arbitrary document default fonts.
     */
    public static final float DEFAULT_CHARACTER_WIDTH = 7.0017f;

    /**
     * Column widths are in fractional characters, this is the EMU equivalent.
     * One character is defined as the widest value for the integers 0-9 in the
     * default font.
     */
    public static final int EMU_PER_CHARACTER = (int) (EMU_PER_PIXEL * DEFAULT_CHARACTER_WIDTH);

    /**
     * Converts points to EMUs
     *
     * @param points points
     * @return EMUs, rounded to the nearest integer
     */
    public static int toEMU(double points) {
        return (int) Math.rint(EMU_PER_POINT * points);
    }

    /**
     * Converts pixels to EMUs
     *
     * @param pixels pixels
     * @return EMUs
     */
    public static int pixelToEMU(int pixels) {
        return pixels * EMU_PER_PIXEL;
    }

    /**
     * Converts EMUs to points
     *
     * @param emu emu
     * @return points
     */
    public static double toPoints(long emu) {
        return (double) emu / EMU_PER_POINT;
    }

    /**
     * Converts a value of type FixedPoint to a floating point
     *
     * @param fixedPoint value in fixed point notation
     * @return floating point (double)
     *
     * @see <a href="http://msdn.microsoft.com/en-us/library/dd910765(v=office.12).aspx">[MS-OSHARED] - 2.2.1.6 FixedPoint</a>
     */
    public static double fixedPointToDouble(int fixedPoint) {
        int i = (fixedPoint >> 16);
        int f = fixedPoint & 0xFFFF;
        return (i + f / 65536d);
    }

    /**
     * Converts a value of type floating point to a FixedPoint
     *
     * <p>The result is the value scaled by 65536 and rounded to the nearest integer, which
     * is what {@link #fixedPointToDouble(int)} decodes (signed upper 16 bits plus an
     * unsigned 16-bit fraction). Values outside the 16.16 range are not representable.
     *
     * @param floatPoint value in floating point notation
     * @return fixedPoint value in fixed points notation
     *
     * @see <a href="http://msdn.microsoft.com/en-us/library/dd910765(v=office.12).aspx">[MS-OSHARED] - 2.2.1.6 FixedPoint</a>
     */
    public static int doubleToFixedPoint(double floatPoint) {
        // Scaling the whole value keeps negative fractions correct (-1.5 is -2 + 0.5) and
        // carries into the integral part when the fraction rounds up to 1.0; combining a
        // separately rounded fraction with "(f & 0xFFFF)" got both cases wrong.
        return (int) Math.rint(floatPoint * 65536d);
    }

    /**
     * Converts master units to points.
     *
     * @param masterDPI length in master units ({@link #MASTER_DPI} per inch)
     * @return points
     */
    public static double masterToPoints(int masterDPI) {
        double points = masterDPI;
        points *= POINT_DPI;
        points /= MASTER_DPI;
        return points;
    }

    /**
     * Converts points to master units.
     *
     * @param points points
     * @return master units, rounded to the nearest integer
     */
    public static int pointsToMaster(double points) {
        points *= MASTER_DPI;
        points /= POINT_DPI;
        return (int) Math.rint(points);
    }

    /**
     * Converts points to pixels.
     *
     * @param points points
     * @return pixels, rounded to the nearest integer
     */
    public static int pointsToPixel(double points) {
        points *= PIXEL_DPI;
        points /= POINT_DPI;
        return (int) Math.rint(points);
    }

    /**
     * Converts pixels to points.
     *
     * @param pixel pixels
     * @return points
     */
    public static double pixelToPoints(int pixel) {
        double points = pixel;
        points *= POINT_DPI;
        points /= PIXEL_DPI;
        return points;
    }

    /**
     * Converts a width in (possibly fractional) standard characters to EMUs.
     *
     * @param characters width in standard characters
     * @return equivalent EMUs, truncated towards zero
     */
    public static int charactersToEMU(double characters) {
        // Multiply first, cast afterwards: "(int) characters * EMU_PER_CHARACTER" cast the
        // character count alone and silently dropped its fractional part.
        return (int) (characters * EMU_PER_CHARACTER);
    }

    /**
     * @param columnWidth specified in 256ths of a standard character
     * @return equivalent EMUs
     */
    public static int columnWidthToEMU(int columnWidth) {
        return charactersToEMU(columnWidth / 256d);
    }

    /**
     * @param twips (1/20th of a point) typically used for row heights
     * @return equivalent EMUs
     */
    public static int TwipsToEMU(short twips) {
        return (int) (twips / 20d * EMU_PER_POINT);
    }
}
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.util;
/**
 * Conversions between the length units used by Office file formats: EMUs, points, pixels,
 * master units, fixed-point values, character widths and twips.
 *
 * @author Yegor Kozlov
 */
public class Units {
    /**
     * In Escher absolute distances are specified in
     * English Metric Units (EMUs), occasionally referred to as A units;
     * there are 360000 EMUs per centimeter, 914400 EMUs per inch, 12700 EMUs per point.
     */
    public static final int EMU_PER_PIXEL = 9525;
    public static final int EMU_PER_POINT = 12700;
    public static final int EMU_PER_CENTIMETER = 360000;

    /**
     * Master DPI (576 pixels per inch).
     * Used by the reference coordinate system in PowerPoint (HSLF)
     */
    public static final int MASTER_DPI = 576;

    /**
     * Pixels DPI (96 pixels per inch)
     */
    public static final int PIXEL_DPI = 96;

    /**
     * Points DPI (72 pixels per inch)
     */
    public static final int POINT_DPI = 72;

    /**
     * Width of one "standard character" of the default font in pixels. Same for Calibri and Arial.
     * "Standard character" defined as the widest digit character in the given font.
     * Copied from XSSFWorkbook, since that isn't available here.
     * <p>
     * Note this is only valid for workbooks using the default Excel font.
     * <p>
     * Would be nice to eventually support arbitrary document default fonts.
     */
    public static final float DEFAULT_CHARACTER_WIDTH = 7.0017f;

    /**
     * Column widths are in fractional characters, this is the EMU equivalent.
     * One character is defined as the widest value for the integers 0-9 in the
     * default font.
     */
    public static final int EMU_PER_CHARACTER = (int) (EMU_PER_PIXEL * DEFAULT_CHARACTER_WIDTH);

    /**
     * Converts points to EMUs
     *
     * @param points points
     * @return EMUs, rounded to the nearest integer
     */
    public static int toEMU(double points) {
        return (int) Math.rint(EMU_PER_POINT * points);
    }

    /**
     * Converts pixels to EMUs
     *
     * @param pixels pixels
     * @return EMUs
     */
    public static int pixelToEMU(int pixels) {
        return pixels * EMU_PER_PIXEL;
    }

    /**
     * Converts EMUs to points
     *
     * @param emu emu
     * @return points
     */
    public static double toPoints(long emu) {
        return (double) emu / EMU_PER_POINT;
    }

    /**
     * Converts a value of type FixedPoint to a floating point
     *
     * @param fixedPoint value in fixed point notation
     * @return floating point (double)
     *
     * @see <a href="http://msdn.microsoft.com/en-us/library/dd910765(v=office.12).aspx">[MS-OSHARED] - 2.2.1.6 FixedPoint</a>
     */
    public static double fixedPointToDouble(int fixedPoint) {
        int i = (fixedPoint >> 16);
        int f = fixedPoint & 0xFFFF;
        return (i + f / 65536d);
    }

    /**
     * Converts a value of type floating point to a FixedPoint
     *
     * <p>The result is the value scaled by 65536 and rounded to the nearest integer, which
     * is what {@link #fixedPointToDouble(int)} decodes (signed upper 16 bits plus an
     * unsigned 16-bit fraction). Values outside the 16.16 range are not representable.
     *
     * @param floatPoint value in floating point notation
     * @return fixedPoint value in fixed points notation
     *
     * @see <a href="http://msdn.microsoft.com/en-us/library/dd910765(v=office.12).aspx">[MS-OSHARED] - 2.2.1.6 FixedPoint</a>
     */
    public static int doubleToFixedPoint(double floatPoint) {
        // Scaling the whole value keeps negative fractions correct (-1.5 is -2 + 0.5) and
        // carries into the integral part when the fraction rounds up to 1.0; combining a
        // separately rounded fraction with "(f & 0xFFFF)" got both cases wrong.
        return (int) Math.rint(floatPoint * 65536d);
    }

    /**
     * Converts master units to points.
     *
     * @param masterDPI length in master units ({@link #MASTER_DPI} per inch)
     * @return points
     */
    public static double masterToPoints(int masterDPI) {
        double points = masterDPI;
        points *= POINT_DPI;
        points /= MASTER_DPI;
        return points;
    }

    /**
     * Converts points to master units.
     *
     * @param points points
     * @return master units, rounded to the nearest integer
     */
    public static int pointsToMaster(double points) {
        points *= MASTER_DPI;
        points /= POINT_DPI;
        return (int) Math.rint(points);
    }

    /**
     * Converts points to pixels.
     *
     * @param points points
     * @return pixels, rounded to the nearest integer
     */
    public static int pointsToPixel(double points) {
        points *= PIXEL_DPI;
        points /= POINT_DPI;
        return (int) Math.rint(points);
    }

    /**
     * Converts pixels to points.
     *
     * @param pixel pixels
     * @return points
     */
    public static double pixelToPoints(int pixel) {
        double points = pixel;
        points *= POINT_DPI;
        points /= PIXEL_DPI;
        return points;
    }

    /**
     * Converts a width in (possibly fractional) standard characters to EMUs.
     *
     * @param characters width in standard characters
     * @return equivalent EMUs, truncated towards zero
     */
    public static int charactersToEMU(double characters) {
        // Multiply first, cast afterwards: "(int) characters * EMU_PER_CHARACTER" cast the
        // character count alone and silently dropped its fractional part.
        return (int) (characters * EMU_PER_CHARACTER);
    }

    /**
     * @param columnWidth specified in 256ths of a standard character
     * @return equivalent EMUs
     */
    public static int columnWidthToEMU(int columnWidth) {
        return charactersToEMU(columnWidth / 256d);
    }

    /**
     * @param twips (1/20th of a point) typically used for row heights
     * @return equivalent EMUs
     */
    public static int TwipsToEMU(short twips) {
        return (int) (twips / 20d * EMU_PER_POINT);
    }
}