提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
apache 操作 word(.docx)
业务场景
多个docx文档合并到一个文档中
遇到的坑
开始做的时候,是在网上找的资料:通过 XWPFDocument.getDocument().getBody().xmlText() 获取每个文档 body 的 xml 字符串,把所有的 xml 拼接在一起,再用 CTBody.Factory.parse 解析后通过 CTBody.set(...) 写回目标文档.
/**
 * Appends the body content of one document to another by concatenating their XML text.
 *
 * <p>The last {@code <w:sectPr>...</w:sectPr>} element of the appended body is dropped when
 * present, so the merged body does not carry header/footer references from the appended
 * document. The merged XML is parsed and written back into {@code src}.
 *
 * <p>NOTE(review): replacements from {@code map} are applied one after another, so a mapping
 * whose new id equals another entry's old id can still be rewritten twice.
 *
 * @param src    body of the target document; its content is replaced by the merged body
 * @param append body of the document to append
 * @param map    old relationship id to new relationship id; applied to the appended XML as
 *               whole quoted attribute values; may be {@code null} or empty
 * @throws Exception if the concatenated XML cannot be parsed back into a body
 */
private static void appendBody(CTBody src, CTBody append, Map<String, String> map) throws Exception {
    XmlOptions optionsOuter = new XmlOptions();
    optionsOuter.setSaveOuter();
    String appendString = append.xmlText(optionsOuter);
    String srcString = src.xmlText();

    // Split the target body into opening tag, inner content and closing tag.
    int srcOpenEnd = srcString.indexOf(">") + 1;
    int srcCloseStart = srcString.lastIndexOf("<");
    String prefix = srcString.substring(0, srcOpenEnd);
    String mainPart = srcString.substring(srcOpenEnd, srcCloseStart);
    String sufix = srcString.substring(srcCloseStart);

    if (map != null) {
        for (Map.Entry<String, String> entry : map.entrySet()) {
            // Match the complete quoted value so that "rId1" does not also rewrite "rId10".
            appendString = appendString.replace(
                    "\"" + entry.getKey() + "\"", "\"" + entry.getValue() + "\"");
        }
    }

    // Only the last <w:sectPr> is removed; skip the cut when the element is missing
    // instead of slicing with a -1 index.
    String sectPrCloseTag = "</w:sectPr>";
    int sectPrStart = appendString.lastIndexOf("<w:sectPr");
    int sectPrClose = appendString.lastIndexOf(sectPrCloseTag);
    if (sectPrStart >= 0 && sectPrClose > sectPrStart) {
        appendString = appendString.substring(0, sectPrStart)
                + appendString.substring(sectPrClose + sectPrCloseTag.length());
    }

    // Keep only what lies between the appended body's outer tags.
    String addPart = appendString.substring(appendString.indexOf(">") + 1,
            appendString.lastIndexOf("<"));
    CTBody makeBody = CTBody.Factory.parse(prefix + mainPart + addPart + sufix);
    src.set(makeBody);
}
<w:headerReference w:type="default" r:id="rId9"/>
新合并的word中 没有rId9对应的xml文件,导致word打不开.所以当时就想了个办法,匹配<w:sectPr>标签并替换掉.
上面这个方法在不处理页眉页脚的情况下,也是可以合并word文档的.
解决方案
直接上代码
/**
 * Manual test driver: merges a fixed set of local .docx files into one result file.
 *
 * <p>The {@code type} given to each {@code MergeModel} only tells the merge code which kind
 * of document it is handling.
 *
 * @throws IOException declared for callers; kept from the original signature
 */
public static void testMergeWord() throws IOException {
    List<MergeModel> collect = new ArrayList<>();
    collect.add(new MergeModel("", "L:\\文件1.docx"));
    collect.add(new MergeModel("zc", "L:\\文件2.docx"));
    collect.add(new MergeModel("xj", "L:\\文件3.docx"));
    collect.add(new MergeModel("lr", "L:\\文件4.docx"));
    // The drive colon was missing here ("L\\..."), which pointed at a relative path.
    collect.add(new MergeModel("qy1", "L:\\文件5.docx"));
    // Files 6 to 16 are all of type "fz".
    for (int fileNo = 6; fileNo <= 16; fileNo++) {
        collect.add(new MergeModel("fz", "L:\\文件" + fileNo + ".docx"));
    }
    mergeDoc(collect, "L:\\结果.docx");
}
/**
 * Merges several .docx files into one output file.
 *
 * <p>The first entry is the base document. When its type is blank it is treated as the report
 * cover and passed through {@code dealCover}. For every later entry the header/footer
 * references are stripped with {@code clearSectPr}, then its body paragraphs and tables are
 * copied, in order, onto the end of the base document. Additional handling:
 * <ul>
 *   <li>entries with {@code isReportModuleWord()} true get their body-level section properties
 *       moved onto their last paragraph (type {@code "zc"} restarts page numbering at 1) and,
 *       after copying, the first non-blank footer attached to the last copied paragraph;</li>
 *   <li>entries whose type starts with {@code "fz"} get a header and footer attached to every
 *       copied paragraph that carries section properties; the header/footer content is taken
 *       from the first "fz" document that provides non-blank ones, and page numbering is
 *       restarted at 1 on the first such section.</li>
 * </ul>
 *
 * <p>Failures are logged and not rethrown. The output file is opened only after the merge
 * succeeded, so a failed merge does not leave a truncated file behind.
 *
 * <p>NOTE(review): the XWPFDocument instances are not closed here because how
 * {@code MergeModel.getOpcPackage()} opens its package is not visible from this method;
 * confirm who owns those packages.
 *
 * @param srcDocxs documents to merge, in output order; {@code null} or empty is a no-op
 * @param destDocx path of the merged .docx file to write
 */
public static void mergeDoc(List<MergeModel> srcDocxs, String destDocx) {
    if (srcDocxs == null || srcDocxs.isEmpty()) {
        return;
    }
    try {
        MergeModel mergeModel = srcDocxs.get(0);
        XWPFDocument src1Document = new XWPFDocument(mergeModel.getOpcPackage());
        // A blank type marks the report cover, which needs a page break after it.
        if (StringUtils.isBlank(mergeModel.getType())) {
            dealCover(src1Document);
        }
        boolean firFz = false;
        List<XWPFParagraph> headerParagraphs = new ArrayList<>();
        List<XWPFParagraph> fzFooterParagraphs = new ArrayList<>();
        for (int i = 1; i < srcDocxs.size(); i++) {
            MergeModel mergeModel2 = srcDocxs.get(i);
            XWPFDocument src2Document = new XWPFDocument(mergeModel2.getOpcPackage());
            String type = mergeModel2.getType();
            // Null-safe: a missing type is simply not an "fz" document.
            boolean isFz = type != null && type.startsWith("fz");
            clearSectPr(src2Document);
            if (mergeModel2.isReportModuleWord()) {
                // Only "zc" documents restart page numbering.
                BigInteger startNum = "zc".equals(type) ? BigInteger.ONE : null;
                copySectPr(src2Document, startNum);
            }
            if (isFz) {
                // The header content is captured once, from the first non-blank header found.
                if (CollectionUtils.isEmpty(headerParagraphs)) {
                    List<XWPFHeader> headerList = src2Document.getHeaderList();
                    for (XWPFHeader xwpfHeader : headerList) {
                        String text = xwpfHeader.getText();
                        if (StringUtils.isNotBlank(text.trim())) {
                            headerParagraphs = xwpfHeader.getParagraphs();
                            break;
                        }
                    }
                }
                // Same for the footer content.
                if (CollectionUtils.isEmpty(fzFooterParagraphs)) {
                    List<XWPFFooter> footerList = src2Document.getFooterList();
                    for (XWPFFooter xwpfFooter : footerList) {
                        String text = xwpfFooter.getText();
                        if (StringUtils.isNotBlank(text.trim())) {
                            fzFooterParagraphs = xwpfFooter.getParagraphs();
                            break;
                        }
                    }
                }
            }
            List<IBodyElement> bodyElements = src2Document.getBodyElements();
            for (IBodyElement element : bodyElements) {
                if (element instanceof XWPFParagraph) {
                    XWPFParagraph paragraph = (XWPFParagraph) element;
                    XWPFParagraph xwpfParagraph = src1Document.createParagraph();
                    xwpfParagraph.getCTP().set(paragraph.getCTP().copy());
                    if (isFz) {
                        // Only paragraphs that end a section get a header/footer attached.
                        CTPPr pPr = xwpfParagraph.getCTP().getPPr();
                        if (pPr == null) {
                            continue;
                        }
                        CTSectPr sectPr = pPr.getSectPr();
                        if (sectPr == null) {
                            continue;
                        }
                        createDefaultHeader(src1Document, sectPr, headerParagraphs);
                        copyFooter(src1Document, sectPr, fzFooterParagraphs);
                        // Page numbering restarts only on the first "fz" section.
                        if (!firFz) {
                            sectPr.addNewPgNumType().setStart(BigInteger.ONE);
                        }
                        firFz = true;
                    }
                } else if (element instanceof XWPFTable) {
                    XWPFTable xwpfTable = (XWPFTable) element;
                    XWPFTable table = src1Document.createTable();
                    table.getCTTbl().set(xwpfTable.getCTTbl().copy());
                }
            }
            if (mergeModel2.isReportModuleWord()) {
                // Attach this document's footer to the section ending at the last copied paragraph.
                XWPFParagraph paragraph = src1Document.getLastParagraph();
                CTPPr pPr = paragraph.getCTP().getPPr();
                if (pPr == null) {
                    pPr = paragraph.getCTP().addNewPPr();
                }
                CTSectPr sectPr = pPr.getSectPr();
                if (sectPr == null) {
                    sectPr = pPr.addNewSectPr();
                }
                List<XWPFParagraph> xwpfParagraphList = new ArrayList<>();
                List<XWPFFooter> footerList = src2Document.getFooterList();
                for (XWPFFooter xwpfFooter : footerList) {
                    if (StringUtils.isNotBlank(xwpfFooter.getText())) {
                        xwpfParagraphList = xwpfFooter.getParagraphs();
                        break;
                    }
                }
                copyFooter(src1Document, sectPr, xwpfParagraphList);
            }
        }
        // Write the merged document; the stream is closed even when writing fails.
        try (OutputStream dest = new FileOutputStream(destDocx)) {
            src1Document.write(dest);
        }
    } catch (Exception e) {
        log.error(Consts.EMPTY, e);
    }
}
/**
 * Prepares the cover document: appends a page break, closes the cover with its own section
 * (page size and margins copied from the last paragraph-level section found), strips all
 * header/footer references and re-attaches the cover footer with page numbering starting at 1.
 *
 * @param docx the cover document; modified in place
 */
public static void dealCover(XWPFDocument docx) {
    // Remember the paragraphs of the footer we want to keep; only one footer is expected
    // to contain the marker text, so the first match wins.
    List<XWPFParagraph> coverFooterParagraphs = new ArrayList<>();
    for (XWPFFooter candidate : docx.getFooterList()) {
        String footerText = candidate.getText();
        if (StringUtils.isNotBlank(footerText) && footerText.contains("页脚关键信息")) {
            coverFooterParagraphs = candidate.getParagraphs();
            break;
        }
    }

    // Business requirement: the cover is followed by a page break.
    docx.createParagraph().createRun().addBreak(BreakType.PAGE);

    // Walk the paragraphs backwards and take page margins and size from the first
    // paragraph-level section found.
    CTPageMar margins = null;
    CTPageSz pageSize = null;
    List<XWPFParagraph> allParagraphs = docx.getParagraphs();
    for (int idx = allParagraphs.size() - 1; idx >= 0; idx--) {
        CTPPr candidatePPr = allParagraphs.get(idx).getCTP().getPPr();
        CTSectPr existingSection = candidatePPr == null ? null : candidatePPr.getSectPr();
        if (existingSection != null) {
            margins = existingSection.getPgMar();
            pageSize = existingSection.getPgSz();
            break;
        }
    }

    // A new trailing paragraph carries the section properties of the cover.
    CTP carrier = docx.createParagraph().getCTP();
    CTPPr carrierPPr = carrier.getPPr() != null ? carrier.getPPr() : carrier.addNewPPr();
    CTSectPr coverSection =
            carrierPPr.getSectPr() != null ? carrierPPr.getSectPr() : carrierPPr.addNewSectPr();
    if (margins != null) {
        coverSection.addNewPgMar().set(margins.copy());
    }
    if (pageSize != null) {
        coverSection.addNewPgSz().set(pageSize.copy());
    }

    // Drop every existing header/footer reference; the footer is re-created below.
    clearSectPr(docx);

    // The cover section starts page numbering at 1.
    coverSection.addNewPgNumType().setStart(new BigInteger("1"));

    // Re-attach the cover footer captured at the top.
    copyFooter(docx, coverSection, coverFooterParagraphs);
}
/**
 * Replaces every section-properties element of the document with a reduced copy that has no
 * header/footer references.
 *
 * <p>Paragraph-level sections keep page size, page margins and the rsid attributes. The
 * body-level section additionally keeps the document grid and the column settings. Elements
 * missing from the source section are skipped.
 *
 * @param xwpfDocument the document to modify in place
 */
public static void clearSectPr(XWPFDocument xwpfDocument) {
    for (XWPFParagraph paragraph : xwpfDocument.getParagraphs()) {
        CTPPr pPr = paragraph.getCTP().getPPr();
        if (pPr == null) {
            continue;
        }
        CTSectPr sectPr = pPr.getSectPr();
        if (sectPr == null) {
            continue;
        }
        pPr.setSectPr(copyWithoutHeaderFooter(sectPr, false));
    }
    CTSectPr bodySectPr = xwpfDocument.getDocument().getBody().getSectPr();
    if (bodySectPr == null) {
        return;
    }
    xwpfDocument.getDocument().getBody().setSectPr(copyWithoutHeaderFooter(bodySectPr, true));
}

/**
 * Builds a new section-properties element holding only the layout settings of {@code source}.
 *
 * @param source          section properties to copy from
 * @param copyGridAndCols whether the document grid and column settings are copied as well
 * @return a detached copy without header/footer references
 */
private static CTSectPr copyWithoutHeaderFooter(CTSectPr source, boolean copyGridAndCols) {
    CTSectPr reduced = CTSectPr.Factory.newInstance();
    // Each of these child elements is optional, so guard before copying.
    if (source.getPgSz() != null) {
        reduced.addNewPgSz().set(source.getPgSz().copy());
    }
    if (copyGridAndCols && source.getDocGrid() != null) {
        reduced.addNewDocGrid().set(source.getDocGrid().copy());
    }
    if (source.getPgMar() != null) {
        reduced.addNewPgMar().set(source.getPgMar().copy());
    }
    if (copyGridAndCols && source.getCols() != null) {
        reduced.addNewCols().set(source.getCols().copy());
    }
    reduced.setRsidR(source.getRsidR());
    reduced.setRsidRPr(source.getRsidRPr());
    reduced.setRsidSect(source.getRsidSect());
    return reduced;
}
/**
 * Creates a new footer part in the document, fills it with copies of the given paragraphs and
 * references it from the given section properties.
 *
 * @param docx              document that receives the new footer part
 * @param sectPr            section properties that get the new footer reference
 * @param xwpfParagraphList paragraphs whose content is copied into the new footer
 */
public static void copyFooter(XWPFDocument docx, CTSectPr sectPr, List<XWPFParagraph> xwpfParagraphList) {
    // The new part is numbered by the value getRelationIndex returns for footers.
    int footerIndex = getRelationIndex(XWPFRelation.FOOTER, docx);
    XWPFFooter createdFooter = (XWPFFooter) docx.createRelationship(
            XWPFRelation.FOOTER, XWPFFactory.getInstance(), footerIndex);

    for (XWPFParagraph sourceParagraph : xwpfParagraphList) {
        XWPFParagraph target = createdFooter.createParagraph();
        target.getCTP().set(sourceParagraph.getCTP().copy());
    }

    // Point the section at the new footer through its relationship id.
    String footerRelId = docx.getRelationId(createdFooter);
    sectPr.addNewFooterReference().setId(footerRelId);
}
/**
 * Returns the index to use for the next part of the given relation type: the number of
 * existing relationships of that type in the document, plus one.
 *
 * @param relation relation type to count, e.g. header or footer
 * @param doc      document whose relationship parts are inspected
 * @return count of matching relationships + 1
 */
public static int getRelationIndex(XWPFRelation relation, XWPFDocument doc) {
    int matching = 0;
    for (POIXMLDocumentPart.RelationPart part : doc.getRelationParts()) {
        String partType = part.getRelationship().getRelationshipType();
        if (partType.equals(relation.getRelation())) {
            matching++;
        }
    }
    return matching + 1;
}
/**
 * Copies the body-level section properties onto the document's last paragraph, leaving out
 * header/footer references, and optionally sets the starting page number.
 *
 * <p>Does nothing when the body has no section properties or when the last paragraph already
 * carries its own. Page size, document grid, page margins and column settings are copied only
 * when present in the source. When the document has no paragraph at all, an empty one is
 * created to carry the section properties.
 *
 * @param xwpfDocument document to modify in place
 * @param startNum     first page number of the section, or {@code null} to leave numbering unset
 */
public static void copySectPr(XWPFDocument xwpfDocument, BigInteger startNum) {
    if (!xwpfDocument.getDocument().getBody().isSetSectPr()) {
        return;
    }
    CTBody body = xwpfDocument.getDocument().getBody();
    CTSectPr src2SectPr = body.getSectPr();

    // getLastParagraph() fails on a document without paragraphs (e.g. tables only).
    XWPFParagraph xwpfParagraph = xwpfDocument.getParagraphs().isEmpty()
            ? xwpfDocument.createParagraph()
            : xwpfDocument.getLastParagraph();
    CTPPr pPr = xwpfParagraph.getCTP().getPPr();
    if (pPr == null) {
        pPr = xwpfParagraph.getCTP().addNewPPr();
    }
    // An existing paragraph-level section is left untouched.
    if (pPr.getSectPr() != null) {
        return;
    }

    CTSectPr ctSectPr = pPr.addNewSectPr();
    // Each of these child elements is optional in the source, so guard before copying.
    if (src2SectPr.getPgSz() != null) {
        ctSectPr.addNewPgSz().set(src2SectPr.getPgSz().copy());
    }
    if (src2SectPr.getDocGrid() != null) {
        ctSectPr.addNewDocGrid().set(src2SectPr.getDocGrid().copy());
    }
    if (src2SectPr.getPgMar() != null) {
        ctSectPr.addNewPgMar().set(src2SectPr.getPgMar().copy());
    }
    if (src2SectPr.getCols() != null) {
        ctSectPr.addNewCols().set(src2SectPr.getCols().copy());
    }
    ctSectPr.setRsidR(src2SectPr.getRsidR());
    ctSectPr.setRsidRPr(src2SectPr.getRsidRPr());
    ctSectPr.setRsidSect(src2SectPr.getRsidSect());
    if (startNum != null) {
        CTPageNumber ctPageNumber = ctSectPr.addNewPgNumType();
        ctPageNumber.setStart(startNum);
    }
}
/**
 * Creates a new header part in the document, fills it with copies of the given paragraphs and
 * references it from the given section properties.
 *
 * @param docx          document that receives the new header part
 * @param ctSectPr      section properties that get the new header reference
 * @param paragraphList paragraphs whose content is copied into the new header
 */
public static void createDefaultHeader(XWPFDocument docx, CTSectPr ctSectPr, List<XWPFParagraph> paragraphList) {
    // The new part is numbered by the value getRelationIndex returns for headers.
    int headerIndex = getRelationIndex(XWPFRelation.HEADER, docx);
    XWPFHeader createdHeader = (XWPFHeader) docx.createRelationship(
            XWPFRelation.HEADER, XWPFFactory.getInstance(), headerIndex);

    for (XWPFParagraph sourceParagraph : paragraphList) {
        CTP targetCtp = createdHeader.createParagraph().getCTP();
        targetCtp.set(sourceParagraph.getCTP().copy());
    }

    // Point the section at the new header through its relationship id.
    String headerRelId = docx.getRelationId(createdHeader);
    ctSectPr.addNewHeaderReference().setId(headerRelId);
}
总结
个人感觉上面代码主要是copyFooter和createDefaultHeader方法解决了我页眉页脚的问题,其他只是基于我业务做出的关联改动.
页眉页脚
<w:pPr>
<w:pStyle w:val="1"/>
<w:rPr>
<w:rFonts w:eastAsiaTheme="minorEastAsia"/>
</w:rPr>
<w:sectPr w:rsidR="00AC18FB">
<w:headerReference w:type="default" r:id="rId9"/>
<w:footerReference w:type="default" r:id="rId10"/>
<w:pgSz w:w="12240" w:h="15840"/>
<w:pgMar w:top="1440" w:right="1800" w:bottom="1440" w:left="1800" w:header="720" w:footer="720" w:gutter="0"/>
<w:cols w:space="720"/>
</w:sectPr>
</w:pPr>
w:headerReference和w:footerReference控制页眉页脚,后面的type就是下图的信息
里面的rId,通过 .\word\_rels\document.xml.rels 映射到 .\word 目录下的各种xml文件.例如:footer1.xml,header1.xml
注: 上面的路径是word.docx,把docx后缀修改成.zip,然后解压得到的文件
上面操作的原理就是创建页眉页脚的关联xml,然后拿到关联id设置给w:headerReference标签的 r:id属性.
合并word
开始采用获取body的xml进行合并,但是合并xml后,src1Document的getParagraphs方法获取到的还是合并前的段落,导致我后续找不到插入分页符的节点,最终选择了一个个IBodyElement 进行copy合并。如果只是简单的内容合并,拼接xml的方法完全可以实现。