关于word合并、分割、转换问题

关于word合并、分割、转换问题

最近项目中有个这样的需求:要把word 按章节(大纲)拆分成多个word,然后再将这些word合并成一个整体。看到这个需求,其实也是头疼,100个程序员中应该100位都没有做过类似的开发,最多的就是用poi 读取、生成word。那么接下来,我会将最近研究的word切割、转换、合并以笔记的形式记录,也算作为积累吧!

word 分割 :

   word 分割,使用的 poi,这个还真有点难,如果不是同事给过帮助,分割这块还是很难搞定,不过合并用的是第三方插件,很快就搞定了,上代码吧:

package com.sysware.soft603.util.backUp;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.xwpf.usermodel.BodyElementType;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFStyles;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

import com.sysware.fai.entity.BookMarkInfo;

public class POItest1 {
	/**
	 * Program entry point; runs the document split once.
	 *
	 * @param args command-line arguments (not read)
	 * @throws IOException declared for callers; the split itself handles its own I/O errors
	 */
	public static void main(String[] args) throws IOException {
		splitWord();
	}

	/**
	 * Splits the hard-coded source .docx into one .docx per outline section and
	 * then hands the resulting section list to {@link #CreateXml(List)}.
	 *
	 * <p>A section starts at every paragraph whose style name contains
	 * {@code "heading"} followed by a numeric level, and runs up to the next such
	 * paragraph. Everything before the first heading becomes a level-1 section
	 * named "首页". Each section is written to {@code d:\poi\<uuid>.docx}.
	 *
	 * <p>The source file is re-read for every section because
	 * {@code removeBodyElement} mutates the loaded document.
	 */
	private static void splitWord() {
		String path = "d:\\GJB 1362A-2007 军工产品定型程序和要求.docx";
		// Directory that receives the generated section files.
		String outputDir = "d:\\poi\\";

		XWPFDocument doc;
		try {
			doc = loadDocument(path);
		} catch (IOException e) {
			// Nothing can be split without the source document; stop here
			// instead of running into a NullPointerException further down.
			e.printStackTrace();
			return;
		}

		List<IBodyElement> bodyElements = doc.getBodyElements();
		XWPFStyles styles = doc.getStyles();

		// Parallel lists: entry n of each list describes section n.
		List<Integer> sectionStarts = new ArrayList<Integer>();
		List<String> sectionNames = new ArrayList<String>();
		List<Integer> sectionLevels = new ArrayList<Integer>();
		List<String> sectionIds = new ArrayList<String>();

		if (!bodyElements.isEmpty()) {
			// Content in front of the first heading is kept as a level-1 front page.
			sectionStarts.add(0);
			sectionNames.add("首页");
			sectionLevels.add(1);
			sectionIds.add(java.util.UUID.randomUUID().toString());
		}

		for (int i = 0; i < bodyElements.size(); i++) {
			IBodyElement bodyElement = bodyElements.get(i);
			if (bodyElement.getElementType() != BodyElementType.PARAGRAPH) {
				continue;
			}
			XWPFParagraph para = (XWPFParagraph) bodyElement;
			// Resolve the level first so that all four lists are appended together
			// (or not at all) and can never get out of step.
			int level = headingLevel(styles, para);
			if (level < 1) {
				continue;
			}
			sectionStarts.add(i);
			sectionNames.add(para.getParagraphText());
			sectionLevels.add(level);
			sectionIds.add(java.util.UUID.randomUUID().toString());
		}

		// Parent of a section = nearest preceding section with a smaller level.
		// Level-1 sections get the placeholder " ".
		List<String> parentIds = new ArrayList<String>();
		for (int i = 0; i < sectionStarts.size(); i++) {
			int level = sectionLevels.get(i);
			String parentId = " ";
			if (level > 1) {
				for (int k = i - 1; k >= 0; k--) {
					if (sectionLevels.get(k) < level) {
						parentId = sectionIds.get(k);
						break;
					}
				}
			}
			parentIds.add(parentId);
		}

		System.out.println("al_duanLuo" + sectionStarts);
		System.out.println("al2_name=" + sectionNames);
		System.out.println("al3_jiBie=" + sectionLevels);
		System.out.println("al4_parentId=" + parentIds);

		// Exclusive end of the last section. Using size() (not size() - 1) keeps
		// the final body element of the document in the last section.
		int bodyCount = bodyElements.size();
		sectionStarts.add(bodyCount);

		List<BookMarkInfo> bookMarkInfos = new ArrayList<BookMarkInfo>();
		for (int k = 0; k < sectionStarts.size() - 1; k++) {
			int start = sectionStarts.get(k);
			int end = sectionStarts.get(k + 1);
			try {
				XWPFDocument section = loadDocument(path);

				// Strip the multi-level list numbering from the heading style so the
				// split file does not restart numbering. Section 0 is the front page
				// and has no heading paragraph.
				if (k != 0) {
					removeHeadingNumbering(section, start);
				}

				// Remove back-to-front so earlier indices stay valid:
				// first everything after this section, then everything before it.
				for (int u = bodyCount - 1; u >= end; u--) {
					section.removeBodyElement(u);
				}
				for (int l = start - 1; l >= 0; l--) {
					section.removeBodyElement(l);
				}

				BookMarkInfo bookMarkInfo = new BookMarkInfo();
				bookMarkInfo.setId(sectionIds.get(k));
				bookMarkInfo.setPid(parentIds.get(k));
				bookMarkInfo.setName(sectionNames.get(k));
				bookMarkInfo.setLevel(sectionLevels.get(k));
				bookMarkInfos.add(bookMarkInfo);

				try (OutputStream out = new FileOutputStream(outputDir
						+ sectionIds.get(k) + ".docx")) {
					section.write(out);
				}
			} catch (IOException | RuntimeException e) {
				// Best effort per section: a failure here (I/O or an unchecked
				// exception from the document model) must not abort the others.
				e.printStackTrace();
			}
		}
		System.out.println("over");
		CreateXml(bookMarkInfos);
	}

	/**
	 * Loads a .docx file and closes the underlying file stream before returning.
	 *
	 * @param path file system path of the .docx file
	 * @return the loaded document
	 * @throws IOException if the file cannot be opened or read
	 */
	private static XWPFDocument loadDocument(String path) throws IOException {
		try (InputStream is = new FileInputStream(path)) {
			return new XWPFDocument(is);
		}
	}

	/**
	 * Returns the outline level of a paragraph, taken from its style name
	 * (the text after {@code "heading"}, e.g. {@code "heading 2"} gives 2).
	 *
	 * @param styles style table of the document, may be {@code null}
	 * @param para   paragraph to inspect
	 * @return the heading level (1 or greater), or 0 if the paragraph is not a heading
	 */
	private static int headingLevel(XWPFStyles styles, XWPFParagraph para) {
		if (styles == null || para.getStyle() == null) {
			return 0;
		}
		org.apache.poi.xwpf.usermodel.XWPFStyle style = styles.getStyle(para.getStyle());
		if (style == null || style.getCTStyle().getName() == null) {
			return 0;
		}
		String styleName = style.getCTStyle().getName().getVal();
		if (styleName == null || !styleName.contains("heading")) {
			return 0;
		}
		String[] parts = styleName.split("heading");
		if (parts.length < 2) {
			return 0;
		}
		try {
			int level = Integer.parseInt(parts[1].trim());
			return level >= 1 ? level : 0;
		} catch (NumberFormatException e) {
			// Style name mentions "heading" but carries no numeric level.
			return 0;
		}
	}

	/**
	 * Removes the numbering properties from the style used by the heading
	 * paragraph at the given body index. Does nothing when the style, its
	 * paragraph properties or the numbering entry is absent.
	 *
	 * @param section      freshly loaded copy of the source document
	 * @param headingIndex body-element index of the section's heading paragraph
	 */
	private static void removeHeadingNumbering(XWPFDocument section, int headingIndex) {
		XWPFParagraph heading = (XWPFParagraph) section.getBodyElements().get(headingIndex);
		XWPFStyles sectionStyles = section.getStyles();
		if (sectionStyles == null || heading.getStyleID() == null) {
			return;
		}
		org.apache.poi.xwpf.usermodel.XWPFStyle style = sectionStyles.getStyle(heading.getStyleID());
		if (style != null && style.getCTStyle().getPPr() != null
				&& style.getCTStyle().getPPr().isSetNumPr()) {
			style.getCTStyle().getPPr().unsetNumPr();
		}
	}

	/**
	 * Rebuilds the outline file {@code d:/MyXml.xml} from the given sections.
	 *
	 * <p>All child elements of the existing root are removed, then one
	 * {@code item} element is added per level-1 section, with its descendants
	 * attached through {@link #GetXElement}. Level-1 sections are numbered
	 * 1, 2, 3, ... in list order; a level-1 section at list index 0 is left
	 * unnumbered and does not consume a number.
	 *
	 * @param bookMarkInfos sections in document order
	 */
	public static void CreateXml(List<BookMarkInfo> bookMarkInfos) {
		String target = "d:/MyXml.xml";

		org.dom4j.Document document;
		try {
			document = new SAXReader().read(new File(target));
		} catch (org.dom4j.DocumentException e) {
			e.printStackTrace();
			return;
		}

		// Clear out whatever items the previous run left under the root.
		Element root = document.getRootElement();
		List<Element> oldItems = root.elements();
		for (Element oldItem : oldItems) {
			root.remove(oldItem);
		}

		// Running chapter number for level-1 sections.
		int chapterNo = 0;
		for (int index = 0; index < bookMarkInfos.size(); index++) {
			BookMarkInfo info = bookMarkInfos.get(index);
			if (info.getLevel() != 1) {
				continue;
			}
			// The very first entry keeps number 0, i.e. stays unnumbered.
			if (index != 0) {
				chapterNo++;
			}
			String label = (chapterNo == 0) ? info.getName() : chapterNo + info.getName();

			Element item = root.addElement("item")
					.addAttribute("id", info.getId())
					.addAttribute("name", label)
					.addAttribute("filename", info.getId() + ".docx");
			GetXElement(item, info, bookMarkInfos, String.valueOf(chapterNo), 0);
		}
		saveXml(target, document);
	}

	/**
	 * Writes the document to {@code target} as pretty-printed UTF-8 XML with
	 * CRLF line endings. I/O failures are printed and otherwise ignored.
	 *
	 * @param target   path of the file to (over)write
	 * @param document dom4j document to serialize
	 */
	private static void saveXml(String target, org.dom4j.Document document) {
		OutputFormat outputFormat = OutputFormat.createPrettyPrint();
		outputFormat.setLineSeparator("\r\n");
		// try-with-resources closes the file even when writing fails part-way.
		try (FileOutputStream fileOut = new FileOutputStream(target);
				OutputStreamWriter outputStreamWriter = new OutputStreamWriter(
						fileOut, "UTF-8")) {
			XMLWriter xmlWriter = new XMLWriter(outputStreamWriter,
					outputFormat);
			xmlWriter.write(document);
			// Push buffered output through before the streams are closed.
			xmlWriter.flush();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Recursively appends the direct children of {@code item} as {@code item}
	 * elements under {@code parent}.
	 *
	 * <p>A section is a direct child when its level is exactly one deeper than
	 * {@code item}'s and its parent id equals {@code item}'s id. Children are
	 * numbered 1, 2, 3, ... in list order and their name is prefixed with
	 * {@code j + "." + number}.
	 *
	 * @param parent        XML element that receives the child elements
	 * @param item          section whose children are being added
	 * @param bookMarkInfos all sections in document order
	 * @param j             numbering prefix of {@code item} (e.g. {@code "2.1"})
	 * @param k             ignored; the child counter always restarts at 0
	 */
	private static void GetXElement(Element parent, BookMarkInfo item,
			List<BookMarkInfo> bookMarkInfos,String  j,int k) {
		int childNo = 0;
		for (BookMarkInfo bookMarkInfo : bookMarkInfos) {
			// Compare ids by value: reference comparison only matches when both
			// sides happen to be the very same String instance.
			if (bookMarkInfo.getLevel() == item.getLevel() + 1
					&& java.util.Objects.equals(bookMarkInfo.getPid(), item.getId())) {
				childNo++;
				String number = j + "." + childNo;
				Element element = parent.addElement("item");
				element.addAttribute("id", bookMarkInfo.getId());
				element.addAttribute("name", number + bookMarkInfo.getName());
				element.addAttribute("filename", bookMarkInfo.getId() + ".docx");
				GetXElement(element, bookMarkInfo, bookMarkInfos, number, childNo);
			}
		}
	}
}

 

<?xml version="1.0" encoding="UTF-8"?>
<item id="1" name="1" filename="a.docx" author="杨稳" publishtime="2017-02-03" keywords="关键词" abstract="摘要" journal="" issn="" Implementation="2017-04-01"  groupname="国标" phase="研制阶段" content="内容分类">
  <!-- id=编号(标准规范),name=名称,filename=文件名称,author=作者,publishtime="发表时间",keywords="关键词",abstract="摘要",journal="所属期刊",issn=ISSN,Implementation="实施日期",groupname="国标",phase="研制阶段",content="内容分类"-->
  <item id="1.1" name="1.1" filename="a.docx"/>
  <item id="1.2" name="1.2" filename="a.docx">
    <item id="1.2.1" name="1.2.1" filename="a.docx"/>
    <item id="1.2.2" name="1.2.2" filename="a.docx"/>
  </item>
  <item id="1.3" name="1.3" filename="a.docx">
    <item id="1.3.1" name="1.3.1" filename="a.docx"/>
  </item>
  <item id="1.4" name="1.4" filename="a.docx"/>
</item>

最终生成的 xml 主要是为了后续往数据库保存。最终生成的xml是:

<?xml version="1.0" encoding="UTF-8"?>

<item id="1" name="1" filename="a.docx" author="杨稳" publishtime="2017-02-03" keywords="关键词" abstract="摘要" journal="" issn="" Implementation="2017-04-01" groupname="国标" phase="研制阶段" content="内容分类"> 
  <!-- id=编号(标准规范),name=名称,filename=文件名称,author=作者,publishtime="发表时间",keywords="关键词",abstract="摘要",journal="所属期刊",issn=ISSN,Implementation="实施日期",groupname="国标",phase="研制阶段",content="内容分类"-->  
  <item id="cdc88f18-a024-4e88-9ba1-3d5c5a7a0c58" name="首页" filename="cdc88f18-a024-4e88-9ba1-3d5c5a7a0c58.docx"
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值