import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import org.docx4j.Docx4J;
import org.docx4j.convert.out.FOSettings;
import org.docx4j.fonts.IdentityPlusMapper;
import org.docx4j.fonts.Mapper;
import org.docx4j.fonts.PhysicalFont;
import org.docx4j.fonts.PhysicalFonts;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.exceptions.InvalidFormatException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.wml.P;
import org.docx4j.wml.R;
import org.docx4j.wml.STBrType;
/**
 * Helpers for merging docx4j {@link WordprocessingMLPackage} documents and for exporting
 * them through docx4j's FO pipeline ({@link Docx4J#toFO}).
 */
public class WordUtil {

    /**
     * Regex handed to {@link PhysicalFonts#setRegex(String)} to restrict font discovery to a
     * subset of fonts. The original author labelled this list as the Windows variant; other
     * platforms may need different name fragments.
     */
    private static final String FONT_REGEX =
            ".*(simsun|calibri|camb|cour|courier new|arial|times|comic|georgia|impact|LSANS|pala|tahoma|trebuc|verdana|symbol|webdings|wingdings|palatino).*";

    /**
     * Name looked up in {@link PhysicalFonts} and, when found, registered as the fallback font.
     * It has to be matched by {@link #FONT_REGEX}, otherwise it is never discovered.
     */
    private static final String FALLBACK_FONT_NAME = "arial unicode ms";

    /**
     * Exports the given Word package to the file {@code savePdfPath}.
     *
     * <p>The file stream is opened here and is always closed again, whether or not the
     * conversion succeeds.
     *
     * @param wordMLPackage the document to export
     * @param savePdfPath   path of the file to write
     * @throws Exception if the file cannot be opened or the export fails
     */
    public static void convertWordToPdf(WordprocessingMLPackage wordMLPackage, String savePdfPath) throws Exception {
        // try-with-resources: the original never closed this stream, leaking the file handle.
        try (OutputStream os = new java.io.FileOutputStream(savePdfPath)) {
            convertWordToPdf(wordMLPackage, os);
        }
    }

    /**
     * Exports the given Word package to an output stream via {@link Docx4J#toFO}.
     *
     * <p>Side effects visible in this method: it calls the static
     * {@link PhysicalFonts#setRegex(String)} and replaces the package's font mapper.
     * No MIME type is set on the {@link FOSettings}, so the output format is whatever docx4j
     * uses by default (presumably PDF, as the method name says; confirm against the docx4j
     * version in use). This method does not close {@code os} itself.
     *
     * @param wordMLPackage the document to export; its font mapper is overwritten
     * @param os            destination stream
     * @throws Exception if font setup or the export fails
     */
    public static void convertWordToPdf(WordprocessingMLPackage wordMLPackage, OutputStream os) throws Exception {
        // The regex must be set before font discovery happens (note kept from the original code).
        PhysicalFonts.setRegex(FONT_REGEX);

        // Identity mapping plus an optional fallback font for glyphs the document's own
        // fonts lack (the original cites Arabic glyphs missing from Times New Roman).
        Mapper fontMapper = new IdentityPlusMapper();
        PhysicalFont fallbackFont = PhysicalFonts.get(FALLBACK_FONT_NAME);
        if (fallbackFont != null) {
            fontMapper.put(Mapper.FONT_FALLBACK, fallbackFont);
        }
        wordMLPackage.setFontMapper(fontMapper);

        FOSettings foSettings = Docx4J.createFOSettings();
        foSettings.setWmlPackage(wordMLPackage);

        // Prefer the XSLT-based exporter: per the original notes it is slower than the
        // visitor-based one (FLAG_EXPORT_PREFER_NONXSL) but more complete.
        Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL);
    }

    /**
     * Creates a paragraph that contains a single page break.
     *
     * @return a new paragraph holding one run with a {@code PAGE}-type break
     */
    public static P getPageBreak() {
        org.docx4j.wml.Br pageBreak = new org.docx4j.wml.Br();
        pageBreak.setType(STBrType.PAGE);

        R run = new R();
        run.getContent().add(pageBreak);

        P paragraph = new P();
        paragraph.getContent().add(run);
        return paragraph;
    }

    /**
     * Merges several Word packages into one newly created package.
     *
     * <p>Only the content list of each source's main document part is transferred, and the
     * content objects are added as-is (the same object instances, not copies). Nothing else
     * from the source packages is touched by this method.
     *
     * @param srcMLPackageList packages to merge, in output order
     * @param b                {@code true} to insert a page break between consecutive sources
     *                         (none is added after the last one)
     * @return the merged package, or {@code null} when {@code srcMLPackageList} is empty
     * @throws InvalidFormatException if the new package cannot be created
     */
    public static WordprocessingMLPackage mergeWord2010Util(List<WordprocessingMLPackage> srcMLPackageList, boolean b) throws InvalidFormatException {
        // Decide the empty case up front instead of building a package that is thrown away.
        if (srcMLPackageList.isEmpty()) {
            return null;
        }

        WordprocessingMLPackage newMLPackage = WordprocessingMLPackage.createPackage();
        int lastIndex = srcMLPackageList.size() - 1;
        for (int i = 0; i <= lastIndex; i++) {
            List<Object> content = srcMLPackageList.get(i).getMainDocumentPart().getContent();
            for (Object obj : content) {
                newMLPackage.getMainDocumentPart().addObject(obj);
            }
            // Separate documents with a page break, but do not trail one after the last.
            if (b && i != lastIndex) {
                newMLPackage.getMainDocumentPart().addObject(WordUtil.getPageBreak());
            }
        }
        return newMLPackage;
    }

    /**
     * Exports the given Word package to {@code out} as an FO document.
     *
     * <p>Unlike {@link #convertWordToPdf(WordprocessingMLPackage, OutputStream)}, this sets
     * {@link FOSettings#INTERNAL_FO_MIME} so that the FO document itself is the result.
     *
     * @param wordMLPackage the document to export
     * @param out           buffer that receives the exported bytes
     * @throws Docx4JException if the export fails
     */
    public static void convertWordprocessingMLPackageToOutputStream(WordprocessingMLPackage wordMLPackage, ByteArrayOutputStream out) throws Docx4JException {
        FOSettings foSettings = Docx4J.createFOSettings();
        foSettings.setWmlPackage(wordMLPackage);
        // We want the FO document as the result.
        foSettings.setApacheFopMime(FOSettings.INTERNAL_FO_MIME);
        // Prefer the exporter that uses an XSL transformation.
        Docx4J.toFO(foSettings, out, Docx4J.FLAG_EXPORT_PREFER_XSL);
    }

    /**
     * Exports the given Word package with
     * {@link #convertWordprocessingMLPackageToOutputStream(WordprocessingMLPackage, ByteArrayOutputStream)}
     * and returns the exported bytes as an in-memory input stream.
     *
     * @param wordMLPackage the document to export
     * @return an input stream over the exported bytes
     * @throws Docx4JException if the export fails
     */
    public InputStream wordprocessingMLPackageToInputStream(WordprocessingMLPackage wordMLPackage) throws Docx4JException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        convertWordprocessingMLPackageToOutputStream(wordMLPackage, buffer);
        return new ByteArrayInputStream(buffer.toByteArray());
    }
}
// Utility test class: manual driver for WordUtil (a separate source file, WordUtilTest.java).
import java.util.ArrayList;
import java.util.List;
import myutils.WordUtil;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
/**
 * Manual driver for {@link WordUtil}: loads the sample documents, merges them with page
 * breaks in between, and exports the merged document to a file.
 *
 * <p>Inputs are read from, and the output is written to, the {@code myexamples} directory
 * under the JVM working directory ({@code user.dir}).
 */
public class WordUtilTest {

    /** Directory, relative to {@code user.dir}, holding the sample inputs and the output. */
    private static final String EXAMPLES_DIR = "myexamples";

    /** Sample documents to merge, in output order. */
    private static final String[] DOC_NAMES = {"file_1.docx", "file_2.docx"};

    /** File name of the exported result. */
    private static final String OUTPUT_PDF_NAME = "files.pdf";

    public static void main(String[] args) throws Exception {
        WordUtilTest.mergeWord2010UtilTest();
    }

    /**
     * Merges the sample documents and exports the result via
     * {@code WordUtil.convertWordToPdf}.
     *
     * @throws Exception if a document cannot be loaded or the export fails
     */
    public static void mergeWord2010UtilTest() throws Exception {
        // Build paths with java.io.File rather than hard-coded "\\" separators, so the
        // driver resolves the same files on any platform.
        java.io.File examplesDir = new java.io.File(System.getProperty("user.dir"), EXAMPLES_DIR);

        List<WordprocessingMLPackage> srcPkgList = new ArrayList<WordprocessingMLPackage>();
        for (String docName : DOC_NAMES) {
            srcPkgList.add(WordprocessingMLPackage.load(new java.io.File(examplesDir, docName)));
        }

        // true: insert a page break between the merged documents.
        WordprocessingMLPackage mergedPackage = WordUtil.mergeWord2010Util(srcPkgList, true);

        // Export the merged document to a file.
        java.io.File pdfFile = new java.io.File(examplesDir, OUTPUT_PDF_NAME);
        WordUtil.convertWordToPdf(mergedPackage, pdfFile.getPath());
    }
}