使用工具包:Apache POI。
主要类:
- XWPFDocument:整个文档对象
- XWPFParagraph:段落
- XWPFRun:一个片段(字体样式相同的一段文字)
- XWPFPicture:图片
- XWPFTable:表格
配置MAVEN
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>WEEK6_docx</groupId>
  <artifactId>WEEK6_docx</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>
  <!-- Repositories are served over HTTPS: Maven 3.8.1+ blocks plain-http repositories by default. -->
  <repositories>
    <repository>
      <id>central</id>
      <url>https://maven.aliyun.com/repository/public</url>
    </repository>
  </repositories>
  <pluginRepositories>
    <pluginRepository>
      <id>central</id>
      <url>https://maven.aliyun.com/repository/public</url>
    </pluginRepository>
  </pluginRepositories>
  <dependencies>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi</artifactId>
      <version>3.14</version>
    </dependency>
    <!-- The XWPF classes used by every example live in poi-ooxml; declare it
         explicitly so its version does not depend on transitive resolution. -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>3.14</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>ooxml-schemas</artifactId>
      <version>1.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml-schemas</artifactId>
      <version>3.14</version>
    </dependency>
    <dependency>
      <groupId>org.apache.xmlbeans</groupId>
      <artifactId>xmlbeans</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-examples</artifactId>
      <version>3.14</version>
    </dependency>
    <!-- docx -> pdf conversion (used by XDocReportTest) -->
    <dependency>
      <groupId>fr.opensagres.xdocreport</groupId>
      <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
      <version>1.0.6</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
    <dependency>
      <groupId>org.apache.pdfbox</groupId>
      <artifactId>pdfbox</artifactId>
      <version>2.0.13</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.pdfbox/xmpbox -->
    <dependency>
      <groupId>org.apache.pdfbox</groupId>
      <artifactId>xmpbox</artifactId>
      <version>2.0.13</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
    <dependency>
      <groupId>com.itextpdf</groupId>
      <artifactId>itextpdf</artifactId>
      <version>5.5.13</version>
    </dependency>
  </dependencies>
</project>
文本读
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import javax.xml.namespace.QName;
import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xwpf.usermodel.BodyElementType;
import org.apache.poi.xwpf.usermodel.Document;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPicture;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTObject;
/**
 * Walks the body of {@code test.docx} and prints a short description of each
 * table, paragraph and run it finds.
 */
public class TextRead {

    public static void main(String[] args) throws Exception {
        readDocx();
    }

    /**
     * Opens {@code test.docx} from the working directory and prints, in document
     * order, one line per table and a breakdown of every paragraph's runs.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void readDocx() throws Exception {
        // try-with-resources: the stream and the document are released even when
        // parsing or printing fails part-way through.
        try (InputStream is = new FileInputStream("test.docx");
                XWPFDocument xwpf = new XWPFDocument(is)) {
            // Body elements come back in document order (paragraphs and tables mixed).
            List<IBodyElement> ibs = xwpf.getBodyElements();
            for (IBodyElement ib : ibs) {
                if (ib.getElementType() == BodyElementType.TABLE) {
                    // table
                    System.out.println("table" + ib.getPart());
                } else if (ib instanceof XWPFParagraph) {
                    // paragraph
                    printParagraph((XWPFParagraph) ib);
                } else {
                    // Neither table nor paragraph (e.g. a content control):
                    // casting it to XWPFParagraph would throw ClassCastException.
                    System.out.println("skipped body element: " + ib.getElementType());
                }
            }
        }
    }

    /** Prints the indentation of one paragraph followed by a line per run. */
    private static void printParagraph(XWPFParagraph para) {
        System.out.println("It is a new paragraph....The indention is "
                + para.getFirstLineIndent() + "," + para.getIndentationFirstLine());
        List<XWPFRun> res = para.getRuns();
        if (res.size() <= 0) {
            System.out.println("empty line");
        }
        for (XWPFRun re : res) {
            printRun(re);
        }
    }

    /**
     * Prints one run: its text when it has any, otherwise whether it carries
     * embedded pictures, embedded objects or a field instruction.
     */
    private static void printRun(XWPFRun re) {
        String text = re.text();
        if (text != null && text.length() > 0) {
            System.out.println("=== " + re.getCharacterSpacing() + text);
            return;
        }
        int pictureCount = re.getEmbeddedPictures().size();
        if (pictureCount > 0) {
            System.out.println("image***" + pictureCount);
            return;
        }
        System.out.println("objects:" + re.getCTR().getObjectList().size());
        // A run whose XML contains "instrText" holds a field instruction,
        // which is how this sample recognises equation fields.
        if (re.getCTR().xmlText().indexOf("instrText") > 0) {
            System.out.println("there is an equation field");
        }
    }
}
图片读
/**
* 本类 完成docx的图片读取工作
*/
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import javax.imageio.ImageIO;
/**
 * Extracts every picture embedded in the paragraphs of {@code simple.docx},
 * prints its size information and saves its raw bytes to
 * {@code test<N><extension>} in the working directory.
 */
public class ImageRead {

    /** Number of EMUs (English Metric Units) in one centimetre. */
    private static final double EMU_PER_CM = 360000.0;

    /**
     * Reads {@code simple.docx}, and for each embedded picture prints its XML,
     * its display extent (in EMU and in centimetres) and its pixel size, then
     * writes the picture bytes to a numbered file.
     *
     * @throws IOException if the document or an output file cannot be accessed
     * @throws InvalidFormatException if the file is not a valid OOXML package
     */
    public static void imageRead() throws IOException, InvalidFormatException {
        File docFile = new File("simple.docx");
        XWPFDocument doc = new XWPFDocument(OPCPackage.openOrCreate(docFile)); // open the file
        try {
            int i = 0;
            // extract the pictures
            for (XWPFParagraph p : doc.getParagraphs()) {
                for (XWPFRun run : p.getRuns()) {
                    System.out.println("a new run");
                    for (XWPFPicture pic : run.getEmbeddedPictures()) {
                        System.out.println(pic.getCTPicture().xmlText());

                        // display extent in EMU (English Metric Unit)
                        long cx = pic.getCTPicture().getSpPr().getXfrm().getExt().getCx();
                        long cy = pic.getCTPicture().getSpPr().getXfrm().getExt().getCy();
                        System.out.println(cx);
                        System.out.println(cy);
                        // display extent in centimetres
                        System.out.println(cx / EMU_PER_CM);
                        System.out.println(cy / EMU_PER_CM);

                        int type = pic.getPictureData().getPictureType();
                        byte[] img = pic.getPictureData().getData();

                        // ImageIO returns null for formats it has no reader for
                        // (EMF, WMF, ...); report that instead of failing with an NPE.
                        BufferedImage bufferedImage = ImageRead.byteArrayToImage(img);
                        if (bufferedImage != null) {
                            System.out.println(bufferedImage.getWidth());
                            System.out.println(bufferedImage.getHeight());
                        } else {
                            System.out.println("pixel size unavailable: ImageIO cannot decode picture type " + type);
                        }

                        // The raw bytes are written unchanged, so the stream is
                        // closed even if the write fails.
                        try (FileOutputStream fos = new FileOutputStream("test" + i + extensionFor(type))) {
                            fos.write(img);
                        }
                        i++;
                    }
                }
            }
        } finally {
            // The package was opened read-write on the file; revert() releases it
            // WITHOUT saving, so the source document is never rewritten.
            doc.getPackage().revert();
        }
    }

    /**
     * Maps a {@code Document.PICTURE_TYPE_*} constant to a file extension
     * (including the leading dot). Unknown types fall back to {@code .jpg}.
     */
    private static String extensionFor(int type) {
        switch (type) {
            case Document.PICTURE_TYPE_EMF:
                return ".emf";
            case Document.PICTURE_TYPE_WMF:
                return ".wmf";
            case Document.PICTURE_TYPE_PICT:
                return ".pict";
            case Document.PICTURE_TYPE_JPEG:
                return ".jpg";
            case Document.PICTURE_TYPE_PNG:
                return ".png";
            case Document.PICTURE_TYPE_DIB:
                return ".dib";
            case Document.PICTURE_TYPE_GIF:
                return ".gif";
            case Document.PICTURE_TYPE_TIFF:
                return ".tiff";
            case Document.PICTURE_TYPE_EPS:
                return ".eps";
            case Document.PICTURE_TYPE_BMP:
                return ".bmp";
            case Document.PICTURE_TYPE_WPG:
                return ".wpg";
            default:
                return ".jpg";
        }
    }

    /**
     * Decodes raw image bytes with {@link ImageIO}.
     *
     * @param bytes encoded image data
     * @return the decoded image, or {@code null} when decoding fails or no
     *         registered reader understands the format
     */
    public static BufferedImage byteArrayToImage(byte[] bytes) {
        BufferedImage bufferedImage = null;
        try {
            InputStream inputStream = new ByteArrayInputStream(bytes);
            bufferedImage = ImageIO.read(inputStream);
        } catch (IOException ex) {
            System.out.println(ex.getMessage());
        }
        return bufferedImage;
    }

    public static void main(String[] args) throws Exception {
        imageRead();
    }
}
图片写
/**
* 本类完成docx的图片保存工作
*/
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import java.io.FileInputStream;
import java.io.FileOutputStream;
/**
 * Creates {@code images.docx} containing each listed image file, preceded by
 * its file name and followed by a page break.
 */
public class ImageWrite {

    /** Returned by {@link #pictureFormat(String)} for unrecognised extensions. */
    private static final int UNSUPPORTED = -1;

    public static void main(String[] args) throws Exception {
        String[] imgFiles = new String[2];
        imgFiles[0] = "c:/temp/ecnu.jpg";
        imgFiles[1] = "c:/temp/shida.jpg";

        // try-with-resources: the document is closed even if an image is missing
        // or the output file cannot be written.
        try (XWPFDocument doc = new XWPFDocument()) {
            XWPFParagraph p = doc.createParagraph();
            XWPFRun r = p.createRun();
            for (String imgFile : imgFiles) {
                int format = pictureFormat(imgFile);
                if (format == UNSUPPORTED) {
                    System.err.println("Unsupported picture: " + imgFile +
                            ". Expected emf|wmf|pict|jpeg|png|dib|gif|tiff|eps|bmp|wpg");
                    continue;
                }
                r.setText(imgFile);
                r.addBreak();
                // The image stream is closed once it has been added to the run.
                try (FileInputStream in = new FileInputStream(imgFile)) {
                    // Units.toEMU converts points, so the picture is 200 x 200 pt.
                    r.addPicture(in, format, imgFile, Units.toEMU(200), Units.toEMU(200));
                }
                r.addBreak(BreakType.PAGE);
            }
            try (FileOutputStream out = new FileOutputStream("images.docx")) {
                doc.write(out);
            }
        }
    }

    /**
     * Chooses the {@code XWPFDocument.PICTURE_TYPE_*} constant from the file
     * extension (matched case-sensitively, as before).
     *
     * @param imgFile path of the image file
     * @return the picture type, or {@link #UNSUPPORTED} if the extension is unknown
     */
    private static int pictureFormat(String imgFile) {
        if (imgFile.endsWith(".emf")) return XWPFDocument.PICTURE_TYPE_EMF;
        if (imgFile.endsWith(".wmf")) return XWPFDocument.PICTURE_TYPE_WMF;
        if (imgFile.endsWith(".pict")) return XWPFDocument.PICTURE_TYPE_PICT;
        if (imgFile.endsWith(".jpeg") || imgFile.endsWith(".jpg")) return XWPFDocument.PICTURE_TYPE_JPEG;
        if (imgFile.endsWith(".png")) return XWPFDocument.PICTURE_TYPE_PNG;
        if (imgFile.endsWith(".dib")) return XWPFDocument.PICTURE_TYPE_DIB;
        if (imgFile.endsWith(".gif")) return XWPFDocument.PICTURE_TYPE_GIF;
        if (imgFile.endsWith(".tiff")) return XWPFDocument.PICTURE_TYPE_TIFF;
        if (imgFile.endsWith(".eps")) return XWPFDocument.PICTURE_TYPE_EPS;
        if (imgFile.endsWith(".bmp")) return XWPFDocument.PICTURE_TYPE_BMP;
        if (imgFile.endsWith(".wpg")) return XWPFDocument.PICTURE_TYPE_WPG;
        return UNSUPPORTED;
    }
}
表格读
/**
* 本类完成docx的表格内容读取
*/
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.usermodel.BodyElementType;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
/**
 * Walks the body of {@code simple2.docx} and prints the content of every table
 * cell as well as a breakdown of every paragraph.
 */
public class TableRead {

    public static void main(String[] args) throws Exception {
        testTable();
    }

    /**
     * Opens {@code simple2.docx} from the working directory and prints its body
     * elements in document order.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void testTable() throws Exception {
        // try-with-resources: the stream and the document are released even when
        // parsing or printing fails part-way through.
        try (InputStream is = new FileInputStream("simple2.docx");
                XWPFDocument xwpf = new XWPFDocument(is)) {
            List<IBodyElement> ibs = xwpf.getBodyElements();
            for (IBodyElement ib : ibs) {
                if (ib.getElementType() == BodyElementType.TABLE) {
                    // table
                    System.out.println("table" + ib.getPart());
                    printTable((XWPFTable) ib);
                } else if (ib instanceof XWPFParagraph) {
                    // paragraph
                    printParagraph((XWPFParagraph) ib);
                } else {
                    // Neither table nor paragraph (e.g. a content control):
                    // casting it to XWPFParagraph would throw ClassCastException.
                    System.out.println("skipped body element: " + ib.getElementType());
                }
            }
        }
    }

    /** Prints, row by row, the text of each cell and its paragraph count. */
    private static void printTable(XWPFTable table) {
        for (XWPFTableRow row : table.getRows()) {
            for (XWPFTableCell cell : row.getTableCells()) {
                System.out.println(cell.getText());
                System.out.println(cell.getParagraphs().size());
            }
        }
    }

    /** Prints the indentation and alignment of a paragraph, then each of its runs. */
    private static void printParagraph(XWPFParagraph para) {
        System.out.println("It is a new paragraph....The indention is "
                + para.getFirstLineIndent() + "," + para.getIndentationFirstLine() + ","
                + para.getIndentationHanging() + "," + para.getIndentationLeft() + ","
                + para.getIndentationRight() + "," + para.getIndentFromLeft() + ","
                + para.getIndentFromRight() + "," + para.getAlignment().getValue());
        List<XWPFRun> res = para.getRuns();
        System.out.println("run");
        if (res.size() <= 0) {
            System.out.println("empty line");
        }
        for (XWPFRun re : res) {
            String text = re.text();
            if (text != null && text.length() > 0) {
                System.out.println("===" + text);
            } else if (re.getEmbeddedPictures().size() > 0) {
                System.out.println("image***" + re.getEmbeddedPictures().size());
            } else {
                // No text and no picture: dump the run's XML for inspection.
                System.out.println("objects:" + re.getCTR().getObjectList().size());
                System.out.println(re.getCTR().xmlText());
            }
        }
    }
}
表格写
/*
* 本类测试写入表格
*/
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.util.List;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
/**
 * Writes {@code simpleTable.docx}, a document containing a single 3x3 table
 * with a few filled cells.
 */
public class TableWrite {

    public static void main(String[] args) throws Exception {
        try {
            createSimpleTable();
        } catch (Exception e) {
            System.out.println("Error trying to create simple table.");
            throw (e);
        }
    }

    /**
     * Builds the table and saves the document to {@code simpleTable.docx}.
     *
     * @throws Exception if the document cannot be written
     */
    public static void createSimpleTable() throws Exception {
        // try-with-resources instead of nested try/finally: a failure in close()
        // is recorded as suppressed and no longer hides the original exception.
        try (XWPFDocument doc = new XWPFDocument()) {
            XWPFTable table = doc.createTable(3, 3);

            // Centre cell: plain text.
            table.getRow(1).getCell(1).setText("表格示例");

            // Top-left cell: a styled run added to the cell's first paragraph.
            XWPFParagraph p1 = table.getRow(0).getCell(0).getParagraphs().get(0);
            XWPFRun r1 = p1.createRun();
            r1.setBold(true);
            r1.setText("The quick brown fox");
            r1.setItalic(true);
            r1.setFontFamily("Courier");
            r1.setUnderline(UnderlinePatterns.DOT_DOT_DASH);
            r1.setTextPosition(100);

            // Bottom-right cell: plain text.
            table.getRow(2).getCell(2).setText("only text");

            try (OutputStream out = new FileOutputStream("simpleTable.docx")) {
                doc.write(out);
            }
        }
    }
}
按格式写
假如有模板文件 template.docx,其表格单元格中含有 ${name}、${sex}、${pic} 等占位符:
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.BodyElementType;
import org.apache.poi.xwpf.usermodel.Document;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
/**
 * Fills a docx template: every <code>${key}</code> placeholder found in a table
 * cell is replaced by a text value or by a picture, and the result is saved as
 * a new document.
 */
public class TemplateTest {

    /** Matches placeholders of the form <code>${key}</code>; compiled once and reused. */
    private static final Pattern PLACEHOLDER =
            Pattern.compile("\\$\\{(.+?)\\}", Pattern.CASE_INSENSITIVE);

    /** Returned by {@link #pictureFormat(String)} for unrecognised extensions. */
    private static final int UNSUPPORTED = -1;

    public static void main(String[] args) throws Exception {
        XWPFDocument doc = openDocx("template.docx"); // load the template
        if (doc == null) {
            // openDocx has already printed the cause; fail clearly instead of with an NPE.
            throw new IOException("Could not open template.docx");
        }
        try {
            Map<String, Object> params = new HashMap<String, Object>(); // text: key -> value
            params.put("${name}", "Tom");
            params.put("${sex}", "男");
            Map<String, String> picParams = new HashMap<String, String>(); // pictures: key -> path or URL
            picParams.put("${pic}", "c:/temp/ecnu.jpg");

            List<IBodyElement> ibes = doc.getBodyElements();
            for (IBodyElement ib : ibes) {
                if (ib.getElementType() == BodyElementType.TABLE) {
                    replaceTable(ib, params, picParams, doc);
                }
            }
            writeDocx(doc, new FileOutputStream("template2.docx")); // save the result
        } finally {
            doc.close();
        }
    }

    /**
     * Opens a docx file.
     *
     * @param url path of the file to open
     * @return the parsed document, or {@code null} if the file is missing or
     *         cannot be read (the exception is printed to stderr)
     */
    public static XWPFDocument openDocx(String url) {
        // The document is fully loaded by the constructor, so the stream can be
        // closed as soon as it returns.
        try (InputStream in = new FileInputStream(url)) {
            return new XWPFDocument(in);
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is an IOException.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Writes the document to the stream and closes the stream. I/O failures are
     * printed to stderr rather than thrown.
     *
     * @param outDoc document to save
     * @param out destination; always closed by this method, even if writing fails
     */
    public static void writeDocx(XWPFDocument outDoc, OutputStream out) {
        try {
            outDoc.write(out);
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Returns a matcher that finds <code>${key}</code> placeholders in {@code str}. */
    private static Matcher matcher(String str) {
        return PLACEHOLDER.matcher(str);
    }

    /**
     * Chooses the {@code Document.PICTURE_TYPE_*} constant from the file
     * extension (matched case-sensitively).
     *
     * @return the picture type, or {@link #UNSUPPORTED} if the extension is unknown
     */
    private static int pictureFormat(String imgFile) {
        if (imgFile.endsWith(".emf")) return Document.PICTURE_TYPE_EMF;
        if (imgFile.endsWith(".wmf")) return Document.PICTURE_TYPE_WMF;
        if (imgFile.endsWith(".pict")) return Document.PICTURE_TYPE_PICT;
        if (imgFile.endsWith(".jpeg") || imgFile.endsWith(".jpg")) return Document.PICTURE_TYPE_JPEG;
        if (imgFile.endsWith(".png")) return Document.PICTURE_TYPE_PNG;
        if (imgFile.endsWith(".dib")) return Document.PICTURE_TYPE_DIB;
        if (imgFile.endsWith(".gif")) return Document.PICTURE_TYPE_GIF;
        if (imgFile.endsWith(".tiff")) return Document.PICTURE_TYPE_TIFF;
        if (imgFile.endsWith(".eps")) return Document.PICTURE_TYPE_EPS;
        if (imgFile.endsWith(".bmp")) return Document.PICTURE_TYPE_BMP;
        if (imgFile.endsWith(".wpg")) return Document.PICTURE_TYPE_WPG;
        return UNSUPPORTED;
    }

    /**
     * Adds a 100 x 100 pt picture to a run.
     *
     * @param run run that receives the picture
     * @param imgFile local path, or an {@code http://} / {@code https://} URL, of
     *        the image; its extension selects the picture type
     * @param doc document being edited (currently unused)
     * @throws Exception if the image cannot be read or is rejected by POI; an
     *         unsupported extension is only reported on stderr
     */
    public static void replacePic(XWPFRun run, String imgFile, XWPFDocument doc) throws Exception {
        int format = pictureFormat(imgFile);
        if (format == UNSUPPORTED) {
            System.err.println(
                    "Unsupported picture: " + imgFile + ". Expected emf|wmf|pict|jpeg|png|dib|gif|tiff|eps|bmp|wpg");
            return;
        }
        // Require the scheme separator so a local file whose name merely starts
        // with "http" is not mistaken for a URL.
        boolean remote = imgFile.startsWith("http://") || imgFile.startsWith("https://");
        // The image stream is closed once the picture has been added.
        try (InputStream in = remote
                ? new URL(imgFile).openConnection().getInputStream()
                : new FileInputStream(imgFile)) {
            run.addPicture(in, format, "rpic", Units.toEMU(100), Units.toEMU(100));
        }
    }

    /**
     * Replaces the placeholders inside every run of every cell of a table.
     *
     * @param para the table (must be an {@link XWPFTable})
     * @param params text replacements, keyed by the full placeholder (e.g. <code>${name}</code>); may be null
     * @param picParams picture replacements, keyed by the full placeholder, value is a path or URL; may be null
     * @param indoc document being edited
     * @throws Exception if a picture cannot be inserted
     */
    public static void replaceTable(IBodyElement para, Map<String, Object> params,
            Map<String, String> picParams, XWPFDocument indoc)
            throws Exception {
        XWPFTable table = (XWPFTable) para;
        for (XWPFTableRow row : table.getRows()) {
            for (XWPFTableCell cell : row.getTableCells()) {
                for (XWPFParagraph p : cell.getParagraphs()) {
                    for (XWPFRun run : p.getRuns()) {
                        replaceRun(run, params, picParams, indoc);
                    }
                }
            }
        }
    }

    /**
     * Replaces every known placeholder in one run. Text around the placeholders
     * is preserved and unknown placeholders are left untouched. A placeholder
     * listed in {@code picParams} is removed from the text and its picture is
     * appended to the run.
     *
     * NOTE(review): a placeholder is only recognised when it lies entirely inside
     * one run, and the result is written to the run's first text element only.
     */
    private static void replaceRun(XWPFRun run, Map<String, Object> params,
            Map<String, String> picParams, XWPFDocument indoc) throws Exception {
        Matcher found = matcher(run.text());
        StringBuffer replaced = new StringBuffer();
        List<String> pictures = new java.util.ArrayList<String>();
        boolean changed = false;

        while (found.find()) {
            String key = found.group();
            String replacement = null;
            if (picParams != null && picParams.containsKey(key)) {
                pictures.add(picParams.get(key));
                replacement = "";
            }
            if (params != null && params.containsKey(key)) {
                replacement = params.get(key) + "";
            }
            if (replacement == null) {
                replacement = key; // unknown placeholder: keep it as written
            } else {
                changed = true;
            }
            // quoteReplacement: values may contain '$' or '\', which are special
            // in replacement strings.
            found.appendReplacement(replaced, Matcher.quoteReplacement(replacement));
        }
        if (!changed) {
            return;
        }
        found.appendTail(replaced);

        run.setText(replaced.toString(), 0);
        for (String picture : pictures) {
            replacePic(run, picture, indoc);
        }
    }
}
docx转pdf
import java.awt.Color;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import com.lowagie.text.Font;
import com.lowagie.text.pdf.BaseFont;
import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import fr.opensagres.xdocreport.itext.extension.font.IFontProvider;
import fr.opensagres.xdocreport.itext.extension.font.ITextFontRegistry;
/**
* XDocReportTest 将docx文档转为pdf
* @author Tom
*
*/
public class XDocReportTest {
public static void main(String[] args) throws Exception {
XWPFDocument doc = new XWPFDocument(new FileInputStream("template.docx"));// docx
PdfOptions options = PdfOptions.create();
options.fontProvider(new IFontProvider() {
// 设置中文字体
public Font getFont(String familyName, String encoding, float size, int style, Color color) {
try {
BaseFont bfChinese = BaseFont.createFont(
"C:\\Program Files (x86)\\Microsoft Office\\root\\VFS\\Fonts\\private\\STSONG.TTF",
BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
Font fontChinese = new Font(bfChinese, size, style, color);
if (familyName != null)
fontChinese.setFamily(familyName);
return fontChinese;
} catch (Throwable e) {
e.printStackTrace();
return ITextFontRegistry.getRegistry().getFont(familyName, encoding, size, style, color);
}
}
});
PdfConverter.getInstance().convert(doc, new FileOutputStream("template.pdf"), options);// pdf
}
}