读取word文本
package docx;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.BodyElementType;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
/**
 * Demo that prints the body content of {@code test.docx} (tables, paragraphs and
 * their runs) to standard output.
 */
public class TextRead {
    public static void main(String[] args) throws Exception {
        readDocx();
    }

    /**
     * Opens {@code test.docx} from the working directory and prints one or more
     * lines per body element.
     *
     * <p>The stream and the document are released in {@code finally} blocks so they
     * are closed even when parsing or printing fails.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void readDocx() throws Exception {
        InputStream is = new FileInputStream("test.docx");
        try {
            XWPFDocument xwpf = new XWPFDocument(is);
            try {
                List<IBodyElement> ibs = xwpf.getBodyElements();
                for (IBodyElement ib : ibs) {
                    BodyElementType elementType = ib.getElementType();
                    if (elementType == BodyElementType.TABLE) {
                        // Table
                        System.out.println("table" + ib.getPart());
                    } else if (ib instanceof XWPFParagraph) {
                        // Paragraph
                        printParagraph((XWPFParagraph) ib);
                    } else {
                        // Neither a table nor a paragraph (e.g. a content control):
                        // casting it to XWPFParagraph would throw ClassCastException.
                        System.out.println("skipped element:" + elementType);
                    }
                }
            } finally {
                xwpf.close();
            }
        } finally {
            is.close();
        }
    }

    /** Prints the first-line indent of a paragraph followed by each of its runs. */
    private static void printParagraph(XWPFParagraph para) {
        System.out.println("It is a new paragraph...The indention is " + para.getFirstLineIndent());
        List<XWPFRun> runs = para.getRuns();
        System.out.println("run");
        if (runs.isEmpty()) {
            System.out.println("empty line");
        }
        for (XWPFRun run : runs) {
            printRun(run);
        }
    }

    /** Prints a run's text, or, when it has no text, its pictures or embedded objects. */
    private static void printRun(XWPFRun run) {
        if (!StringUtils.isEmpty(run.text())) {
            System.out.println("===" + run.getCharacterSpacing() + run.text());
            return;
        }
        // A run without text may be carrying a picture instead.
        if (run.getEmbeddedPictures().size() > 0) {
            System.out.println("image***" + run.getEmbeddedPictures());
            return;
        }
        System.out.println("objects:" + run.getCTR().getObjectList());
        // A field instruction (instrText) in the run XML is reported as an equation field.
        if (run.getCTR().xmlText().contains("instrText")) {
            System.out.println("there is an equation field");
        }
    }
}
读取word图片
package docx;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import javax.imageio.ImageIO;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.Document;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPicture;
import org.apache.poi.xwpf.usermodel.XWPFRun;
/**
 * Demo that extracts every embedded picture from {@code simple.docx}, prints its
 * dimensions and writes the raw picture bytes to {@code test<N><extension>}.
 */
public class ImageRead {
    /** Number of EMUs (English Metric Units) per centimetre. */
    private static final double EMU_PER_CM = 360000.0;

    /**
     * Reads {@code simple.docx} from the working directory and dumps its pictures.
     *
     * <p>The package is opened with {@code open} rather than {@code openOrCreate}
     * because this method only reads, and it is released with {@code revert()} so
     * that nothing is written back to the source file.
     *
     * @throws IOException if the document or an output file cannot be read/written
     * @throws InvalidFormatException if the file is not a valid OOXML package
     */
    public static void imageRead() throws IOException, InvalidFormatException {
        File docFile = new File("simple.docx");
        OPCPackage pkg = OPCPackage.open(docFile);
        try {
            XWPFDocument doc = new XWPFDocument(pkg);
            int i = 0;
            for (XWPFParagraph p : doc.getParagraphs()) {
                for (XWPFRun run : p.getRuns()) {
                    System.out.println("a new run");
                    for (XWPFPicture pic : run.getEmbeddedPictures()) {
                        System.out.println(pic.getCTPicture().xmlText());
                        long cx = pic.getCTPicture().getSpPr().getXfrm().getExt().getCx();
                        long cy = pic.getCTPicture().getSpPr().getXfrm().getExt().getCy();
                        System.out.println(cx);
                        System.out.println(cy);
                        // Displayed size of the image, in centimetres.
                        System.out.println(cx / EMU_PER_CM);
                        System.out.println(cy / EMU_PER_CM);
                        int type = pic.getPictureData().getPictureType();
                        byte[] img = pic.getPictureData().getData();
                        // ImageIO.read returns null when no registered reader can decode
                        // the bytes (typically EMF/WMF), so guard before dereferencing.
                        BufferedImage bufferedImage = ImageIO.read(new ByteArrayInputStream(img));
                        if (bufferedImage != null) {
                            System.out.println(bufferedImage.getWidth());
                            System.out.println(bufferedImage.getHeight());
                        } else {
                            System.out.println("pixel size unavailable for picture type " + type);
                        }
                        FileOutputStream fos = new FileOutputStream("test" + i + extensionFor(type));
                        try {
                            fos.write(img);
                        } finally {
                            fos.close();
                        }
                        i++;
                    }
                }
            }
        } finally {
            // Close without saving: the document was only read.
            pkg.revert();
        }
    }

    /**
     * Maps a {@code Document.PICTURE_TYPE_*} constant to a file extension.
     * Unknown types keep the historical fallback of {@code ".jpg"}.
     */
    private static String extensionFor(int type) {
        switch (type) {
            case Document.PICTURE_TYPE_EMF:
                return ".emf";
            case Document.PICTURE_TYPE_WMF:
                return ".wmf";
            case Document.PICTURE_TYPE_PICT:
                return ".pic";
            case Document.PICTURE_TYPE_PNG:
                return ".png";
            case Document.PICTURE_TYPE_DIB:
                return ".dib";
            case Document.PICTURE_TYPE_GIF:
                return ".gif";
            case Document.PICTURE_TYPE_TIFF:
                return ".tiff";
            case Document.PICTURE_TYPE_EPS:
                return ".eps";
            case Document.PICTURE_TYPE_BMP:
                return ".bmp";
            case Document.PICTURE_TYPE_WPG:
                return ".wpg";
            case Document.PICTURE_TYPE_JPEG:
            default:
                return ".jpg";
        }
    }

    /** Currently a no-op; call {@link #imageRead()} directly to run the demo. */
    public static void main(String[] args) {
    }
}
将图片保存到word
package docx;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.BreakType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
/**
 * Demo that writes a list of image files into a new document, {@code images.docx}:
 * for each image the file path, a line break, the picture (200x200) and a page break.
 */
public class ImageWriter {
    /** Returned by {@link #pictureFormat(String)} when the file suffix is not recognised. */
    private static final int UNSUPPORTED_FORMAT = -1;

    /**
     * Builds {@code images.docx} in the working directory.
     *
     * @throws InvalidFormatException if POI rejects a picture
     * @throws FileNotFoundException if an image file or the output file cannot be opened
     * @throws IOException on any other I/O failure
     */
    public static void main(String[] args) throws InvalidFormatException, FileNotFoundException, IOException {
        XWPFDocument doc = new XWPFDocument();
        try {
            XWPFParagraph paragraph = doc.createParagraph();
            XWPFRun run = paragraph.createRun();
            String[] imgFiles = { "D:\\image\\303951486484.jpg", "D:\\image\\304032384201.jpg" };
            for (String imgFile : imgFiles) {
                int format = pictureFormat(imgFile);
                if (format == UNSUPPORTED_FORMAT) {
                    System.err.println(
                            "Unsupported picture:" + imgFile + ". Expected emf|wmf|pict|jpeg|png|dib|gif|tiff|eps|bmp|wpg");
                    continue;
                }
                // File path
                run.setText(imgFile);
                // Line break
                run.addBreak();
                // 200*200; the stream is closed here because this code opened it.
                FileInputStream pictureStream = new FileInputStream(imgFile);
                try {
                    run.addPicture(pictureStream, format, imgFile, Units.toEMU(200), Units.toEMU(200));
                } finally {
                    pictureStream.close();
                }
                // Add a page break after every picture.
                run.addBreak(BreakType.PAGE);
            }
            FileOutputStream out = new FileOutputStream("images.docx");
            try {
                doc.write(out);
            } finally {
                out.close();
            }
        } finally {
            doc.close();
        }
    }

    /**
     * Derives the {@code XWPFDocument.PICTURE_TYPE_*} constant from a file name suffix.
     * Matching ignores case, so {@code photo.JPG} is accepted.
     *
     * @param imgFile image file name or path; may be {@code null}
     * @return the picture type, or {@link #UNSUPPORTED_FORMAT} if the suffix is unknown
     */
    private static int pictureFormat(String imgFile) {
        if (imgFile == null) {
            return UNSUPPORTED_FORMAT;
        }
        if (hasSuffix(imgFile, ".emf")) {
            return XWPFDocument.PICTURE_TYPE_EMF;
        }
        if (hasSuffix(imgFile, ".wmf")) {
            return XWPFDocument.PICTURE_TYPE_WMF;
        }
        if (hasSuffix(imgFile, ".pict")) {
            return XWPFDocument.PICTURE_TYPE_PICT;
        }
        if (hasSuffix(imgFile, ".jpeg") || hasSuffix(imgFile, ".jpg")) {
            return XWPFDocument.PICTURE_TYPE_JPEG;
        }
        if (hasSuffix(imgFile, ".png")) {
            return XWPFDocument.PICTURE_TYPE_PNG;
        }
        if (hasSuffix(imgFile, ".dib")) {
            return XWPFDocument.PICTURE_TYPE_DIB;
        }
        if (hasSuffix(imgFile, ".gif")) {
            return XWPFDocument.PICTURE_TYPE_GIF;
        }
        if (hasSuffix(imgFile, ".tiff") || hasSuffix(imgFile, ".tif")) {
            return XWPFDocument.PICTURE_TYPE_TIFF;
        }
        if (hasSuffix(imgFile, ".eps")) {
            return XWPFDocument.PICTURE_TYPE_EPS;
        }
        if (hasSuffix(imgFile, ".bmp")) {
            return XWPFDocument.PICTURE_TYPE_BMP;
        }
        if (hasSuffix(imgFile, ".wpg")) {
            return XWPFDocument.PICTURE_TYPE_WPG;
        }
        return UNSUPPORTED_FORMAT;
    }

    /** Case-insensitive {@code endsWith}; regionMatches returns false when the suffix is longer than the name. */
    private static boolean hasSuffix(String name, String suffix) {
        return name.regionMatches(true, name.length() - suffix.length(), suffix, 0, suffix.length());
    }
}
读取word表格内容
package docx;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.BodyElementType;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
/**
 * Demo that reads {@code simple2.docx} and prints the content of its tables
 * (cell by cell) and paragraphs (run by run) to standard output.
 */
public class TableRead {
    public static void main(String[] args) throws Exception {
        testTable();
    }

    /**
     * Walks the document body in order and prints every table and paragraph.
     *
     * <p>The stream and the document are released in {@code finally} blocks so they
     * are closed even when parsing or printing fails.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    private static void testTable() throws Exception {
        InputStream inputStream = new FileInputStream("simple2.docx");
        try {
            XWPFDocument document = new XWPFDocument(inputStream);
            try {
                List<IBodyElement> bodyElements = document.getBodyElements();
                for (IBodyElement bodyElement : bodyElements) {
                    BodyElementType elementType = bodyElement.getElementType();
                    if (elementType == BodyElementType.TABLE) {
                        // Table
                        System.out.println("table" + bodyElement.getPart());
                        printTable((XWPFTable) bodyElement);
                    } else if (bodyElement instanceof XWPFParagraph) {
                        // Paragraph
                        printParagraph((XWPFParagraph) bodyElement);
                    } else {
                        // Neither a table nor a paragraph (e.g. a content control):
                        // casting it to XWPFParagraph would throw ClassCastException.
                        System.out.println("skipped element:" + elementType);
                    }
                }
            } finally {
                document.close();
            }
        } finally {
            inputStream.close();
        }
    }

    /** Prints, for every cell of every row, the cell text and its paragraph count. */
    private static void printTable(XWPFTable table) {
        // Each row
        for (XWPFTableRow row : table.getRows()) {
            // Each column of that row
            for (XWPFTableCell cell : row.getTableCells()) {
                System.out.println(cell.getText());
                List<XWPFParagraph> cellParagraphs = cell.getParagraphs();
                System.out.println(cellParagraphs.size());
            }
        }
    }

    /** Prints the first-line indent of a paragraph followed by each of its runs. */
    private static void printParagraph(XWPFParagraph para) {
        System.out.println("It is a new paragraph...The indention is " + para.getFirstLineIndent());
        List<XWPFRun> runs = para.getRuns();
        System.out.println("run");
        if (runs.isEmpty()) {
            System.out.println("empty line");
        }
        for (XWPFRun run : runs) {
            printRun(run);
        }
    }

    /** Prints a run's text, or, when it has no text, its pictures or embedded objects. */
    private static void printRun(XWPFRun run) {
        if (!StringUtils.isEmpty(run.text())) {
            System.out.println("===" + run.getCharacterSpacing() + run.text());
            return;
        }
        // A run without text may be carrying a picture instead.
        if (run.getEmbeddedPictures().size() > 0) {
            System.out.println("image***" + run.getEmbeddedPictures());
            return;
        }
        System.out.println("objects:" + run.getCTR().getObjectList());
        // A field instruction (instrText) in the run XML is reported as an equation field.
        if (run.getCTR().xmlText().contains("instrText")) {
            System.out.println("there is an equation field");
        }
    }
}
在word中写入表格
package docx;
import java.io.FileOutputStream;
import java.io.OutputStream;
import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
/**
 * Demo that creates a 3x3 table in a new document and saves it as
 * {@code simpleTable.docx}.
 */
public class TableWriter {
    /**
     * Runs the demo; a failure is announced on standard output and then rethrown.
     */
    public static void main(String[] args) throws Exception {
        try {
            createSimpleTable();
        } catch (Exception failure) {
            System.out.println("Error trying to create simple table.");
            throw failure;
        }
    }

    /**
     * Fills three cells on the table diagonal (plain text in the middle and the
     * bottom-right cell, a formatted run in the top-left cell) and writes the file.
     * The document is closed whether or not writing succeeds.
     */
    private static void createSimpleTable() throws Exception {
        final XWPFDocument document = new XWPFDocument();
        try {
            final XWPFTable grid = document.createTable(3, 3);
            grid.getRow(1).getCell(1).setText("表格示例");

            final XWPFParagraph firstCellParagraph = grid.getRow(0).getCell(0).getParagraphs().get(0);
            formatHeadline(firstCellParagraph.createRun());

            grid.getRow(2).getCell(2).setText("only text");
            save(document, "simpleTable.docx");
        } finally {
            document.close();
        }
    }

    /** Applies the bold/italic/underlined Courier styling and the sample text to a run. */
    private static void formatHeadline(XWPFRun headline) {
        headline.setBold(true);
        headline.setText("The quick brown fox");
        headline.setItalic(true);
        headline.setFontFamily("Courier");
        headline.setUnderline(UnderlinePatterns.DOT_DOT_DASH);
        headline.setTextPosition(100);
    }

    /** Writes the document to the given path, always closing the output stream. */
    private static void save(XWPFDocument document, String path) throws Exception {
        final OutputStream sink = new FileOutputStream(path);
        try {
            document.write(sink);
        } finally {
            sink.close();
        }
    }
}
word模板套写
package docx;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.BodyElementType;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
/**
 * Demo that fills a Word template: every {@code ${...}} placeholder found in a table
 * cell of {@code template.docx} is replaced by a text value or a picture, and the
 * result is written to {@code template2.docx}.
 */
public class TemplateTest {
    /** Matches {@code ${...}} placeholders; compiled once because it is applied to every run. */
    private static final Pattern PLACEHOLDER = Pattern.compile("\\$\\{(.+?)\\}", Pattern.CASE_INSENSITIVE);

    /** Returned by {@link #pictureFormat(String)} when the file suffix is not recognised. */
    private static final int UNSUPPORTED_FORMAT = -1;

    public static void main(String[] args) throws InvalidFormatException, FileNotFoundException, Exception {
        // Load the template file.
        XWPFDocument doc = openDocx("template.docx");
        try {
            // Text placeholders: key -> value
            Map<String, Object> params = new HashMap<String, Object>();
            params.put("${name}", "Tom");
            params.put("${sex}", "男");
            // Picture placeholders: key -> image path
            Map<String, String> picParams = new HashMap<String, String>();
            picParams.put("${pic}", "D:\\image\\303951486484.jpg");
            List<IBodyElement> bodyElements = doc.getBodyElements();
            for (IBodyElement bodyElement : bodyElements) {
                if (bodyElement.getElementType() == BodyElementType.TABLE) {
                    replaceTable(bodyElement, params, picParams, doc);
                }
            }
            // Output
            writeDocx(doc, new FileOutputStream("template2.docx"));
        } finally {
            doc.close();
        }
    }

    /**
     * Writes the document to the stream and closes the stream.
     *
     * @throws IOException if writing fails; the failure is propagated instead of being
     *         swallowed so a missing or partial output file does not go unnoticed
     */
    private static void writeDocx(XWPFDocument doc, OutputStream outputStream) throws IOException {
        try {
            doc.write(outputStream);
            outputStream.flush();
        } finally {
            if (outputStream != null) {
                outputStream.close();
            }
        }
    }

    /**
     * Replaces placeholders in every run of every cell of the given table.
     *
     * @param bodyElement the table; must be an {@code XWPFTable}
     * @param params text replacements keyed by the full placeholder (e.g. {@code ${name}}); may be null
     * @param picParams image paths keyed by the full placeholder; may be null
     * @param doc the owning document
     */
    private static void replaceTable(IBodyElement bodyElement, Map<String, Object> params,
            Map<String, String> picParams, XWPFDocument doc) {
        XWPFTable table = (XWPFTable) bodyElement;
        for (XWPFTableRow row : table.getRows()) {
            for (XWPFTableCell cell : row.getTableCells()) {
                for (XWPFParagraph paragraph : cell.getParagraphs()) {
                    for (XWPFRun run : paragraph.getRuns()) {
                        replaceRun(run, params, picParams, doc);
                    }
                }
            }
        }
    }

    /**
     * Substitutes every known placeholder in one run.
     *
     * <p>Only the placeholder itself is replaced, so surrounding text in the run is
     * kept. Text placeholders become their value; picture placeholders are removed
     * from the text and the picture is appended to the run. Placeholders that appear
     * in neither map are left untouched.
     */
    private static void replaceRun(XWPFRun run, Map<String, Object> params,
            Map<String, String> picParams, XWPFDocument doc) {
        String runText = run.text();
        Matcher matcher = matcher(runText);
        StringBuffer replaced = new StringBuffer();
        boolean changed = false;
        while (matcher.find()) {
            String placeholder = matcher.group();
            if (params != null && params.containsKey(placeholder)) {
                // quoteReplacement: values may contain '$' or '\', which are special here.
                matcher.appendReplacement(replaced,
                        Matcher.quoteReplacement(String.valueOf(params.get(placeholder))));
                changed = true;
            } else if (picParams != null && picParams.containsKey(placeholder)) {
                matcher.appendReplacement(replaced, "");
                changed = true;
            }
        }
        if (!changed) {
            return;
        }
        matcher.appendTail(replaced);
        run.setText(replaced.toString(), 0);

        // Second pass over the original text: append the pictures after the text is in place.
        if (picParams != null) {
            Matcher pictures = matcher(runText);
            while (pictures.find()) {
                String placeholder = pictures.group();
                if (picParams.containsKey(placeholder)) {
                    replacePic(run, picParams.get(placeholder), doc);
                }
            }
        }
    }

    /**
     * Appends the image path, a line break and the picture (200x200) to the run.
     * An unsupported or unreadable image is reported on standard error and skipped,
     * so the rest of the template is still filled in.
     */
    private static void replacePic(XWPFRun run, String imgFile, XWPFDocument doc) {
        int format = pictureFormat(imgFile);
        if (format == UNSUPPORTED_FORMAT) {
            System.err.println(
                    "Unsupported picture:" + imgFile + ". Expected emf|wmf|pict|jpeg|png|dib|gif|tiff|eps|bmp|wpg");
            // Do not call addPicture with an invalid format.
            return;
        }
        try {
            // File path
            run.setText(imgFile);
            // Line break
            run.addBreak();
            // 200*200; the stream is closed here because this code opened it.
            FileInputStream pictureStream = new FileInputStream(imgFile);
            try {
                run.addPicture(pictureStream, format, imgFile, Units.toEMU(200), Units.toEMU(200));
            } finally {
                pictureStream.close();
            }
        } catch (InvalidFormatException e) {
            System.err.println("Could not insert picture:" + imgFile);
            e.printStackTrace();
        } catch (IOException e) {
            // Also covers FileNotFoundException.
            System.err.println("Could not read picture:" + imgFile);
            e.printStackTrace();
        }
    }

    /**
     * Derives the {@code XWPFDocument.PICTURE_TYPE_*} constant from a file name suffix.
     * Matching ignores case.
     *
     * @param imgFile image file name or path; may be {@code null}
     * @return the picture type, or {@link #UNSUPPORTED_FORMAT} if the suffix is unknown
     */
    private static int pictureFormat(String imgFile) {
        if (imgFile == null) {
            return UNSUPPORTED_FORMAT;
        }
        if (hasSuffix(imgFile, ".emf")) {
            return XWPFDocument.PICTURE_TYPE_EMF;
        }
        if (hasSuffix(imgFile, ".wmf")) {
            return XWPFDocument.PICTURE_TYPE_WMF;
        }
        if (hasSuffix(imgFile, ".pict")) {
            return XWPFDocument.PICTURE_TYPE_PICT;
        }
        if (hasSuffix(imgFile, ".jpeg") || hasSuffix(imgFile, ".jpg")) {
            return XWPFDocument.PICTURE_TYPE_JPEG;
        }
        if (hasSuffix(imgFile, ".png")) {
            return XWPFDocument.PICTURE_TYPE_PNG;
        }
        if (hasSuffix(imgFile, ".dib")) {
            return XWPFDocument.PICTURE_TYPE_DIB;
        }
        if (hasSuffix(imgFile, ".gif")) {
            return XWPFDocument.PICTURE_TYPE_GIF;
        }
        if (hasSuffix(imgFile, ".tiff") || hasSuffix(imgFile, ".tif")) {
            return XWPFDocument.PICTURE_TYPE_TIFF;
        }
        if (hasSuffix(imgFile, ".eps")) {
            return XWPFDocument.PICTURE_TYPE_EPS;
        }
        if (hasSuffix(imgFile, ".bmp")) {
            return XWPFDocument.PICTURE_TYPE_BMP;
        }
        if (hasSuffix(imgFile, ".wpg")) {
            return XWPFDocument.PICTURE_TYPE_WPG;
        }
        return UNSUPPORTED_FORMAT;
    }

    /** Case-insensitive {@code endsWith}; regionMatches returns false when the suffix is longer than the name. */
    private static boolean hasSuffix(String name, String suffix) {
        return name.regionMatches(true, name.length() - suffix.length(), suffix, 0, suffix.length());
    }

    /** Returns a matcher that finds {@code ${...}} placeholders in the given text. */
    private static Matcher matcher(String str) {
        return PLACEHOLDER.matcher(str);
    }

    /**
     * Loads a .docx file into memory.
     *
     * @param fileName path of the file to open
     * @return the parsed document, never {@code null}
     * @throws IOException if the file is missing or cannot be parsed; the error is
     *         propagated rather than turned into a {@code null} return
     */
    private static XWPFDocument openDocx(String fileName) throws IOException {
        // Opened outside the try so the finally never sees a null stream.
        InputStream inputStream = new FileInputStream(fileName);
        try {
            return new XWPFDocument(inputStream);
        } finally {
            inputStream.close();
        }
    }
}