1. poi读取docx
//读取纯文本内容
/**
 * Opens {@code word.docx} (relative path) and prints its plain-text content
 * to standard output.
 *
 * @throws Exception if the file cannot be opened or parsed as a .docx document
 */
public static void readDoc() throws Exception {
    // try-with-resources closes the document and then the stream, even when
    // parsing or text extraction fails.
    try (InputStream inputStream = new FileInputStream("word.docx");
         XWPFDocument document = new XWPFDocument(inputStream)) {
        POIXMLTextExtractor extractor = new XWPFWordExtractor(document);
        System.out.println(extractor.getText());
    }
}
2. poi读取docx转html
/**
 * Converts {@code word.docx} to an XHTML fragment written to {@code word.html}.
 * Embedded images are handed to a {@code FileImageExtractor} rooted at the
 * configured image folder, and image URIs in the HTML are resolved against
 * the relative base {@code image}.
 *
 * @throws Exception if the input cannot be read, or the conversion or the
 *                   output write fails
 */
public static void docx2html() throws Exception {
    // Folder that receives the images extracted from the document.
    File imgFolder = new File("D:\\workspace\\zsk_server\\image");
    XHTMLOptions options = XHTMLOptions.create();
    options.setExtractor(new FileImageExtractor(imgFolder));
    // Base used for image URIs inside the generated HTML (relative path).
    options.URIResolver(new BasicURIResolver("image"));
    options.setIgnoreStylesIfUnused(false);
    options.setFragment(true);

    // Make sure the folder that will contain the HTML file exists. The folder
    // is derived from the output file itself; an empty path never exists and
    // cannot be created, so checking it would be a no-op.
    File htmlFile = new File("word.html");
    File folder = htmlFile.getAbsoluteFile().getParentFile();
    if (folder != null && !folder.exists()) {
        folder.mkdirs();
    }

    // Convert the XWPFDocument to XHTML; all three resources are closed in
    // reverse order even if the conversion throws.
    try (InputStream inputStream = new FileInputStream("word.docx");
         XWPFDocument document = new XWPFDocument(inputStream);
         OutputStream out = new FileOutputStream(htmlFile)) {
        XHTMLConverter.getInstance().convert(document, out, options);
    }
}
3. poi读取docx目录
poi读取docx目录,前提是文档有标准的导航目录。亲测支持自定义标题样式。
//标题抽取
/**
 * Prints the headings of {@code word.docx}: for every paragraph whose style
 * name starts with {@code "heading"}, the style name and the paragraph text
 * are written to standard output on separate lines.
 *
 * @throws Exception if the file cannot be opened or parsed as a .docx document
 */
public static void getTitle() throws Exception {
    try (InputStream in = new FileInputStream("word.docx");
         XWPFDocument doc = new XWPFDocument(in)) {
        XWPFStyles styles = doc.getStyles();
        if (styles == null) {
            // Without a style table no paragraph can be matched to a heading style.
            return;
        }
        Iterator<XWPFParagraph> paragraphsIterator = doc.getParagraphsIterator();
        while (paragraphsIterator.hasNext()) {
            XWPFParagraph paragraph = paragraphsIterator.next();
            String styleId = paragraph.getStyleID();
            if (!StringUtil.isNotEmpty(styleId)) {
                continue;
            }
            // A paragraph may reference a style id that is not defined in the
            // style table, and a style may lack a name; skip both instead of
            // failing with a NullPointerException.
            XWPFStyle style = styles.getStyle(styleId);
            String styleName = (style == null) ? null : style.getName();
            if (styleName != null && styleName.startsWith("heading")) {
                System.out.println(styleName);
                System.out.println(paragraph.getText());
            }
        }
    }
}
4. poi读取docx添加水印
此操作为添加简单文字水印,也可同时设置页眉页脚,支持默认一致页眉和根据页码奇偶区分;
来源:原作地址
/**
 * Loads {@code word.docx}, appends a body paragraph, adds a text watermark
 * and saves the result as {@code CreateWordHeaderFooterWatermark.docx}.
 * The watermark shape in the default header is recoloured to light grey and
 * rotated by 90 degrees.
 *
 * @throws Exception if the input cannot be read or the output cannot be written
 */
public static void addWaterMarket() throws Exception {
    // The document and the input stream are closed even if a later step throws.
    try (InputStream in = new FileInputStream("word.docx");
         XWPFDocument doc = new XWPFDocument(in)) {
        // the body content
        XWPFParagraph paragraph = doc.createParagraph();
        XWPFRun run = paragraph.createRun();
        run.setText("The Body:");

        // create header-footer policy if the document does not have one yet
        XWPFHeaderFooterPolicy headerFooterPolicy = doc.getHeaderFooterPolicy();
        if (headerFooterPolicy == null) {
            headerFooterPolicy = doc.createHeaderFooterPolicy();
        }

        // create default watermark - fill colour black and not rotated
        headerFooterPolicy.createWatermark("Watermark");

        // Get the default header. getHeader(int) takes a PAGE NUMBER, so
        // passing DEFAULT.intValue() (an enum ordinal) would select a header
        // by page parity instead of by type; ask for the default header directly.
        // Note: createWatermark also sets FIRST and EVEN headers,
        // but this code does not update those other headers.
        XWPFHeader header = headerFooterPolicy.getDefaultHeader();
        paragraph = (header == null) ? null : header.getParagraphArray(0);

        // Only touch the shape when the expected run/pict structure is present.
        if (paragraph != null
                && paragraph.getCTP().sizeOfRArray() > 0
                && paragraph.getCTP().getRArray(0).sizeOfPictArray() > 0) {
            // get com.microsoft.schemas.vml.CTShape where fill colour and rotation are set
            org.apache.xmlbeans.XmlObject[] xmlobjects =
                    paragraph.getCTP().getRArray(0).getPictArray(0).selectChildren(
                            new javax.xml.namespace.QName("urn:schemas-microsoft-com:vml", "shape"));
            if (xmlobjects.length > 0) {
                com.microsoft.schemas.vml.CTShape ctshape =
                        (com.microsoft.schemas.vml.CTShape) xmlobjects[0];
                ctshape.setFillcolor("#d8d8d8");
                ctshape.setStyle(ctshape.getStyle() + ";rotation:90;");
            }
        }

        try (FileOutputStream out = new FileOutputStream("CreateWordHeaderFooterWatermark.docx")) {
            doc.write(out);
        }
    }
}
5. poi写出docx 保留目录样式
根据预先定义的样式,导出word文档的标题层级和正文。
//标题样式复制
/**
 * Loads {@code word.docx}, appends a paragraph with the text {@code 123456}
 * that reuses the paragraph properties of the document's first paragraph,
 * and saves the result as {@code xxxxxx.docx}.
 *
 * @throws Exception if the input cannot be read or the output cannot be written
 */
public static void writeWord() throws Exception {
    try (InputStream in = new FileInputStream("word.docx");
         XWPFDocument doc = new XWPFDocument(in)) {
        List<XWPFParagraph> paragraphs = doc.getParagraphs();
        // Pick the template before adding the new paragraph, so that an
        // initially empty document does not use the new paragraph as its
        // own template.
        XWPFParagraph template = paragraphs.isEmpty() ? null : paragraphs.get(0);

        XWPFParagraph paragraph = doc.createParagraph();
        paragraph.createRun().setText("123456");

        // Copy the paragraph properties only when the template actually has some.
        if (template != null && template.getCTP().getPPr() != null) {
            paragraph.getCTP().setPPr(template.getCTP().getPPr());
        }

        try (FileOutputStream out = new FileOutputStream("xxxxxx.docx")) {
            doc.write(out);
        }
    }
}
6. poi html转doc
poi操作html转doc效果较差,建议以其他方式实现。
/**
 * Stores the bytes of {@code xxxxx.html} as an entry named
 * {@code WordDocument} inside a new POIFS file system and writes that file
 * system to {@code word.doc}.
 *
 * @throws Exception if the input cannot be read or the output cannot be written
 */
public static void html2wrod() throws Exception {
    // Both streams are closed (output first, then input) even when creating
    // or writing the file system fails.
    try (InputStream is = new FileInputStream("xxxxx.html");
         OutputStream os = new FileOutputStream("word.doc")) {
        POIFSFileSystem fs = new POIFSFileSystem();
        // Entry name corresponds to org.apache.poi.hdf.extractor.WordDocument
        fs.createDocument(is, "WordDocument");
        fs.writeFilesystem(os);
    }
}