一 序
之前用POI读取Word内容,但是拿不到图片的位置(虽然能获得文档中的所有图片,却无法知道这些图片在文档中的具体位置)。
看了大佬写的文章,才动手试一下:
pom.xml:
这里要注意依赖的版本:很多留言咨询的主要都是版本不匹配的问题。
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<!-- ooxml-schemas is versioned separately from the poi/poi-ooxml/poi-scratchpad artifacts. -->
<!-- NOTE(review): confirm that 1.3 is the release meant to be used with POI 3.17 before changing either version. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.17</version>
</dependency>
util 代码:https://www.cnblogs.com/ct-csu/p/8178932.html
import org.apache.commons.collections.CollectionUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.List;
/**
 * Small driver that lists every picture referenced from the paragraphs of a
 * .docx file together with the text of the paragraph that follows it.
 *
 * <p>For each paragraph, the picture relationship ids are obtained from
 * {@code XWPFUtils.readImageInParagraph}; each id is then resolved to its
 * picture data through the document, and one tab-separated row is printed:
 * picture id, picture file name, text of the next paragraph.
 */
public class XWPFUtilsTest {

    /**
     * Opens the hard-coded document, walks its paragraphs in order and prints
     * one row per picture found.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or parsed
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the document and the stream even when
        // parsing or printing fails part-way through.
        try (InputStream in = new FileInputStream("/Users/benmu/Documents/17.医学心理学 第7版(1).docx");
             XWPFDocument xwpfDocument = new XWPFDocument(in)) {
            List<XWPFParagraph> paragraphList = xwpfDocument.getParagraphs();
            System.out.println("图片的索引\t|图片名称\t|图片下一段文字的内容\t");
            System.out.println("------------------------------------------");
            for (int i = 0; i < paragraphList.size(); i++) {
                List<String> imageBundleList = XWPFUtils.readImageInParagraph(paragraphList.get(i));
                if (CollectionUtils.isNotEmpty(imageBundleList)) {
                    // A picture in the very last paragraph has no following
                    // paragraph; use an empty string instead of indexing past
                    // the end of the list.
                    String nextParagraphText = "";
                    if (i + 1 < paragraphList.size()) {
                        nextParagraphText = paragraphList.get(i + 1).getParagraphText();
                    }
                    for (String pictureId : imageBundleList) {
                        XWPFPictureData pictureData = xwpfDocument.getPictureDataByID(pictureId);
                        // The id may not resolve to picture data; print the row
                        // with an empty name rather than failing on null.
                        String imageName = "";
                        if (pictureData != null) {
                            imageName = pictureData.getFileName();
                        }
                        System.out.println(pictureId + "\t|" + imageName + "\t|" + nextParagraphText);
                    }
                }
            }
        }
    }
}
效果: