maven依赖
<dependency>
<groupId>com.deepoove</groupId>
<artifactId>poi-tl</artifactId>
<version>1.9.1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.4</version>
</dependency>
获取段落的大纲级别
/**
 * Resolves the outline level of a paragraph as a decimal string.
 *
 * <p>Lookup order:
 * <ol>
 *   <li>the outline level set directly on the paragraph's own properties;</li>
 *   <li>the outline level of the paragraph's style;</li>
 *   <li>the outline level of each style reached by following the {@code basedOn}
 *       chain of that style (bounded, so a cyclic chain cannot loop forever).</li>
 * </ol>
 * The first outline-level element found decides the result; a level with no value or a
 * value greater than 8 is reported as "no level".
 *
 * @param doc  document whose style table is used to resolve the paragraph's style
 * @param para paragraph to inspect
 * @return the outline level ("0" to "8"), or an empty string when none is found
 */
private static String getTitleLvl(XWPFDocument doc, XWPFParagraph para) {
    final String noLevel = "";
    final BigInteger maxLevel = BigInteger.valueOf(8);
    // Upper bound on basedOn hops; protects against a malformed, cyclic style chain.
    final int maxStyleDepth = 32;

    if (doc == null || para == null || para.getCTP() == null) {
        return noLevel;
    }

    // 1. Outline level set directly on the paragraph.
    if (para.getCTP().getPPr() != null && para.getCTP().getPPr().getOutlineLvl() != null) {
        BigInteger val = para.getCTP().getPPr().getOutlineLvl().getVal();
        return (val == null || val.compareTo(maxLevel) > 0) ? noLevel : String.valueOf(val);
    }

    if (doc.getStyles() == null) {
        return noLevel;
    }

    // 2. + 3. Outline level of the paragraph's style, then of its ancestors via basedOn.
    String styleId = para.getStyle();
    for (int depth = 0; styleId != null && depth < maxStyleDepth; depth++) {
        // Fully qualified so this method does not depend on an extra import in the file.
        org.apache.poi.xwpf.usermodel.XWPFStyle style = doc.getStyles().getStyle(styleId);
        if (style == null || style.getCTStyle() == null) {
            return noLevel;
        }
        if (style.getCTStyle().getPPr() != null
                && style.getCTStyle().getPPr().getOutlineLvl() != null) {
            BigInteger val = style.getCTStyle().getPPr().getOutlineLvl().getVal();
            // Same validation as the paragraph-level branch: never return the string "null"
            // and never report a level above 8.
            return (val == null || val.compareTo(maxLevel) > 0) ? noLevel : String.valueOf(val);
        }
        styleId = style.getCTStyle().getBasedOn() == null
                ? null
                : style.getCTStyle().getBasedOn().getVal();
    }
    return noLevel;
}
按标题截取文档内容
/**
 * Extracts one top-level section of a Word document.
 *
 * <p>Scans the body elements for the first paragraph whose outline level (as reported by
 * {@code getTitleLvl}) is "0" and whose text contains any of the given title names. The
 * section runs from that paragraph up to, but not including, the next paragraph with
 * outline level "0" (or to the end of the body when there is none). Every body element
 * outside that range is removed and the remaining document is written to the returned
 * stream.
 *
 * @param inputStream source of the .docx content
 * @param titleName   candidate title texts; the first level-0 paragraph containing any of
 *                    them starts the section (null entries are ignored)
 * @return a stream holding the trimmed document, or an empty stream when no title matched
 * @throws IOException if the document cannot be read or written
 */
public static ByteArrayOutputStream interceptByInputStream(InputStream inputStream, String... titleName) throws IOException {
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    // try-with-resources: the document was previously never closed.
    try (MyXWPDocument xdoc = new MyXWPDocument(inputStream)) {
        List<IBodyElement> bodyElements = xdoc.getBodyElements();
        int count = bodyElements.size();
        log.debug("body element count: {}", count);

        int start = 0;
        // Exclusive end index of the section; defaults to "until the end of the body".
        int end = count;
        boolean found = false;

        for (int i = 0; i < count; i++) {
            IBodyElement bodyElement = bodyElements.get(i);
            if (bodyElement.getElementType() != BodyElementType.PARAGRAPH) {
                continue;
            }
            XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
            // Only top-level headings (outline level 0) delimit sections.
            if (!"0".equals(getTitleLvl(xdoc, paragraph))) {
                continue;
            }
            if (found) {
                // Next top-level heading: the section ends right before it. Keeping the
                // index exclusive means element i - 1 (the section's last element) survives.
                end = i;
                break;
            }
            if (titleName != null) {
                String text = paragraph.getText();
                for (String s : titleName) {
                    if (s != null && text.contains(s)) {
                        start = i;
                        found = true;
                        break;
                    }
                }
            }
        }

        log.info("#################################################################,开始分割开始标标记[{}],结束标记:[{}]", start, end);

        if (found) {
            // Remove back-to-front so earlier indices stay valid while deleting.
            for (int i = count - 1; i >= end; i--) {
                xdoc.removeBodyElementAndSTD(i);
            }
            log.info("*****************");
            for (int i = start - 1; i >= 0; i--) {
                xdoc.removeBodyElementAndSTD(i);
            }
            xdoc.write(stream);
        }
    }
    return stream;
}
MyXWPDocument 基于 XWPFDocument 实现(XWPFDocument 对于可编辑的区域没有处理)
/**
 * {@link XWPFDocument} subclass that adds a body-element removal method which also
 * handles content-control (SDT) elements.
 *
 * @author hs
 * @version 1.0
 * @date: 2023/5/30
 */
public class MyXWPDocument extends XWPFDocument {

    /**
     * Loads a document from the given stream.
     *
     * @param inputStream source passed straight to the {@link XWPFDocument} constructor
     * @throws IOException propagated from the superclass constructor
     */
    public MyXWPDocument(InputStream inputStream) throws IOException {
        super(inputStream);
    }

    /**
     * Removes the body element at the given position. Tables, paragraphs and content
     * controls are also removed from their type-specific list and from the underlying
     * document body; any other element type is only dropped from the body-element list.
     *
     * @param pos index into the body-element list
     * @return {@code true} if {@code pos} was a valid index and the element was removed,
     *         {@code false} otherwise
     */
    public boolean removeBodyElementAndSTD(int pos) {
        // Guard clause: reject out-of-range positions up front.
        if (pos < 0 || pos >= bodyElements.size()) {
            return false;
        }

        BodyElementType kind = bodyElements.get(pos).getElementType();
        if (kind == BodyElementType.TABLE) {
            int tableIndex = getTablePos(pos);
            tables.remove(tableIndex);
            getDocument().getBody().removeTbl(tableIndex);
        } else if (kind == BodyElementType.PARAGRAPH) {
            int paragraphIndex = getParagraphPos(pos);
            paragraphs.remove(paragraphIndex);
            getDocument().getBody().removeP(paragraphIndex);
        } else if (kind == BodyElementType.CONTENTCONTROL) {
            // Locate the control by identity; its index in contentControls is also used
            // as the SDT index in the document body.
            int sdtIndex = -1;
            for (int k = 0; k < contentControls.size(); k++) {
                if (contentControls.get(k) == bodyElements.get(pos)) {
                    sdtIndex = k;
                    break;
                }
            }
            if (sdtIndex != -1) {
                getDocument().getBody().removeSdt(sdtIndex);
                contentControls.remove(sdtIndex);
            }
        }

        bodyElements.remove(pos);
        return true;
    }
}
大梦谁先觉?平生我自知,草堂春睡足,窗外日迟迟。