<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.2</version>
</dependency>
import com.aspose.words.*;
import com.scorpio.sdk.constant.AliYunConstant;
import com.scorpio.sdk.service.OssService;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.io.*;
import java.util.List;
/**
 * Document parsing utilities: renders a preview image of a document's first page and
 * extracts the leading paragraphs of a Word document as simple HTML.
 *
 * <p>All helpers are static but rely on the {@link OssService} that Spring injects through
 * {@link #setOssService(OssService)}; calling them before injection fails with a
 * {@link NullPointerException}.
 *
 * @Author xq
 * @Package: com.scorpio.sdk.utils
 * @Project: scorpio-project
 * @Date 2023/4/27 9:53
 */
@Component
public class DocAnalysisUtil {

    /** Shared OSS client; assigned once by the autowired setter below. */
    private static OssService ossService;

    /**
     * Stores the injected OSS service in the static field so the static helpers can use it.
     *
     * @param ossService service used to read source files and store generated images
     */
    @Autowired
    public void setOssService(OssService ossService) {
        DocAnalysisUtil.ossService = ossService;
    }

    /**
     * Generates a JPEG image of the document's first page and stores it through the OSS service.
     *
     * @param filePath path of the source document, passed to {@code ossService.getFileInputStream}
     * @return the value of {@code ossService.getLogoWatermarkImageUrl} for the stored image,
     *         or {@code null} when loading, rendering or storing fails
     */
    public static String generateImg(String filePath) {
        // Fetch the source document stream for the given file path.
        InputStream fileInputStream = ossService.getFileInputStream(filePath);
        try {
            Document doc = new Document(fileInputStream);

            ImageSaveOptions iso = new ImageSaveOptions(SaveFormat.JPEG);
            iso.setResolution(128);
            iso.setPrettyFormat(true);
            iso.setUseAntiAliasing(true);
            // Render only the first page.
            iso.setPageIndex(0);

            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            doc.save(baos, iso);

            // A ByteArrayInputStream holds no external resource, so it does not need closing.
            InputStream resInputStream = new ByteArrayInputStream(baos.toByteArray());
            // Storage path of the uploaded image.
            String path = ossService.putObject(AliYunConstant.getOperationPath(SaveFormat.getName(SaveFormat.JPEG).toLowerCase()), resInputStream);
            return ossService.getLogoWatermarkImageUrl(path);
        } catch (Exception e) {
            // Best effort: report the failure and signal it to the caller with null.
            e.printStackTrace();
            return null;
        } finally {
            // The source stream was previously never closed.
            closeQuietly(fileInputStream);
        }
    }

    /**
     * Reads the first paragraphs of a Word document and returns them as HTML, one
     * {@code <p>} element per paragraph. The OOXML (.docx) reader is used when the stream's
     * magic bytes say so; every other input is handed to the binary (.doc) reader.
     *
     * <p>NOTE(review): paragraph text is not HTML-escaped before being wrapped in
     * {@code <p>} tags; confirm callers treat the result as trusted markup.
     *
     * @param filePath path of the source document, passed to {@code ossService.getFileInputStream}
     * @param num      maximum number of paragraphs to read; values {@code <= 0} read nothing
     * @return the concatenated {@code <p>...</p>} fragments; empty when no stream is available
     *         or parsing fails before any paragraph is read
     */
    public static String readText(String filePath, int num) {
        StringBuilder sb = new StringBuilder();
        // Fetch the source document stream for the given file path.
        InputStream fileInputStream = ossService.getFileInputStream(filePath);
        if (fileInputStream == null) {
            // Nothing to parse; previously this surfaced as a NullPointerException.
            return sb.toString();
        }
        InputStream inputStream = null;
        try {
            // Wraps the stream if needed so the magic bytes can be inspected and then re-read.
            inputStream = FileMagic.prepareToCheckMagic(fileInputStream);
            if (FileMagic.valueOf(inputStream) == FileMagic.OOXML) {
                try (XWPFDocument docx = new XWPFDocument(inputStream)) {
                    List<XWPFParagraph> paras = docx.getParagraphs();
                    int limit = Math.min(num, paras.size());
                    for (int i = 0; i < limit; i++) {
                        appendParagraph(sb, paras.get(i).getText());
                    }
                }
            } else {
                try (HWPFDocument doc = new HWPFDocument(inputStream)) {
                    org.apache.poi.hwpf.usermodel.Range r = doc.getRange();
                    int limit = Math.min(num, r.numParagraphs());
                    for (int i = 0; i < limit; i++) {
                        appendParagraph(sb, r.getParagraph(i).text());
                    }
                }
            }
        } catch (Exception e) {
            // Best effort: keep whatever was read so far and report the failure.
            e.printStackTrace();
        } finally {
            // Close each stream independently so one failing close cannot skip the other.
            closeQuietly(inputStream);
            closeQuietly(fileInputStream);
        }
        return sb.toString();
    }

    /** Appends one paragraph wrapped in {@code <p>} tags after stripping carriage returns. */
    private static void appendParagraph(StringBuilder sb, String text) {
        // NOTE(review): the second replace reads as space -> space here; presumably the first
        // argument was meant to be a non-breaking or full-width space. Confirm in the original file.
        sb.append("<p>").append(text.replace("\r", "").replace(" ", " ")).append("</p>");
    }

    /** Closes the resource if present; a failure is reported but never propagated. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
读取指定页面文本(后面在文档上找到的方法)
/**
 * Reads the paragraphs that end on the first page of a document and returns them as HTML,
 * one {@code <p>} element per paragraph. Only the body of the document's first section is
 * scanned, including nested paragraphs.
 *
 * @param filePath path of the source document, passed to {@code ossService.getFileInputStream}
 * @return the concatenated {@code <p>...</p>} fragments; empty when loading fails or no
 *         paragraph ends on the first page
 */
public static String readText(String filePath) {
    // Page index 1 is the first page (per the original author's note).
    final int pageIndex = 1;
    // Fetch the source document stream for the given file path.
    InputStream fileInputStream = ossService.getFileInputStream(filePath);
    StringBuilder sb = new StringBuilder();
    try {
        Document doc = new Document(fileInputStream);
        LayoutCollector layoutCollector = new LayoutCollector(doc);
        NodeCollection childNodes = doc.getFirstSection().getBody().getChildNodes(NodeType.PARAGRAPH, true);
        for (Object childNode : childNodes) {
            // Fully qualified: the file's explicit import of
            // org.apache.poi.hwpf.usermodel.Paragraph shadows the Aspose type of the same name.
            com.aspose.words.Paragraph paragraph = (com.aspose.words.Paragraph) childNode;
            // Page on which the current paragraph ends.
            int paraPage = layoutCollector.getEndPageIndex(paragraph);
            if (paraPage > pageIndex) {
                // Past the requested page: stop scanning.
                break;
            }
            if (paraPage == pageIndex) {
                sb.append("<p>").append(paragraph.getText().replace("\r", "").replace(" ", " ")).append("</p>");
            }
        }
    } catch (Exception e) {
        // Best effort: keep whatever was read so far and report the failure.
        e.printStackTrace();
    } finally {
        try {
            if (fileInputStream != null) {
                fileInputStream.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return sb.toString();
}