1.pom.xml 中新增
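<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.0.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.0.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.0.0</version>
</dependency>
<!-- NOTE: poi-scratchpad is listed twice with different versions; keep only one, matching the poi / poi-ooxml version. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.17</version>
</dependency>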
2.service
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import org.springframework.stereotype.Service;
import java.io.InputStream;
/**
 * Service that loads a Word document from the classpath and prints its plain text.
 *
 * <p>The two Word formats need different Apache POI parsers: {@link XWPFDocument} for
 * {@code .docx} files and {@link HWPFDocument} for legacy {@code .doc} files. Feeding a file
 * to the wrong parser makes POI throw, so the parser is chosen from the file extension.
 */
@Slf4j
@Service
public class PdfService {

    /**
     * Reads the fixed classpath resource {@code pdf/aa.docx} and prints its text.
     *
     * @param requestDTO request payload; not used by the current implementation
     * @throws Exception if the resource cannot be opened or parsed
     */
    public void sendPdf(WorkFlowRequestDTO requestDTO) throws Exception {
        testReadByDoc("pdf/aa.docx");
    }

    /**
     * Prints the plain text of a Word document found on the classpath.
     *
     * <p>Paths ending in {@code docx} are parsed with {@link XWPFDocument}; paths ending in
     * {@code doc} are parsed with {@link HWPFDocument}. The extension match ignores case.
     * Any other path is opened and closed without producing output.
     *
     * @param filePath classpath-relative location of the document
     * @throws Exception if the resource does not exist or cannot be parsed
     */
    public void testReadByDoc(String filePath) throws Exception {
        // try-with-resources guarantees the stream is released on every path,
        // including unsupported extensions and parser failures.
        try (InputStream is = new ClassPathResource(filePath).getInputStream()) {
            if (hasSuffix(filePath, "docx")) {
                XWPFDocument document = new XWPFDocument(is);
                // Only the extractor is closed explicitly, as in the original code;
                // it wraps the document it was built from.
                try (XWPFWordExtractor extractor = new XWPFWordExtractor(document)) {
                    // Print the document text.
                    System.out.println(extractor.getText());
                }
            } else if (hasSuffix(filePath, "doc")) {
                try (HWPFDocument document = new HWPFDocument(is)) {
                    // Print the document text.
                    System.out.println(document.getDocumentText());
                }
            }
        }
    }

    /** Returns whether {@code path} ends with {@code suffix}, ignoring case. */
    private static boolean hasSuffix(String path, String suffix) {
        int offset = path.length() - suffix.length();
        return offset >= 0 && path.regionMatches(true, offset, suffix, 0, suffix.length());
    }
}
3.注意
.doc 与 .docx 的解析类不能混用：HWPFDocument 只能读取 .doc（OLE2 二进制格式），XWPFDocument 只能读取 .docx（OOXML 压缩包格式）。用错解析类会抛异常，因为两种文件的内部结构不一样。