pom依赖
<!-- Third-party library used by the controller: Spire.PDF for Java, pinned to 8.7.0. -->
<dependencies>
    <dependency>
        <groupId>e-iceblue</groupId>
        <artifactId>spire.pdf</artifactId>
        <version>8.7.0</version>
    </dependency>
</dependencies>

<!-- Extra Maven repository; presumably the spire.pdf artifact is resolved from here
     rather than from Maven Central (verify against your build). -->
<repositories>
    <repository>
        <id>com.e-iceblue</id>
        <url>https://repo.e-iceblue.cn/repository/maven-public</url>
    </repository>
</repositories>
Controller代码
import com.spire.pdf.PdfDocument;
import com.spire.pdf.PdfPageBase;
import com.spire.pdf.utilities.PdfTable;
import com.spire.pdf.utilities.PdfTableExtractor;
import com.spire.pdf.widget.PdfPageCollection;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.io.InputStream;
/**
 * PDF management endpoints.
 *
 * <p>Exposes a single upload endpoint that reads a PDF with Spire.PDF, logs some
 * diagnostic information about it, and returns the text extracted from its pages.
 */
@Slf4j
@RestController
@RequestMapping(value = "/pdf")
@Api(value = "PdfController", tags = "PDF管理")
public class PdfController {

    /**
     * Watermark line that appears in the extracted text when Spire.PDF runs without a
     * commercial license. It is matched literally (including the surrounding CR LF),
     * so the dots are plain characters rather than regex wildcards.
     */
    private static final String EVALUATION_WARNING =
            "\r\n Evaluation Warning : The document was created with Spire.PDF for Java.\r\n";

    /**
     * Reads an uploaded PDF and returns the text of all its pages concatenated in page order.
     *
     * <p>For every table found on a page, the column/row counts are logged, together with
     * the cell at row 1, column 0 when the table is large enough to contain that cell.
     * Table content is only logged; it is not added to the returned text.
     *
     * @param file the uploaded PDF, bound from the multipart request parameter {@code file}
     * @return the extracted text with the Spire.PDF evaluation watermark removed
     * @throws IOException if the upload's input stream cannot be opened or closed
     */
    @ApiOperation(value = "读取PDf")
    @PostMapping(value = "/read")
    String readPdf(@RequestParam("file") MultipartFile file) throws IOException {
        // The stream stays open until after the document is closed (inner finally runs first),
        // so the document never outlives the stream it was loaded from.
        try (InputStream inputStream = file.getInputStream()) {
            PdfDocument pdfDocument = new PdfDocument();
            try {
                pdfDocument.loadFromStream(inputStream);
                log.info("title:======={}", pdfDocument.getDocumentInformation().getTitle());

                PdfPageCollection pages = pdfDocument.getPages();
                StringBuilder textBuilder = new StringBuilder();
                PdfTableExtractor pdfTableExtractor = new PdfTableExtractor(pdfDocument);

                for (int i = 0; i < pages.getCount(); i++) {
                    log.info("i:======={}", i);
                    PdfPageBase pdfPage = pages.get(i);
                    // Original author's note: passing false strips the whitespace around the
                    // extracted text. NOTE(review): confirm against the Spire.PDF API docs.
                    textBuilder.append(pdfPage.extractText(false));
                    logTables(pdfTableExtractor.extractTable(i));
                }

                // Strip the watermark that the unlicensed (evaluation) library injects.
                String text = textBuilder.toString().replace(EVALUATION_WARNING, "");
                log.info("text:======={}", text);
                return text;
            } finally {
                // Release the document even when loading or extraction fails.
                pdfDocument.close();
            }
        }
    }

    /** Logs the dimensions of each table and, when present, the cell at row 1, column 0. */
    private void logTables(PdfTable[] pdfTables) {
        // The vendor's samples null-check the extraction result, so a page without
        // tables must not be iterated blindly.
        if (pdfTables == null) {
            return;
        }
        for (PdfTable pdfTable : pdfTables) {
            int columnCount = pdfTable.getColumnCount();
            int rowCount = pdfTable.getRowCount();
            log.info("columnCount:======={}, rowCount======={}", columnCount, rowCount);
            // Cell (1, 0) only exists when there are at least two rows and one column.
            if (rowCount > 1 && columnCount > 0) {
                String table = pdfTable.getText(1, 0);
                log.info("table:======={}", table);
            }
        }
    }
}