一、这里我的思路是,在上传文件时候把数据库对应id存到Elasticsearch对应索引类型的id,然后利用工具类把pdf、word、excel文件内容存到Elasticsearch的id对应json里,返回所有文件id的list
二、这里我封装了一个读取pdf、word、excel文件工具类
package springboot.elasticsearch;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.PdfPageBase;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
public class ReadUtil {
/**
* JSON字符串特殊字符处理
* @param s
* @return String
*/
public static String stringJson(String s) {
StringBuffer sb = new StringBuffer();
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
switch (c) {
case '\"':
sb.append("\\\"");
break;
case '\\':
sb.append("\\\\");
break;
case '/':
sb.append("\\/");
break;
case '\b':
sb.append("\\b");
break;
case '\f':
sb.append("\\f");
break;
case '\n':
sb.append("\\n");
break;
case '\r':
sb.append("\\r");