SpringBoot集成ElasticSearch(ES)

A尘埃

已于 2024-03-12 11:51:23 修改

阅读量934

点赞数 13

文章标签： spring boot elasticsearch

于 2024-03-06 16:27:55 首次发布

本文链接：https://blog.csdn.net/usa_washington/article/details/136496966

版权

ElasticSearch环境搭建

采用docker-compose搭建，具体配置如下：


version: '3'

# 网桥es -> 方便相互通讯
networks:
  es:

services:
  elasticsearch:
    image: registry.cn-hangzhou.aliyuncs.com/zhengqing/elasticsearch:7.14.1      # 原镜像`elasticsearch:7.14.1`
    container_name: elasticsearch             # 容器名为'elasticsearch'
    restart: unless-stopped                           # 指定容器退出后的重启策略为始终重启，但是不考虑在Docker守护进程启动时就已经停止了的容器
    volumes:                                  # 数据卷挂载路径设置,将本机目录映射到容器目录
      - "./elasticsearch/data:/usr/share/elasticsearch/data"
      - "./elasticsearch/logs:/usr/share/elasticsearch/logs"
      - "./elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml"
#      - "./elasticsearch/config/jvm.options:/usr/share/elasticsearch/config/jvm.options"
      - "./elasticsearch/plugins/ik:/usr/share/elasticsearch/plugins/ik" # IK中文分词插件
    environment:                              # 设置环境变量,相当于docker run命令中的-e
      TZ: Asia/Shanghai
      LANG: en_US.UTF-8
      TAKE_FILE_OWNERSHIP: "true"  # 权限
      discovery.type: single-node
      ES_JAVA_OPTS: "-Xmx512m -Xms512m"
      #ELASTIC_PASSWORD: "123456" # elastic账号密码
    ports:
      - "9200:9200"
      - "9300:9300"
    networks:
      - es

  kibana:
    image: registry.cn-hangzhou.aliyuncs.com/zhengqing/kibana:7.14.1       # 原镜像`kibana:7.14.1`
    container_name: kibana
    restart: unless-stopped
    volumes:
      - ./elasticsearch/kibana/config/kibana.yml:/usr/share/kibana/config/kibana.yml
    ports:
      - "5601:5601"
    depends_on:
      - elasticsearch
    links:
      - elasticsearch
    networks:
      - es

部署

# 运行
docker-compose -f docker-compose-elasticsearch.yml -p elasticsearch up -d
# 运行后，给当前目录下所有文件赋予权限(读、写、执行)
#chmod -R 777 ./elasticsearch

ES访问地址：ip地址:9200 默认账号密码：elastic/123456

kibana访问地址：ip地址:5601/app/dev_tools#/console 默认账号密码：elastic/123456

设置ES密码

# 进入容器
docker exec -it elasticsearch /bin/bash
# 设置密码-随机生成密码
# elasticsearch-setup-passwords auto
# 设置密码-手动设置密码
elasticsearch-setup-passwords interactive
# 访问
curl 127.0.0.1:9200 -u elastic:123456

一、依赖


<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>springboot-demo</artifactId>
        <groupId>com.et</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>elasticsearch</artifactId>



    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-webflux</artifactId>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>${fastjson.version}</version>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>io.projectreactor</groupId>
            <artifactId>reactor-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

</project>

二、配置文件和启动类

# elasticsearch.yml 文件中的 cluster.name
spring.data.elasticsearch.cluster-name=docker-cluster
# elasticsearch 调用地址，多个使用“,”隔开
spring.data.elasticsearch.cluster-nodes=127.0.0.1:9300
#spring.data.elasticsearch.repositories.enabled=true
#spring.data.elasticsearch.username=elastic
#spring.data.elasticsearch.password=123456
#spring.data.elasticsearch.network.host=0.0.0.0

@SpringBootApplication
public class EssearchApplication {

    public static void main(String[] args) {
        SpringApplication.run(EssearchApplication.class, args);
    }

}

三、document实体类

@Ducument(indexName = "orders",type = "product")
@Mapping(mappingPath = "productIndex.json")//解决ik分词不能使用的问题
public class ProductDocument implements Serializable{
	
	@Id
    private String id;
    //@Field(analyzer = "ik_max_word",searchAnalyzer = "ik_max_word")
    private String productName;
    //@Field(analyzer = "ik_max_word",searchAnalyzer = "ik_max_word")
    private String productDesc;

    private Date createTime;

    private Date updateTime;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getProductName() {
        return productName;
    }

    public void setProductName(String productName) {
        this.productName = productName;
    }

    public String getProductDesc() {
        return productDesc;
    }

    public void setProductDesc(String productDesc) {
        this.productDesc = productDesc;
    }

    public Date getCreateTime() {
        return createTime;
    }

    public void setCreateTime(Date createTime) {
        this.createTime = createTime;
    }

    public Date getUpdateTime() {
        return updateTime;
    }

    public void setUpdateTime(Date updateTime) {
        this.updateTime = updateTime;
    }
}

四、Service接口以及实现类

public interface EsSearchService extends BaseSearchService<ProductDocument>{
	
	//保存
	void save(ProductDocument... productDocuments);

	//删除
	void delete(String id);
	
	//清空索引
	void deleteAll();

	//根据id查询
	ProductDocument getById(String id);

	//查询全部
	List<ProductDocument> getAll();
}

@Service
public class EsSearchServiceImpl extends BaseSearchServiceImpl<ProductDocument> implements EsSearchService {

	private Logger log = LoggerFactory.getLogger(getClass());

	@Resource
	private ElasticsearchTemplate elasticsearchTemplate;

	@Resource
	private ProductDocumentRepository productDocumentRepository;
	
	@Override
    public void save(ProductDocument ... productDocuments) {
        elasticsearchTemplate.putMapping(ProductDocument.class);
        if(productDocuments.length > 0){
            /*Arrays.asList(productDocuments).parallelStream()
                    .map(productDocumentRepository::save)
                    .forEach(productDocument -> log.info("【保存数据】：{}", JSON.toJSONString(productDocument)));*/
            log.info("【保存索引】：{}",JSON.toJSONString(productDocumentRepository.saveAll(Arrays.asList(productDocuments))));
        }
    }

    @Override
    public void delete(String id) {
        productDocumentRepository.deleteById(id);
    }

    @Override
    public void deleteAll() {
        productDocumentRepository.deleteAll();
    }

    @Override
    public ProductDocument getById(String id) {
        return productDocumentRepository.findById(id).get();
    }

    @Override
    public List<ProductDocument> getAll() {
        List<ProductDocument> list = new ArrayList<>();
        productDocumentRepository.findAll().forEach(list::add);
        return list;
    }
}

五、repository

@Component
public interface ProductDocumentRepository extends ElasticsearchRepository<ProductDocument,String> {
}

productIndex.json

{
  "properties": {
    "createTime": {
      "type": "long"
    },
    "productDesc": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "productName": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    "updateTime": {
      "type": "long"
    }
  }
}

六、测试

@RunWith(SpringRunner.class)
@SpringBootTest
public class EssearchApplicationTests {
    private Logger log = LoggerFactory.getLogger(getClass());

    @Autowired
    private EsSearchService esSearchService;

    @Test
    public void save() {
        log.info("【创建索引前的数据条数】：{}",esSearchService.getAll().size());

        ProductDocument productDocument = ProductDocumentBuilder.create()
                .addId(System.currentTimeMillis() + "01")
                .addProductName("无印良品 MUJI 基础润肤化妆水")
                .addProductDesc("无印良品 MUJI 基础润肤化妆水 高保湿型 200ml")
                .addCreateTime(new Date()).addUpdateTime(new Date())
                .builder();

        ProductDocument productDocument1 = ProductDocumentBuilder.create()
                .addId(System.currentTimeMillis() + "02")
                .addProductName("荣耀 V10 尊享版")
                .addProductDesc("荣耀 V10 尊享版 6GB+128GB 幻夜黑 移动联通电信4G全面屏游戏手机 双卡双待")
                .addCreateTime(new Date()).addUpdateTime(new Date())
                .builder();

        ProductDocument productDocument2 = ProductDocumentBuilder.create()
                .addId(System.currentTimeMillis() + "03")
                .addProductName("资生堂(SHISEIDO) 尿素红罐护手霜")
                .addProductDesc("日本进口 资生堂(SHISEIDO) 尿素红罐护手霜 100g/罐 男女通用 深层滋养 改善粗糙")
                .addCreateTime(new Date()).addUpdateTime(new Date())
                .builder();

        esSearchService.save(productDocument,productDocument1,productDocument2);

        log.info("【创建索引ID】:{},{},{}",productDocument.getId(),productDocument1.getId(),productDocument2.getId());
        log.info("【创建索引后的数据条数】：{}",esSearchService.getAll().size());
    }

    @Test
    public void getAll(){
        esSearchService.getAll().parallelStream()
                .map(JSON::toJSONString)
                .forEach(System.out::println);
    }

    @Test
    public void deleteAll() {
        esSearchService.deleteAll();
    }

    @Test
    public void getById() {
        log.info("【根据ID查询内容】：{}", JSON.toJSONString(esSearchService.getById("154470178213401")));
    }

    @Test
    public void query() {
        log.info("【根据关键字搜索内容】：{}", JSON.toJSONString(esSearchService.query("无印良品荣耀",ProductDocument.class)));
    }

    @Test
    public void queryHit() {

        String keyword = "联通尿素";
        String indexName = "orders";

        List<Map<String,Object>> searchHits = esSearchService.queryHit(keyword,indexName,"productName","productDesc");
        log.info("【根据关键字搜索内容，命中部分高亮，返回内容】：{}", JSON.toJSONString(searchHits));
        //[{"highlight":{"productDesc":"<span style='color:red'>无印良品</span> MUJI 基础润肤化妆水 高保湿型 200ml","productName":"<span style='color:red'>无印良品</span> MUJI 基础润肤化妆水"},"source":{"productDesc":"无印良品 MUJI 基础润肤化妆水 高保湿型 200ml","createTime":1544755966204,"updateTime":1544755966204,"id":"154475596620401","productName":"无印良品 MUJI 基础润肤化妆水"}},{"highlight":{"productDesc":"<span style='color:red'>荣耀</span> V10 尊享版 6GB+128GB 幻夜黑 移动联通电信4G全面屏游戏手机 双卡双待","productName":"<span style='color:red'>荣耀</span> V10 尊享版"},"source":{"productDesc":"荣耀 V10 尊享版 6GB+128GB 幻夜黑 移动联通电信4G全面屏游戏手机 双卡双待","createTime":1544755966204,"updateTime":1544755966204,"id":"154475596620402","productName":"荣耀 V10 尊享版"}}]
    }

    @Test
    public void queryHitByPage() {

        String keyword = "联通尿素";
        String indexName = "orders";

        Page<Map<String,Object>> searchHits = esSearchService.queryHitByPage(1,1,keyword,indexName,"productName","productDesc");
        log.info("【分页查询，根据关键字搜索内容，命中部分高亮，返回内容】：{}", JSON.toJSONString(searchHits));
        //[{"highlight":{"productDesc":"<span style='color:red'>无印良品</span> MUJI 基础润肤化妆水 高保湿型 200ml","productName":"<span style='color:red'>无印良品</span> MUJI 基础润肤化妆水"},"source":{"productDesc":"无印良品 MUJI 基础润肤化妆水 高保湿型 200ml","createTime":1544755966204,"updateTime":1544755966204,"id":"154475596620401","productName":"无印良品 MUJI 基础润肤化妆水"}},{"highlight":{"productDesc":"<span style='color:red'>荣耀</span> V10 尊享版 6GB+128GB 幻夜黑 移动联通电信4G全面屏游戏手机 双卡双待","productName":"<span style='color:red'>荣耀</span> V10 尊享版"},"source":{"productDesc":"荣耀 V10 尊享版 6GB+128GB 幻夜黑 移动联通电信4G全面屏游戏手机 双卡双待","createTime":1544755966204,"updateTime":1544755966204,"id":"154475596620402","productName":"荣耀 V10 尊享版"}}]
    }

    @Test
    public void deleteIndex() {
        log.info("【删除索引库】");
        esSearchService.deleteIndex("orders");
    }

}

在这里插入图片描述

SpringBoot+ElasticSearch实现文档内容抽取、高亮分词、全文检索

文件内容识别

第一步：要用es实现文本附件内容的识别，需要先给es安装一个插件：Ingest Attachment Processor Plugin

这是一个文本抽取内容识别的插件，利用了Elasticsearch的ingest node功能，提供了关键的预处理器attachment。还有其他的：OCR之类的其它插件。

①、到es的安装文件bin目录下执行(es是使用docker安装的，所以需要进入到es的docker镜像里面的bin目录下安装插件)
在这里插入图片描述

②、创建一个文本抽取的管道

主要是用于将上传的附件转换成文本内容，支持（word，PDF，txt，excel没试，应该也支持）
在这里插入图片描述

{
    "description": "Extract attachment information",
    "processors": [
        {
            "attachment": {
                "field": "content",
                "ignore_missing": true
            }
        },
        {
            "remove": {
                "field": "content"
            }
        }
    ]
}

③、定义内容存储的索引
在这里插入图片描述

mapping：定义的是存储的字段格式
setting:索引的配置信息，这边定义了一个分词（使用的是jieba的分词）
【注意：内容检索的是attachment.content字段，一定要使用分词，不使用分词的话，检索会检索不出来内容】

{
  "mappings": {
    "properties": {
      "id":{
        "type": "keyword"
      },
      "fileName":{
        "type": "text",
        "analyzer": "my_ana"
      },
      "contentType":{
        "type": "text",
         "analyzer": "my_ana"
      },
       "fileUrl":{
        "type": "text"
      },
      "attachment": {
        "properties": {
          "content":{
            "type": "text",
            "analyzer": "my_ana"
          }
        }
      }
    }
  },
  "settings": {
    "analysis": {
      "filter": {
        "jieba_stop": {
          "type":        "stop",
          "stopwords_path": "stopword/stopwords.txt"
        },
        "jieba_synonym": {
          "type":        "synonym",
          "synonyms_path": "synonym/synonyms.txt"
        }
      },
      "analyzer": {
        "my_ana": {
          "tokenizer": "jieba_index",
          "filter": [
            "lowercase",
            "jieba_stop",
            "jieba_synonym"
          ]
        }
      }
    }
  }
}

④、测试
在这里插入图片描述
测试内容需要将附件转换成base64格式
在线转换文件的地址：https://www.zhangxinxu.com/sp/base64.html

{
    "id":"1",
 "name":"进口红酒",
 "filetype":"pdf",
    "contenttype":"文章",
 "content":"文章内容"
}

查询刚刚上传的文件：
在这里插入图片描述

{
    "took": 861,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": 1.0,
        "hits": [
            {
                "_index": "fileinfo",
                "_type": "_doc",
                "_id": "lkPEgYIBz3NlBKQzXYX9",
                "_score": 1.0,
                "_source": {
                    "fileName": "测试_20220809164145A002.docx",
                    "updateTime": 1660034506000,
                    "attachment": {
                        "date": "2022-08-09T01:38:00Z",
                        "content_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                        "author": "DELL",
                        "language": "lt",
                        "content": "内容",
                        "content_length": 2572
                    },
                    "createTime": 1660034506000,
                    "fileUrl": "http://localhost:8092/fileInfo/profile/upload/fileInfo/2022/08/09/测试_20220809164145A002.docx",
                    "id": 1306333192,
                    "contentType": "文章",
                    "fileType": "docx"
                }
            },
            {
                "_index": "fileinfo",
                "_type": "_doc",
                "_id": "mUPHgYIBz3NlBKQzwIVW",
                "_score": 1.0,
                "_source": {
                    "fileName": "测试_20220809164527A001.docx",
                    "updateTime": 1660034728000,
                    "attachment": {
                        "date": "2022-08-09T01:38:00Z",
                        "content_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                        "author": "DELL",
                        "language": "lt",
                        "content": "内容",
                        "content_length": 2572
                    },
                    "createTime": 1660034728000,
                    "fileUrl": "http://localhost:8092/fileInfo/profile/upload/fileInfo/2022/08/09/测试_20220809164527A001.docx",
                    "id": 1306333193,
                    "contentType": "文章",
                    "fileType": "docx"
                }
            },
            {
                "_index": "fileinfo",
                "_type": "_doc",
                "_id": "JDqshoIBbkTNu1UgkzFK",
                "_score": 1.0,
                "_source": {
                    "fileName": "txt测试_20220810153351A001.txt",
                    "updateTime": 1660116831000,
                    "attachment": {
                        "content_type": "text/plain; charset=UTF-8",
                        "language": "lt",
                        "content": "内容",
                        "content_length": 804
                    },
                    "createTime": 1660116831000,
                    "fileUrl": "http://localhost:8092/fileInfo/profile/upload/fileInfo/2022/08/10/txt测试_20220810153351A001.txt",
                    "id": 1306333194,
                    "contentType": "告示",
                    "fileType": "txt"
                }
            }
        ]
    }
}

调用上传的接口，可以看到文本内容已经抽取到es里面了，后面就可以直接分词检索内容，高亮显示了

文件上传，数据库存储附件信息和附件上传地址；
调用es实现文本内容抽取，将抽取的内容放到对应索引下；
提供小程序全文检索的api实现根据文件名称关键词联想，文件名称内容全文检索模糊匹配，并高亮显示分词匹配字段；

# 数据源配置
spring:
    # 服务模块
    devtools:
        restart:
            # 热部署开关
            enabled: true
    # 搜索引擎
    elasticsearch:
        rest:
            url: 127.0.0.1
            uris: 127.0.0.1:9200
            connection-timeout: 1000
            read-timeout: 3000
            username: elastic
            password: 123456

elsticsearchConfig（连接配置）

@Configuration
public class ElasticsearchConfig {
    @Value("${spring.elasticsearch.rest.url}")
    private String edUrl;
    @Value("${spring.elasticsearch.rest.username}")
    private String userName;
    @Value("${spring.elasticsearch.rest.password}")
    private String password;
 
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        //设置连接的用户名密码
        final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(userName, password));
        RestHighLevelClient client =  new RestHighLevelClient(RestClient.builder(
                        new HttpHost(edUrl, 9200,"http"))
                .setHttpClientConfigCallback(httpClientBuilder -> {
                    httpClientBuilder.disableAuthCaching();
                    //保持连接池处于链接状态，该bug曾导致es一段时间没使用，第一次连接访问超时
                    httpClientBuilder.setKeepAliveStrategy(((response, context) -> Duration.ofMinutes(5).toMillis()));
                    return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
                })
        );
        return client;
    }
}

文件上传保存文件信息并抽取内容到es
实体对象FileInfo

@Setter
@Getter
@Document(indexName = "fileinfo",createIndex = false)
public class FileInfo {
    /**
    * 主键
    */
    @Field(name = "id", type = FieldType.Integer)
    private Integer id;
 
    /**
    * 文件名称
    */
    @Field(name = "fileName", type = FieldType.Text,analyzer = "jieba_index",searchAnalyzer = "jieba_index")
    private String fileName;
 
    /**
    * 文件类型
    */
    @Field(name = "fileType",  type = FieldType.Keyword)
    private String fileType;
 
    /**
    * 内容类型
    */
    @Field(name = "contentType", type = FieldType.Text)
    private String contentType;
 
    /**
     * 附件内容
     */
    @Field(name = "attachment.content", type = FieldType.Text,analyzer = "jieba_index",searchAnalyzer = "jieba_index")
    @TableField(exist = false)
    private String content;
 
    /**
    * 文件地址
    */
    @Field(name = "fileUrl", type = FieldType.Text)
    private String fileUrl;
 
    /**
     * 创建时间
     */
    private Date createTime;
 
    /**
     * 更新时间
     */
    private Date updateTime;
}

Controller

@RestController
@RequestMapping("/fileInfo")
public class FileInfoController extends BaseController {
    /**
     * 服务对象
     */
    @Resource
    private FileInfoService fileInfoService;
 
 
    @PutMapping("uploadFile")
    public AjaxResult uploadFile(String contentType, MultipartFile file) {
        return fileInfoService.uploadFileInfo(contentType,file);
    }
}

serviceImpl实现类

@Service
@Slf4j
public class FileInfoServiceImpl implements FileInfoService{
    @Resource
    private ServerConfig serverConfig;
 
    @Autowired
    @Qualifier("restHighLevelClient")
    private RestHighLevelClient client;
 
    @Resource
    private FileInfoMapper fileInfoMapper;
 
    /**
     * 上传文件并进行文件内容识别上传到es
     * @param contentType
     * @param file
     * @return
     */
    @Override
    public AjaxResult uploadFileInfo(String contentType, MultipartFile file) {
        if (FastUtils.checkNullOrEmpty(contentType,file)){
            return AjaxResult.error("请求参数不能为空");
        }
        try {
            // 上传文件路径
            String filePath = RuoYiConfig.getUploadPath() + "/fileInfo";
            FileInfo fileInfo = new FileInfo();
            // 上传并返回新文件名称
            String fileName = FileUploadUtils.upload(filePath, file);
            String prefix = fileName.substring(fileName.lastIndexOf(".")+1);
            File files = File.createTempFile(fileName, prefix);
            file.transferTo(files);
            String url = serverConfig.getUrl() + "/fileInfo" + fileName;
            fileInfo.setFileName(FileUtils.getName(fileName));
            fileInfo.setFileType(prefix);
            fileInfo.setFileUrl(url);
            fileInfo.setContentType(contentType);
            int result = fileInfoMapper.insertSelective(fileInfo);
            if (result > 0) {
                fileInfo = fileInfoMapper.selectOne(new LambdaQueryWrapper<FileInfo>().eq(FileInfo::getFileUrl,fileInfo.getFileUrl()));
                byte[] bytes = getContent(files);
                String base64 = Base64.getEncoder().encodeToString(bytes);
                fileInfo.setContent(base64);
                IndexRequest indexRequest = new IndexRequest("fileinfo");
                //上传同时，使用attachment pipline进行提取文件
                indexRequest.source(JSON.toJSONString(fileInfo), XContentType.JSON);
                indexRequest.setPipeline("attachment");
                IndexResponse indexResponse = client.index(indexRequest, RequestOptions.DEFAULT);
                log.info("indexResponse:" + indexResponse);
            }
            AjaxResult ajax = AjaxResult.success(fileInfo);
            return ajax;
        } catch (Exception e) {
            return AjaxResult.error(e.getMessage());
        }
    }
 
 
     /**
     * 文件转base64
     *
     * @param file
     * @return
     * @throws IOException
     */
    private byte[] getContent(File file) throws IOException {
 
        long fileSize = file.length();
        if (fileSize > Integer.MAX_VALUE) {
            log.info("file too big...");
            return null;
        }
        FileInputStream fi = new FileInputStream(file);
        byte[] buffer = new byte[(int) fileSize];
        int offset = 0;
        int numRead = 0;
        while (offset < buffer.length
                && (numRead = fi.read(buffer, offset, buffer.length - offset)) >= 0) {
            offset += numRead;
        }
        // 确保所有数据均被读取
        if (offset != buffer.length) {
            throw new ServiceException("Could not completely read file "
                    + file.getName());
        }
        fi.close();
        return buffer;
    }
}

高亮分词检索

@Data
@ApiModel(value ="WarningInfoDto",description = "告警信息")
public class WarningInfoDto{
    /**
     * 页数
     */
    @ApiModelProperty("页数")
    private Integer pageIndex;
 
    /**
     * 每页数量
     */
    @ApiModelProperty("每页数量")
    private Integer pageSize;
 
    /**
     * 查询关键词
     */
    @ApiModelProperty("查询关键词")
    private String keyword;
 
    /**
     * 内容类型
     */
    private List<String> contentType;
 
    /**
     * 用户手机号
     */
    private String phone;
}

Controller

@Api("搜索引擎")
@RestController
@RequestMapping("es")
public class ElasticsearchController extends BaseController {
    @Resource
    private ElasticsearchService elasticsearchService;
 
    /**
     * 告警信息关键词联想
     *
     * @param warningInfoDto
     * @return
     */
    @ApiOperation("关键词联想")
    @ApiImplicitParams({
            @ApiImplicitParam(name = "contenttype", value = "文档类型", required = true, dataType = "String", dataTypeClass = String.class),
            @ApiImplicitParam(name = "keyword", value = "关键词", required = true, dataType = "String", dataTypeClass = String.class)
    })
    @PostMapping("getAssociationalWordDoc")
    public AjaxResult getAssociationalWordDoc(@RequestBody WarningInfoDto warningInfoDto, HttpServletRequest request) {
        List<String> words = elasticsearchService.getAssociationalWordOther(warningInfoDto,request);
        return AjaxResult.success(words);
    }
 
 
    /**
     * 告警信息高亮分词分页查询
     *
     * @param warningInfoDto
     * @return
     */
    @ApiOperation("高亮分词分页查询")
    @ApiImplicitParams({
            @ApiImplicitParam(name = "keyword", value = "关键词", required = true, dataType = "String", dataTypeClass = String.class),
            @ApiImplicitParam(name = "pageIndex", value = "页码", required = true, dataType = "Integer", dataTypeClass = Integer.class),
            @ApiImplicitParam(name = "pageSize", value = "页数", required = true, dataType = "Integer", dataTypeClass = Integer.class),
            @ApiImplicitParam(name = "contenttype", value = "文档类型", required = true, dataType = "String", dataTypeClass = String.class)
    })
    @PostMapping("queryHighLightWordDoc")
    public AjaxResult queryHighLightWordDoc(@RequestBody WarningInfoDto warningInfoDto,HttpServletRequest request) {
        IPage<FileInfo> warningInfoListPage = elasticsearchService.queryHighLightWordOther(warningInfoDto,request);
        return AjaxResult.success(warningInfoListPage);
    }
}

serviceImpl实现类

@Service
@Slf4j
public class ElasticsearchServiceImpl implements ElasticsearchService {
 
    @Resource
    private WhiteListService whiteListService;
 
    @Autowired
    @Qualifier("restHighLevelClient")
    private RestHighLevelClient client;
 
    @Autowired
    private RedisCache redisCache;
 
    @Resource
    private TokenService tokenService;
 
 
    /**
     * 文档信息关键词联想(根据输入框的词语联想文件名称)
     *
     * @param warningInfoDto
     * @return
     */
    @Override
    public List<String> getAssociationalWordOther(WarningInfoDto warningInfoDto, HttpServletRequest request) {
        //需要查询的字段
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery()
                .should(QueryBuilders.matchBoolPrefixQuery("fileName", warningInfoDto.getKeyword()));
        //contentType标签内容过滤
        boolQueryBuilder.must(QueryBuilders.termsQuery("contentType", warningInfoDto.getContentType()));
        //构建高亮查询
        NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
                .withQuery(boolQueryBuilder)
                .withHighlightFields(
                        new HighlightBuilder.Field("fileName")
                )
                .withHighlightBuilder(new HighlightBuilder().preTags("<span style='color:red'>").postTags("</span>"))
                .build();
        //查询
        SearchHits<FileInfo> search = null;
        try {
            search = elasticsearchRestTemplate.search(searchQuery, FileInfo.class);
        } catch (Exception ex) {
            ex.printStackTrace();
            throw new ServiceException(String.format("操作错误，请联系管理员！%s", ex.getMessage()));
        }
        //设置一个最后需要返回的实体类集合
        List<String> resultList = new LinkedList<>();
        //遍历返回的内容进行处理
        for (org.springframework.data.elasticsearch.core.SearchHit<FileInfo> searchHit : search.getSearchHits()) {
            //高亮的内容
            Map<String, List<String>> highlightFields = searchHit.getHighlightFields();
            //将高亮的内容填充到content中
            searchHit.getContent().setFileName(highlightFields.get("fileName") == null ? searchHit.getContent().getFileName() : highlightFields.get("fileName").get(0));
            if (highlightFields.get("fileName") != null) {
                resultList.add(searchHit.getContent().getFileName());
            }
        }
        //list去重
        List<String> newResult = null;
        if (!FastUtils.checkNullOrEmpty(resultList)) {
            if (resultList.size() > 9) {
                newResult = resultList.stream().distinct().collect(Collectors.toList()).subList(0, 9);
            } else {
                newResult = resultList.stream().distinct().collect(Collectors.toList());
            }
        }
        return newResult;
    }
 
    /**
     * 高亮分词搜索其它类型文档
     *
     * @param warningInfoDto
     * @param request
     * @return
     */
    @Override
    public IPage<FileInfo> queryHighLightWordOther(WarningInfoDto warningInfoDto, HttpServletRequest request) {
        //分页
        Pageable pageable = PageRequest.of(warningInfoDto.getPageIndex() - 1, warningInfoDto.getPageSize());
         //需要查询的字段，根据输入的内容分词全文检索fileName和content字段
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery()
                .should(QueryBuilders.matchBoolPrefixQuery("fileName", warningInfoDto.getKeyword()))
                .should(QueryBuilders.matchBoolPrefixQuery("attachment.content", warningInfoDto.getKeyword()));
        //contentType标签内容过滤
        boolQueryBuilder.must(QueryBuilders.termsQuery("contentType", warningInfoDto.getContentType()));
        //构建高亮查询
        NativeSearchQuery searchQuery = new NativeSearchQueryBuilder()
                .withQuery(boolQueryBuilder)
                .withHighlightFields(
                        new HighlightBuilder.Field("fileName"), new HighlightBuilder.Field("attachment.content")
                )
                .withHighlightBuilder(new HighlightBuilder().preTags("<span style='color:red'>").postTags("</span>"))
                .build();
        //查询
        SearchHits<FileInfo> search = null;
        try {
            search = elasticsearchRestTemplate.search(searchQuery, FileInfo.class);
        } catch (Exception ex) {
            ex.printStackTrace();
            throw new ServiceException(String.format("操作错误，请联系管理员！%s", ex.getMessage()));
        }
        //设置一个最后需要返回的实体类集合
        List<FileInfo> resultList = new LinkedList<>();
        //遍历返回的内容进行处理
        for (org.springframework.data.elasticsearch.core.SearchHit<FileInfo> searchHit : search.getSearchHits()) {
            //高亮的内容
            Map<String, List<String>> highlightFields = searchHit.getHighlightFields();
            //将高亮的内容填充到content中
            searchHit.getContent().setFileName(highlightFields.get("fileName") == null ? searchHit.getContent().getFileName() : highlightFields.get("fileName").get(0));
            searchHit.getContent().setContent(highlightFields.get("content") == null ? searchHit.getContent().getContent() : highlightFields.get("content").get(0));
            resultList.add(searchHit.getContent());
        }
        //手动分页返回信息
        IPage<FileInfo> warningInfoIPage = new Page<>();
        warningInfoIPage.setTotal(search.getTotalHits());
        warningInfoIPage.setRecords(resultList);
        warningInfoIPage.setCurrent(warningInfoDto.getPageIndex());
        warningInfoIPage.setSize(warningInfoDto.getPageSize());
        warningInfoIPage.setPages(warningInfoIPage.getTotal() % warningInfoDto.getPageSize());
        return warningInfoIPage;
    }
}

测试
在这里插入图片描述

--请求jason
{
    "keyword":"全库备份",
    "contentType":["告示"],
    "pageIndex":1,
    "pageSize":10
}
 
 
--响应
{
    "msg": "操作成功",
    "code": 200,
    "data": {
        "records": [
            {
                "id": 1306333194,
                "fileName": "txt测试_20220810153351A001.txt",
                "fileType": "txt",
                "contentType": "告示",
                "content": "•\t秒级快速<span style='color:red'>备份</span>\r\n不论多大的数据量，<span style='color:red'>全库</span><span style='color:red'>备份</span>只需30秒，而且<span style='color:red'>备份过程</span>不会对数据库加锁，对应用程序几乎无影响，全天24小时均可进行<span style='color:red'>备份</span>。",
                "fileUrl": "http://localhost:8092/fileInfo/profile/upload/fileInfo/2022/08/10/txt测试_20220810153351A001.txt",
                "createTime": "2022-08-10T15:33:51.000+08:00",
                "updateTime": "2022-08-10T15:33:51.000+08:00"
            }
        ],
        "total": 1,
        "size": 10,
        "current": 1,
        "orders": [],
        "optimizeCountSql": true,
        "searchCount": true,
        "countId": null,
        "maxLimit": null,
        "pages": 1
    }
}