使用Java代码操作ES
创建微服务项目services-search
添加依赖
<!-- Elasticsearch version: 7.4.2. Declared once as a property so every reference stays in sync. -->
<properties>
    <elasticsearch.version>7.4.2</elasticsearch.version>
</properties>
<!-- Dependency: elasticsearch-rest-high-level-client, versioned through the property above -->
<dependencies>
    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>elasticsearch-rest-high-level-client</artifactId>
        <version>${elasticsearch.version}</version>
    </dependency>
</dependencies>
修改配置文件
# Service configuration. Nesting is significant in YAML: without it every key
# becomes top-level and `port` / `password` collide.
# NOTE(review): credentials are stored in plain text here; consider supplying
# them through environment variables or an external configuration source.
server:
  port: 83
spring:
  application:
    name: service-util
  cloud:
    nacos:
      discovery:
        server-addr: 外网IP:8848
        username: nacos
        password: kgcqh
  redis:
    host: 外网IP
    port: 6379
    database: 0
    password: kgcqh
management:
  endpoints:
    web:
      exposure:
        include: "*"
# Custom properties bound by ElasticSearchConfig (@ConfigurationProperties prefix "es")
es:
  hostname: 外网IP
  port: 9200
  protocol: http
创建ES的配置类ElasticSearchConfig
创建RestHighLevelClient对象
/**
 * Spring configuration that exposes a {@link RestHighLevelClient} bean.
 *
 * <p>The connection settings are bound from properties under the {@code es}
 * prefix ({@code es.hostname}, {@code es.port}, {@code es.protocol}) through
 * the Lombok-generated setters.
 */
@Configuration
@ConfigurationProperties(prefix = "es")
@Setter
public class ElasticSearchConfig {

    /**
     * Request options shared by all Elasticsearch calls. Built from
     * {@link RequestOptions#DEFAULT} without any customization.
     */
    public static final RequestOptions COMMON_OPTIONS = RequestOptions.DEFAULT.toBuilder().build();

    /** Elasticsearch host name or IP address. */
    private String hostname;

    /** Elasticsearch HTTP port. */
    private int port;

    /** Scheme used to reach Elasticsearch, e.g. {@code http}. */
    private String protocol;

    /**
     * Creates the high-level REST client pointing at the configured node.
     *
     * @return a client connected to {@code protocol://hostname:port}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost esNode = new HttpHost(hostname, port, protocol);
        return new RestHighLevelClient(RestClient.builder(esNode));
    }
}
ESUtil工具类(可以直接用)
package cn.kgc.qh.util;
import cn.kgc.qh.config.ElasticSearchConfig;
import cn.kgc.qh.entity.Company;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.annotation.TableId;
import com.fasterxml.jackson.core.JsonParser;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.StringUtils;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.Scroll;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.metrics.Cardinality;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @Description: ElasticSearch的工具类
* @author:
* @date: 2021年12月07日 9:15
*/
@Component
@Slf4j
public class ESUtil {
@Resource
private RestHighLevelClient restHighLevelClient;
/**
* @param index 索引
* @param bachList 保存的数据
* @param <T>
* @return
*/
public <T extends Object> boolean saveBatch(String index,List<T> bachList){
BulkRequest bulkRequest=new BulkRequest();
//封装保存的数据
for(int i=0;i<bachList.size();i++){
T t= bachList.get(i);
String jsonString= JSON.toJSONString(t);
IndexRequest indexRequest = new IndexRequest();
indexRequest.index(index);
indexRequest.source(jsonString,XContentType.JSON);
bulkRequest.add(indexRequest);
}
try {
restHighLevelClient.bulk(bulkRequest,ElasticSearchConfig.COMMON_OPTIONS);
return true;
} catch (IOException e) {
e.printStackTrace();
}
return false;
}
/**
* ES保存数据
*
* @param index 索引
* @param id 主键
* @param jsonString 新增的数据
* @return
*/
public boolean saveOne(String index, String id, String jsonString) {
//封装保存的数据
IndexRequest indexRequest = new IndexRequest();
indexRequest.index(index);
indexRequest.id(id);
indexRequest.source(jsonString, XContentType.JSON);
try {
//执行新增操作
IndexResponse indexResponse =
restHighLevelClient.index(indexRequest, ElasticSearchConfig.COMMON_OPTIONS);
log.info("新增的结果:" + indexResponse);
return true;
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
/**
* 根据ID删除单条记录
*
* @param index
* @param id
* @return
*/
public boolean deleteById(String index, String id) {
DeleteRequest deleteRequest = new DeleteRequest();
deleteRequest.index(index);
deleteRequest.id(id);
try {
DeleteResponse deleteResponse =
restHighLevelClient.delete(deleteRequest,
ElasticSearchConfig.COMMON_OPTIONS);
log.info("删除的结果:" + deleteResponse);
return true;
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
/**
* 更新数据
*
* @param index
* @param id
* @param jsonString
* @return
*/
public boolean updateById(String index, String id, String jsonString) {
UpdateRequest updateRequest = new UpdateRequest();
updateRequest.index(index);
updateRequest.id(id);
updateRequest.doc(jsonString, XContentType.JSON);
try {
UpdateResponse response =
restHighLevelClient.update(updateRequest, ElasticSearchConfig.COMMON_OPTIONS);
log.info("更新的结果:" + response);
return true;
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
/**
* 根据ID从ES中查询数据
* @param index
* @param id
* @param targetClass
* @return
*
*/
public <T extends Object> T select(String index,String id, Class<T> targetClass){
SearchRequest searchRequest=new SearchRequest();
searchRequest.indices(index);
//构建查询条件
SearchSourceBuilder searchSourceBuilder=new SearchSourceBuilder();
searchSourceBuilder.query(QueryBuilders.termQuery("_id",id));
searchRequest.source(searchSourceBuilder);
try {
SearchResponse searchResponse= restHighLevelClient.search(searchRequest,ElasticSearchConfig.COMMON_OPTIONS);
SearchHits hits= searchResponse.getHits();
SearchHit [] searchHits= hits.getHits(); //查询结果
if(searchHits==null||searchHits.length==0){ //如果数组为空,表示没有查询结果
return null;
}
String jsonString= searchHits[0].getSourceAsString();
T t= JSON.parseObject(jsonString,targetClass);
return t;
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
/**
*
* @param index
* @param targetClass
* @param searchSourceBuilder 查选条件的构建器
* @param <T>
* @return
*/
public <T extends Object> List<T> select(String index,Class<T> targetClass,
SearchSourceBuilder searchSourceBuilder){
SearchRequest searchRequest=new SearchRequest(); //查询请求
searchRequest.indices(index);
searchRequest.source(searchSourceBuilder); //请求中添加中查询条件
List<T> list=new ArrayList<>();
try {
//执行查询,获得查询结果
SearchResponse searchResponse=
restHighLevelClient.search(searchRequest,ElasticSearchConfig.COMMON_OPTIONS);
SearchHits hits= searchResponse.getHits(); //封装结果
SearchHit[] searchHits= hits.getHits();
for(SearchHit searchHit:searchHits){
String jsonString= searchHit.getSourceAsString();
T t=JSON.parseObject(jsonString,targetClass);
list.add(t);
}
} catch (IOException e) {
e.printStackTrace();
}
return list;
}
/**
* 使用from+size的方式实现 ES的分页查询
* @param index 索引
* @param searchSourceBuilder 查询条件
* @param targetClass 目标类
* @param from 从第几条开始
* @param size 显示多少条记录
* @param <T>
* @return Map: 当前页的数据 总页数
*/
public <T extends Object> Map<String,Object> page(String index,
SearchSourceBuilder searchSourceBuilder,
Class<T> targetClass,int from,int size){
SearchRequest searchRequest=new SearchRequest();
searchRequest.indices(index);
searchSourceBuilder.from(from);
searchSourceBuilder.size(size);
searchRequest.source(searchSourceBuilder);
Map<String,Object> result=new HashMap<>();
List<T> resultList=new ArrayList<>();
int page=0;
try {
SearchResponse searchResponse=
restHighLevelClient.search(searchRequest,ElasticSearchConfig.COMMON_OPTIONS);
SearchHits hits= searchResponse.getHits();
long totalValue=hits.getTotalHits().value; //获得总记录数 3.0/2=1.5 Math.ceil(1.5) 2.0;
page = (int) Math.ceil((double)totalValue/size); //总页数
SearchHit [] searchHits= hits.getHits();
for(SearchHit searchHit:searchHits){
String jsonString= searchHit.getSourceAsString();
T t= JSON.parseObject(jsonString,targetClass);
resultList.add(t);
}
} catch (IOException e) {
e.printStackTrace();
}
result.put("page",page);
result.put("list",resultList);
return result;
}
/**
* 使用scroll分页
* @param index
* @param searchSourceBuilder
* @param targetClass
* @param size
* @param scrollId
* @param <T>
* @return Map 当前页的数据 scrollId page
*/
public <T extends Object> Map<String,Object> page(String index,
SearchSourceBuilder searchSourceBuilder,
Class<T> targetClass,int size,String scrollId){
SearchRequest searchRequest=new SearchRequest();
searchRequest.indices(index);
Scroll scroll=new Scroll(TimeValue.timeValueMinutes(1)); //指定scroll镜像的时间为1分钟
searchSourceBuilder.size(size); //每页显示多少条记录
Map<String,Object> map=new HashMap<>();
SearchResponse searchResponse=null;
try {
if(StringUtils.isBlank(scrollId)){ //scroll方式的第一次查询
searchRequest.scroll(scroll); //查询是scroll查询 镜像的时间为1分钟
searchRequest.source(searchSourceBuilder); //查询请求中添加查询条件
searchResponse=restHighLevelClient.search(searchRequest,ElasticSearchConfig.COMMON_OPTIONS);
}else{ //scroll方式的后面查询 请求:GET /_search/scroll
SearchScrollRequest searchScrollRequest=new SearchScrollRequest();
searchScrollRequest.scroll(scroll);
searchScrollRequest.scrollId(scrollId);
searchResponse=restHighLevelClient.scroll(searchScrollRequest,ElasticSearchConfig.COMMON_OPTIONS);
}
//封装查询结果
map= searchResponseToMap(searchResponse,size,targetClass);
} catch (IOException e) {
e.printStackTrace();
}
return map;
}
//当前页的数据 scrollId 总页数
private <T extends Object> Map<String,Object>
searchResponseToMap(SearchResponse searchResponse,int size,Class<T> targetClass){
SearchHits hits= searchResponse.getHits(); //查询的结果
double count=hits.getTotalHits().value; //获得总记录数
int page= (int)Math.ceil(count/size); //算出总页数
Map<String,Object> map=new HashMap<>(); //返回的结果
List<T> list=new ArrayList<>(); //当前页的数据
SearchHit [] searchHits= hits.getHits(); //获得hits中的数据
for(SearchHit temp:searchHits){
String jsonString= temp.getSourceAsString();
T t=JSON.parseObject(jsonString,targetClass);
list.add(t);
}
map.put("page",page);
map.put("scrollId",searchResponse.getScrollId());
map.put("list",list);
return map;
}
}
下面解释一下分页(我这里有两种)
ES中的分页
方式一 from + size 方式分页
from:从第几条开始 size:显示多少条记录
GET /students/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name": "scott"
}
}
]
}
},
"from": 0,
"size": 10
}
深度分页问题(使用from+size分页查询时,需要限制可翻到的页数)
分页查询数据,每页显示10条记录
我们可以假设在一个有5个主分片的索引中搜索。当我们请求结果的第一页时,每个分片产生各自的前10条结果并返回给协调节点,协调节点对这50条结果排序,得到全部结果的前10条。现在假设我们请求第1001页,即第10001到10010条结果。处理方式完全相同,只是每个分片不得不产生前10010条结果,然后协调节点对全部50050条结果排序,最后丢弃其中的50040条。
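可以看到,在分布式系统中,对结果排序的成本随分页深度的增加而急剧上升:每个分片都要产生 from+size 条结果,协调节点需要对 分片数 ×(from+size) 条结果排序。因此ES默认通过 index.max_result_window(默认值10000)限制 from+size 的上限。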
缺点:有深度查询问题, 在数据量比较少时,或者限制分页的页数时可以使用
分页方式二 scroll
第一次请求传入size条数, 按照_doc排序。请求地址上scroll=1m 表示此次查询的会话是一分钟,并且在这一分钟内不会查询到向ES中添加的新的数据。
GET /company-index/_search?scroll=1m
{
"query": {
"match_all": {}
},
"sort": [
{
"_doc": {
"order": "desc"
}
}
],
"size": 2
}
后面的查询指定第一次查询获得的scroll_id,不需要再传入size参数
GET /_search/scroll
{
"scroll_id" : "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAEGAUWMlRKYnQzcW1RMDJBeVhnODF6ekxTQQ==",
"scroll": "1m"
}
scroll每个批次返回的条数最多为 size 条(只有在旧版本使用 search_type=scan 时,每批条数才是 size * number_of_primary_shards)
缺点: 数据并不是实时的。
Nested类型
PUT my-index-000001/_doc/1
{
"group" : "fans",
"user" : [
{
"first" : "John",
"last" : "Smith"
},
{
"first" : "Alice",
"last" : "White"
}
]
}
实际在ES中存储时会将存储的数据扁平化处理
{
"group" : "fans",
"user.first" : [ "Alice", "John" ],
"user.last" : [ "Smith", "White" ]
}
执行以下查询语句
GET my-index-000001/_search
{
"query": {
"bool": {
"must": [
{ "match": { "user.first": "Alice" }},
{ "match": { "user.last": "Smith" }}
]
}
}
}
应该查不到数据(没有任何一个user同时满足 first=Alice 且 last=Smith),但实际会查询到数据,因为扁平化之后 first 和 last 之间的对应关系丢失了。
我们可以使用Nested类型 修改映射
DELETE my-index-000001
PUT my-index-000001
{
"mappings": {
"properties": {
"user": {
"type": "nested"
}
}
}
}
再执行保存操作
PUT my-index-000001/_doc/1
{
"group" : "fans",
"user" : [
{
"first" : "John",
"last" : "Smith"
},
{
"first" : "Alice",
"last" : "White"
}
]
}
使用以下语句再次查询
GET my-index-000001/_search
{
"query": {
"nested": {
"path": "user",
"query": {
"bool": {
"must": [
{ "match": { "user.first": "Alice" }},
{ "match": { "user.last": "Smith" }}
]
}
}
}
}
}
总结:
如果ES中某个属性是对象数组,在创建映射的时候,要指定该属性的类型是nested类型。否则ES会对该属性进行扁平化处理,导致不同对象的字段被交叉匹配。
发现符合我们的预期,没有查询到结果