项目ES搭建及实战-v1.4

一、概述

  • 本文是对Java项目如何引入ES的实战总结,供之后搭建ES项目时参考

二、配置

pom配置

解释:之所以先排除再重新引入elasticsearch依赖,是因为client包传递引入的elasticsearch版本不正确

<es.version>7.5.2</es.version>

<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>${es.version}</version>
</dependency>
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>${es.version}</version>
    <exclusions>
        <exclusion>
            <artifactId>elasticsearch</artifactId>
            <groupId>org.elasticsearch</groupId>
        </exclusion>
    </exclusions>
</dependency>

<dependency>
    <groupId>org.springframework.data</groupId>
    <artifactId>spring-data-elasticsearch</artifactId>
    <version>4.0.9.RELEASE</version>
    <exclusions>
        <exclusion>
            <artifactId>elasticsearch</artifactId>
            <groupId>org.elasticsearch</groupId>
        </exclusion>
        <exclusion>
            <artifactId>elasticsearch-core</artifactId>
            <groupId>org.elasticsearch</groupId>
        </exclusion>
        <exclusion>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <groupId>org.elasticsearch.client</groupId>
        </exclusion>
        <exclusion>
            <artifactId>lang-mustache-client</artifactId>
            <groupId>org.elasticsearch.plugin</groupId>
        </exclusion>
        <exclusion>
            <artifactId>parent-join-client</artifactId>
            <groupId>org.elasticsearch.plugin</groupId>
        </exclusion>
        <exclusion>
            <artifactId>rank-eval-client</artifactId>
            <groupId>org.elasticsearch.plugin</groupId>
        </exclusion>
    </exclusions>
</dependency>

application.properties配置(测试)

es.securityUser=xxxx
es.securityPassword=xxxx
#机器集群
es.host[0].hostname=xxx.xxx.xxx.xxx
es.host[0].port=xxxx
es.host[0].scheme=http
es.host[1].hostname=xxx.xxx.xxx.xxx
es.host[1].port=xxxx
es.host[1].scheme=http
es.host[2].hostname=xxx.xxx.xxx.xxx
es.host[2].port=xxxx
es.host[2].scheme=http

java配置类

Property:

import lombok.Data;
import lombok.ToString;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

import java.util.List;

/**
 * @author ext.xuzhengyang5
 * @date 2024/1/13
 * @Description
 */
@Data
@Component
@ToString
@ConfigurationProperties("es")
public class EsProperty {
    private String securityUser;
   
    private String securityPassword;

    private List<Host> host;

    @Data
    @ToString
    public static class Host {
        private String hostname;
        private Integer port;
        private String scheme;
    }
}


Config:

import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.elasticsearch.config.AbstractElasticsearchConfiguration;

import java.util.ArrayList;
import java.util.List;

/**
 * Spring configuration that builds the {@link RestHighLevelClient} from {@link EsProperty}.
 *
 * @author ext.xuzhengyang5
 * @date 2024/1/13
 */
@Slf4j
@Configuration
public class EsConfig extends AbstractElasticsearchConfiguration {

    @Autowired
    private EsProperty esProperty;

    /**
     * Creates the high-level REST client for all configured nodes, using basic authentication.
     *
     * @return the client connected to every node listed under {@code es.host}
     * @throws IllegalStateException if no host is configured or a host lacks hostname or port
     */
    @Bean
    @Override
    public RestHighLevelClient elasticsearchClient() {
        List<EsProperty.Host> hosts = esProperty.getHost();
        if (hosts == null || hosts.isEmpty()) {
            throw new IllegalStateException("No Elasticsearch host configured: expected es.host[n].hostname/port/scheme");
        }
        // Log the node list only; the property bean also carries the credentials.
        log.info("es配置加载:{}", hosts);

        HttpHost[] httpHosts = hosts.stream()
                .map(EsConfig::toHttpHost)
                .toArray(HttpHost[]::new);

        // Build the provider once instead of on every invocation of the callback.
        CredentialsProvider credentialsProvider = getCredentialsProvider();
        return new RestHighLevelClient(
                RestClient.builder(httpHosts)
                        .setHttpClientConfigCallback(httpClientBuilder ->
                                httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider))
        );
    }

    /**
     * Builds a credentials provider holding the configured user name and password for any auth scope.
     *
     * @return a provider with the basic-auth credentials from {@link EsProperty}
     */
    public CredentialsProvider getCredentialsProvider() {
        CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY,
                new UsernamePasswordCredentials(esProperty.getSecurityUser(), esProperty.getSecurityPassword()));
        return credentialsProvider;
    }

    /** Converts one configured node into an {@link HttpHost}, failing fast on incomplete entries. */
    private static HttpHost toHttpHost(EsProperty.Host host) {
        if (host == null || host.getHostname() == null || host.getPort() == null) {
            // Without this check a missing port surfaces as a bare NullPointerException on unboxing.
            throw new IllegalStateException("Incomplete Elasticsearch host configuration: " + host);
        }
        return new HttpHost(host.getHostname(), host.getPort(), host.getScheme());
    }

}

三、实战

(一)、简单操作

上述配置我们引入的是spring-data-elasticsearch

所以简单的查询可以通过继承ElasticsearchRepository实现简单查询

复杂操作本次使用为ElasticsearchOperations,当然也可以使用RestHighLevelClient

实体类:

import com.fasterxml.jackson.annotation.JsonFormat;
import lombok.Data;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.DateFormat;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;

import java.sql.Timestamp;
import java.util.Date;

/**
 * Sample document entity stored in the {@code blog} index (used for the ES tests).
 *
 * @author ext.xuzhengyang5
 * @date 2024/1/13
 */
@Data
@Document(indexName = "blog", shards = 2, replicas = 3)//shards: number of shards, replicas: number of replicas
public class Blog {
    /**
     * Auto-increment id, used as the document id.
     * Note: a field that is simply named "id" is treated as the document id even without
     * the @Id annotation, see
     * SimpleElasticsearchPersistentProperty#SUPPORTED_ID_PROPERTY_NAMES
     */
    @Id
    private Long blogId;

    @Field(type = FieldType.Text)
    private String title;

    @Field(type = FieldType.Text)
    private String content;

    @Field(type = FieldType.Text)
    private String author;

    @Field(type = FieldType.Keyword)
    private String category;

    // 0: not published (draft) 1: published 2: deleted
    @Field(type = FieldType.Integer)
    private int status;

    @Field(type = FieldType.Keyword)
    private String serialNum;

    // Creation time, mapped as a date with millisecond precision.
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss.SSS")
    @Field(type= FieldType.Date, format= DateFormat.custom, pattern="yyyy-MM-dd HH:mm:ss.SSS")
    private Date createTime;

    // Last update time, mapped as a date with millisecond precision.
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss.SSS")
    @Field(type=FieldType.Date, format=DateFormat.custom, pattern="yyyy-MM-dd HH:mm:ss.SSS")
    private Date updateTime;

    /**
     * Time the goods were received (second precision).
     */
    @Field(type= FieldType.Date, format= DateFormat.custom, pattern="yyyy-MM-dd HH:mm:ss")
    private Timestamp compromiseTime;

}

Repository:

import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;

/**
 * Spring Data repository for {@code Blog} documents keyed by a {@code Long} id (ES test).
 * Declares no methods of its own; everything comes from {@code ElasticsearchRepository}.
 *
 * @author ext.xuzhengyang5
 * @date 2024/1/13
 */
public interface EsBlogRepository extends ElasticsearchRepository<Blog, Long> {

}

manager:

import org.elasticsearch.index.query.QueryBuilders;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.elasticsearch.core.ElasticsearchOperations;
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.data.elasticsearch.core.SearchHits;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.stereotype.Service;

import java.util.List;
import java.util.stream.Collectors;

/**
 * Blog access layer: repository calls for id-based operations, {@link ElasticsearchOperations}
 * for the term query.
 *
 * @author ext.xuzhengyang5
 * @date 2024/1/13
 */
@Service
public class EsBlogManagerImpl implements EsBlogManager {
    @Autowired
    private ElasticsearchOperations elasticsearchOperations;
    @Autowired
    private EsBlogRepository esBlogRepository;

    /**
     * Runs a term query on a single field of the blog index.
     *
     * @param fieldName name of the field to match
     * @param value     exact term to look for
     * @return the blogs contained in the search hits
     */
    @Override
    public List<Blog> termQueryByField(String fieldName, String value) {
        SearchHits<Blog> hits = elasticsearchOperations.search(
                new NativeSearchQueryBuilder()
                        .withQuery(QueryBuilders.termQuery(fieldName, value))
                        .build(),
                Blog.class);
        return hits.stream()
                .map(SearchHit::getContent)
                .collect(Collectors.toList());
    }

    /**
     * Loads a blog by its document id.
     *
     * @param id document id
     * @return the blog, or {@code null} when no document has this id
     */
    @Override
    public Blog findById(Long id) {
        // Callers of this interface expect null rather than an Optional.
        return esBlogRepository.findById(id).orElse(null);
    }

    /**
     * Saves the blog through the repository.
     *
     * @param blog document to store
     * @return the entity returned by the repository
     */
    @Override
    public Blog save(Blog blog) {
        Blog saved = esBlogRepository.save(blog);
        return saved;
    }
}

(二)、操作记录功能

项目需要对某个操作动作进行行为记录。

考量点及解决方案:

1.数据量

  • 问题:数据量太大会导致查询效率下降
  • 方案:按日期划分存储介质(如数据库按日期分表、es按日期分索引)

2.存储介质的动态创建

  • 问题:按日期划分,需动态对存储介质进行创建
  • 方案:定时任务,创建存储介质。此方案用的es,所以是定时任务每天根据es模板配置,创建第二天的索引

3.记录操作影响范围

  • 问题:操作记录的动作不能影响主流程执行
  • 方案:使用mq或异步线程操作(这里用的异步)

创建ES模板

es模板虽有自动创建索引的功能,但并不支持按照日期自动创建索引。因此使用模板+java代码进行索引维护,同时需要注意es自身配置的坑。

注意:别名及生命阶段问题(详见:索引模板问题)

1.1 模板生命周期
PUT /_ilm/policy/operation_records_policy
{
    "policy":{
        "phases":{
            "delete":{
                "min_age":"25d",
                "actions":{
                    "delete":{
                        "delete_searchable_snapshot":true
                    }
                }
            }
        }
    }
}

1.2 模板配置
PUT /_template/operation_record_template
{
 "index_patterns": [
  "operation_records_*"
 ],
 "settings": {
  "number_of_shards": 2,
  "number_of_replicas": 2,
  "index": {
   "lifecycle": {
    "name": "operation_records_policy"
   }
  }
 },
 "mappings": {
  "properties": {
   "id": {
    "type": "keyword"
   },
   "type": {
    "type": "integer"
   },
   "queryId": {
    "type": "long"
   },
   "keyword1": {
    "type": "keyword"
   },
   "context": {
    "type": "text"
   },
   "operationTime": {
    "type": "date",
    "format": "yyyy-MM-dd HH:mm:ss"
   }
  }
 }
}

1.3 其它命令

辅助操作在此简单记录

删除模板REST命令
DELETE /_template/operation_record_template
查询索引周期
GET operation_records_20240123/_ilm/explain
查询别名
GET /_alias/operation_records
GET operation_records_20240123/_alias/*
生命周期手动滚动重试
POST /operation_records_20240123/_ilm/retry
POST /operation_records/_rollover
   {
     "conditions": {
       "max_age": "5d",
       "max_primary_shard_size": "50gb"
     }
   }
设置别名
POST /_aliases
{
  "actions": [
    { "add": { "index": "operation_records_20240123", "alias": "operation_records", "is_write_index": true } }
  ]
}

java-es基础类创建

import com.fasterxml.jackson.annotation.JsonFormat;
import lombok.Data;
import lombok.ToString;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.DateFormat;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;

import java.util.Date;

/**
 * Operation record document, stored in one index per day ({@code operation_records_yyyyMMdd}).
 *
 * @author ext.xuzhengyang5
 * @date 2024/1/20
 */
@Data
@ToString
// The SpEL index name resolves to today's index, so a plain save can create it automatically.
// The main flow does not rely on this; it is kept only as a fallback and for reference.
// NOTE(review): shards = 3 here differs from number_of_shards = 2 in operation_record_template - confirm which one is intended.
@Document(indexName = "operation_records_#{T(java.time.format.DateTimeFormatter).ofPattern('yyyyMMdd').format(T(java.time.LocalDate).now())}", shards = 3, replicas = 2)
public class OperationRecord {

    /**
     * Document id.
     */
    @Id
    private String id;

    /**
     * Scenario (type of the operation).
     */
    @Field(type = FieldType.Integer)
    private Integer type;

    /**
     * Query identifier.
     */
    @Field(type = FieldType.Long)
    private Long queryId;

    /**
     * Keyword 1.
     */
    @Field(type = FieldType.Keyword)
    private String keyword1;

    /**
     * Content of the operation.
     */
    @Field(type = FieldType.Text)
    private String context;
    
    /**
     * Time of the operation (second precision).
     */
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
    @Field(type= FieldType.Date, format= DateFormat.custom, pattern="yyyy-MM-dd HH:mm:ss")
    private Date operationTime;


}

import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;

/**
 * Spring Data repository for {@code OperationRecord} documents keyed by a {@code String} id.
 * Declares no methods of its own; everything comes from {@code ElasticsearchRepository}.
 *
 * @author ext.xuzhengyang5
 * @date 2024/1/20
 */
public interface OperationRecordRepository extends ElasticsearchRepository<OperationRecord, String> {

}

manager逻辑类

方法:

saveOperationRecordAsync:异步保存操作记录

getIndexCoordinates:根据日期获取对应的索引,索引不存在则先创建,用于定时任务动态创建索引

findRecordsByDate:简单的查询测试

import cn.hutool.core.date.*;
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.elasticsearch.core.ElasticsearchOperations;
import org.springframework.data.elasticsearch.core.IndexOperations;
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.data.elasticsearch.core.SearchHits;
import org.springframework.data.elasticsearch.core.document.Document;
import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.stereotype.Service;

import java.text.SimpleDateFormat;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;

/**
 * @author ext.xuzhengyang5
 * @date 2024/1/20
 * @Description 操作记录
*/
@Slf4j
@Service
public class OperationRecordManagerImpl implements OperationRecordManager {
    private static final String OPERATION_RECORD_PREFIX = "operation_records_";
    private static final String INDEX_DATE_PATTERN = "yyyyMMdd";
    private static final SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");;

    @Autowired
    private OperationRecordRepository operationRecordRepository;

    @Autowired
    private ElasticsearchOperations elasticsearchOperations;

    @Override
    public void saveOperationRecordAsync(OperationRecord operationRecord) {
        CompletableFuture.runAsync(() -> {
            try {
                saveOperationRecord(operationRecord);
            } catch (Exception e) {
                log.error("异步操作记录入库失败 operationRecord:{} e:{}", operationRecord, e);
                throw e;
            }
        });
    }

    @Override
    public OperationRecord saveOperationRecord(OperationRecord operationRecord) {
        if (Objects.isNull(operationRecord.getOperationTime())) {
            log.info("操作记录数据为空 使用当前时间");
            operationRecord.setOperationTime(new Date());
        }
        Date operationTime = operationRecord.getOperationTime();
        // 获取今天的日期
        Date today = DateUtil.date();
        // 获取10天前的日期
        Date tenDaysAgo = DateUtil.offsetDay(today, -10);

        // 检查传入的日期是否不早于10天前且不晚于今天
        boolean bool = DateUtil.isIn(operationTime, tenDaysAgo, today);
        if (Boolean.FALSE.equals(bool)) {
            log.warn("操作日志时间超出范围:{}", operationTime);
            return null;
        }
        Date date = operationRecord.getOperationTime();
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern(INDEX_DATE_PATTERN);
        String indexName = OPERATION_RECORD_PREFIX + formatter.format(date.toInstant().atZone(ZoneId.systemDefault()).toLocalDate());
//        IndexCoordinates indexCoordinates = getIndexCoordinates(operationRecord.getOperationTime());

        // 索引文档
        return elasticsearchOperations.save(operationRecord, IndexCoordinates.of(indexName));
    }

    /**
     * 根据入参时间获取索引
     * 根据入参时间判断索引是否存在,不存在则创建索引
     *
     * @param date
     * @return
     */
    @Override
    public IndexCoordinates getIndexCoordinates(Date date) {
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern(INDEX_DATE_PATTERN);
        String indexName = OPERATION_RECORD_PREFIX + formatter.format(date.toInstant().atZone(ZoneId.systemDefault()).toLocalDate());

        IndexCoordinates indexCoordinates = IndexCoordinates.of(indexName);
        IndexOperations indexOperations = elasticsearchOperations.indexOps(indexCoordinates);

        // 检查索引是否存在,如果不存在则创建索引
        if (!indexOperations.exists()) {
            indexOperations.create();
            // 创建映射
            Map<String, Object> mappingMap = indexOperations.createMapping(OperationRecord.class);
            Document mapping = Document.from(mappingMap);
            indexOperations.putMapping(mapping);
        }
        return indexCoordinates;
    }

    /**
     * 根据时间范围查询数据
     *
     * @param startDate
     * @param endDate
     * @return
     */
    @Override
    public List<OperationRecord> findRecordsByDate(Date startDate, Date endDate, int page, int pageSize) {
        // 生成查询时间范围
        String start = simpleDateFormat.format(startDate);
        String end = simpleDateFormat.format(endDate);
        // 构建范围查询
        RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("operationTime")
                .gte(start)
                .lte(end);

        // 创建term查询
//        TermQueryBuilder termQuery = QueryBuilders.termQuery(termField, termValue);

        // 创建bool查询
        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery()
                .must(rangeQuery);
//                .filter(termQuery)

        // 创建分页对象
        PageRequest pageRequest = PageRequest.of(page, pageSize);

        // 获取时间范围涉及到的索引集合
        List<DateTime> dateTimes = DateUtil.rangeToList(startDate, endDate, DateField.DAY_OF_YEAR);
        List<String> indexNames = dateTimes.stream().map(dateTime -> {
            String dateStr = dateTime.toString(INDEX_DATE_PATTERN);
            return OPERATION_RECORD_PREFIX + dateStr;
        }).collect(Collectors.toList());

        SearchHits<OperationRecord> searchHits = elasticsearchOperations.search(
                new NativeSearchQueryBuilder()
                        .withQuery(boolQuery)
                        .withPageable(pageRequest)
                        .build(),
                OperationRecord.class,
                IndexCoordinates.of(indexNames.toArray(new String[0]))
        );

        // 返回结果
        return searchHits.stream().map(SearchHit::getContent).collect(Collectors.toList());
    }

}

定时任务逻辑:定时任务每天执行创建第二天的索引

备注:就算创建失败也问题不大,在插入数据时无索引会自动创建索引

/**
 * Scheduled job that prepares tomorrow's operation-record index.
 *
 * @return a success result once the index coordinates have been obtained
 * @throws Exception declared by the task contract
 */
public TaskResult doTask() throws Exception {
    log.info("创建索引任务 start");
    // Resolve tomorrow relative to the current time and make sure its index is there.
    DateTime nextDay = DateUtil.offsetDay(DateUtil.date(), 1);
    IndexCoordinates nextDayIndex = operationRecordManager.getIndexCoordinates(nextDay);
    log.info("创建索引任务 操作表索引创建完成 indexName:{}", nextDayIndex.getIndexName());
    log.info("创建索引任务 end");
    return TaskResult.success();
}

四、问题记录

  1. spring-data-elasticsearch,实体类(entity)如果有字段名起名id,则就算没有使用@Id注解此字段也会默认注册为文档Id,且会与使用@Id字段的注册冲突。
    • 此问题翻看源码,找出SimpleElasticsearchPersistentProperty构造方法,其中this.isId判断有个 或 SUPPORTED_ID_PROPERTY_NAMES.contains(getFieldName());。也就是说如果字段名为id、document则默认就会争抢Id的注册
// Field names that are treated as the document id even without an @Id annotation.
private static final List<String> SUPPORTED_ID_PROPERTY_NAMES = Arrays.asList("id", "document");

// Constructor quoted from the library: derives the property flags and validates their types.
public SimpleElasticsearchPersistentProperty(Property property,
			PersistentEntity<?, ElasticsearchPersistentProperty> owner, SimpleTypeHolder simpleTypeHolder) {

		super(property, owner, simpleTypeHolder);

		this.annotatedFieldName = getAnnotatedFieldName();
		// Id property: either reported by the superclass check, or simply named "id"/"document".
		this.isId = super.isIdProperty() || SUPPORTED_ID_PROPERTY_NAMES.contains(getFieldName());
		this.isScore = isAnnotationPresent(Score.class);
		this.isParent = isAnnotationPresent(Parent.class);
		this.isSeqNoPrimaryTerm = SeqNoPrimaryTerm.class.isAssignableFrom(getRawType());

		// A version property must be declared as Long.
		if (isVersionProperty() && !getType().equals(Long.class)) {
			throw new MappingException(String.format("Version property %s must be of type Long!", property.getName()));
		}

		// A score property must be float or Float.
		if (isScore && !getType().equals(Float.TYPE) && !getType().equals(Float.class)) {
			throw new MappingException(
					String.format("Score property %s must be either of type float or Float!", property.getName()));
		}

		// A parent property must be a String.
		if (isParent && !getType().equals(String.class)) {
			throw new MappingException(String.format("Parent property %s must be of type String!", property.getName()));
		}

		// @Field and @MultiField are mutually exclusive on one property.
		if (isAnnotationPresent(Field.class) && isAnnotationPresent(MultiField.class)) {
			throw new MappingException("@Field annotation must not be used on a @MultiField property.");
		}

		initDateConverter();
	}

  2. es时间同步问题
    • es使用的是UTC(世界标准时间)
    • 中国一般使用的是CST(中国标准时间)也就是UTC+8
  3. 索引模板问题

背景:模板配置完毕后,索引没有在规定的“死亡时间”被清理

原因:

  • 索引生命周期在进行rollover切换时,如果模板设置过别名,则创建的索引必须指定别名
    • index.lifecycle.rollover_alias [operation_records] does not point to index [operation_records_20240123]
  • 指定别名的索引在rollover切换时命名不规范则会报错
    • index name [operation_records_20240123] does not match pattern '^.*-\\d+$'
    • 索引模板之所以需要规范命名,是因为他提供了动态创建索引的功能。但此功能本次开发不需要
  • 去掉花里胡哨的阶段转换及别名配置,ILM策略专注于索引到期清理,解决掉此问题

备注:使用es别名功能,查询时增加别名过滤。在此做记录,之后使用的话做参考

POST /_aliases
{
  "actions": [
    {
      "add": {
        "index": "xxxx",
        "alias": "xxxx",
        "filter": {
          "range": {
            "operationTime": {
              "gte": "2024-02-22T00:00:00",
              "lte": "2024-02-22T23:59:59",
              "format": "yyyy-MM-dd'T'HH:mm:ss"
            }
          }
        }
      }
    }
  ]
}

  • 11
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值