关于Elasticsearch中更新大量数据的测试:分别使用"先查询、再根据id批量更新"与updateByQuery方法更新

测试环境

  1. elasticsearch版本:7.17.1
{
  "name" : "e1d11424cc66",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "KxmWJ_YORs2lkZ_yuV0lXg",
  "version" : {
    "number" : "7.17.1",
    "build_flavor" : "default",
    "build_type" : "docker",
    "build_hash" : "e5acb99f822233d62d6444ce45a4543dc1c8059a",
    "build_date" : "2022-02-23T22:20:54.153567231Z",
    "build_snapshot" : false,
    "lucene_version" : "8.11.1",
    "minimum_wire_compatibility_version" : "6.8.0",
    "minimum_index_compatibility_version" : "6.0.0-beta1"
  },
  "tagline" : "You Know, for Search"
}
  2. java版本:jdk1.8(JDK 8不支持`--version`选项,应使用`java -version`查看;下面的报错正是因为在JDK 8上执行了`java --version`)
C:\Users\yxc>java --version
Unrecognized option: --version
Error: Could not create the Java Virtual Machine.
Error: A fatal exception has occurred. Program will exit.
  3. springboot版本:2.5.4
<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>2.5.4</version>
    <relativePath/>
</parent>

测试方法

创建springboot项目,连接本地服务器当中的单节点es,在springboot的CommandLineRunner中编写测试方法,打包后在本地服务器(和es同一台服务器)上运行,用System.currentTimeMillis()方法获取时间戳,记录运行所需时间

测试过程

  1. 实体类准备
/**
 * Document type used by the bulk-update test in this article.
 *
 * <p>Lombok's {@code @Data}, {@code @NoArgsConstructor} and {@code @AllArgsConstructor}
 * generate the accessors and constructors; the class itself only declares fields.
 *
 * <p>NOTE(review): {@code indexName} is "demo" here, while the test code shown in this
 * article indexes into {@code Constant.esIndex} ("ceshi") — confirm which index is intended.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Document(indexName = "demo")
public class EsDemoEntity {

    // Identifier field. NOTE(review): the insert code shown in this article never sets it —
    // confirm whether it is still needed.
    @Field(type = FieldType.Text, store = true)
    private String cId;

    // Group label of the document; the test sets it to "更新测试" + data-set size.
    // NOTE(review): declared as Text here, but the test queries "cName.keyword", which
    // presumably relies on Elasticsearch dynamic mapping — verify against the real mapping.
    @Field(type = FieldType.Text, store = true)
    private String cName;

    // Creation time as epoch milliseconds.
    @Field(type = FieldType.Long, store = true)
    private Long cTime;

    // Payload field that the update tests overwrite.
    @Field(type = FieldType.Text, store = true)
    private String cValue;

}
  2. 测试代码
    常量类
/**
 * Constants shared by the bulk-update benchmark.
 *
 * <p>This is a pure constant holder: it cannot be instantiated or subclassed, and the
 * exposed lists are read-only so that one test cannot alter the parameters seen by another.
 */
public final class Constant {

    /** Name of the Elasticsearch index used by the benchmark. */
    public static final String esIndex = "ceshi";

    /** Chunk sizes (documents handled per round trip) tried by the update-by-id test. */
    public static final List<Integer> sizeList =
            java.util.Collections.unmodifiableList(Arrays.asList(100, 500, 1000, 5000, 10000));

    /** Data-set sizes (number of documents) that the benchmark is run against. */
    public static final List<Integer> dataSizeList =
            java.util.Collections.unmodifiableList(Arrays.asList(1000, 10000, 100000, 1000000));

    /** Not instantiable. */
    private Constant() {
    }

}

测试类

package com.yxc.elasticsearchdemo.task;

import com.alibaba.fastjson.JSON;
import com.yxc.elasticsearchdemo.entiey.Constant;
import com.yxc.elasticsearchdemo.entiey.EsDemoEntity;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.reindex.UpdateByQueryRequest;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.Scroll;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.HashMap;
import java.util.Map;

/**
 * Start-up benchmark comparing two ways of updating many Elasticsearch documents:
 * "scroll + bulk update by id" versus the update-by-query API.
 *
 * <p>On application start it inserts the test data, then times both update strategies
 * and writes the elapsed milliseconds to the log.
 *
 * @author: yxc
 * @time: 2022/6/23 17:59
 **/
@Slf4j
@Component
@Order(value = 1)
public class TestTask implements CommandLineRunner {

    /** Number of index actions collected before a bulk insert is sent. */
    private static final int INSERT_BATCH_SIZE = 10000;

    /** How long each scroll context is kept alive between two page requests. */
    private static final TimeValue SCROLL_KEEP_ALIVE = TimeValue.timeValueMinutes(1);

    @Autowired
    private RestHighLevelClient client;

    /**
     * Runs the three benchmark phases in order: insert, update by id, update by query.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if any Elasticsearch request fails
     */
    @Override
    public void run(String... args) throws Exception {
        insert();
        updateById();
        updateByQuery();
    }

    /**
     * Inserts one data set per entry of {@code Constant.dataSizeList}. Every document of a
     * data set carries the same {@code cName} ("更新测试" + size) so the update tests can
     * select the set by that value.
     *
     * @author yxc
     * @date 2022/6/23 17:55
     * @throws IOException if a bulk request cannot be executed
     **/
    void insert() throws IOException {
        for (Integer dataSize : Constant.dataSizeList) {
            BulkRequest bulkRequest = new BulkRequest();
            for (int i = 0; i < dataSize; i++) {
                EsDemoEntity esDemoEntity = new EsDemoEntity();
                esDemoEntity.setCName("更新测试" + dataSize);
                // Epoch millis are zone independent; deriving them from LocalDateTime with a
                // fixed +8 offset is only correct when the host happens to run in UTC+8.
                esDemoEntity.setCTime(System.currentTimeMillis());
                esDemoEntity.setCValue("value");
                Map<String, Object> map = JSON.parseObject(JSON.toJSONString(esDemoEntity));
                bulkRequest.add(new IndexRequest(Constant.esIndex).source(map));
                if (bulkRequest.numberOfActions() >= INSERT_BATCH_SIZE) {
                    // Flush a full batch and start a new one.
                    executeBulk(bulkRequest);
                    bulkRequest = new BulkRequest();
                }
            }
            if (bulkRequest.numberOfActions() > 0) {
                // Flush the remainder.
                executeBulk(bulkRequest);
            }
            log.info("插入了{}条数据", dataSize);
        }
    }

    /**
     * Times the "query first, then bulk-update by id" strategy for every combination of
     * data-set size and chunk size. The matching ids are paged through with the scroll
     * API and each page is sent as one bulk update.
     *
     * @author yxc
     * @date 2022/6/23 17:55
     * @throws Exception if a search, scroll or bulk request fails
     **/
    void updateById() throws Exception {
        // Run against every data-set size ...
        for (Integer dataSize : Constant.dataSizeList) {
            // ... and with every chunk size.
            for (Integer size : Constant.sizeList) {
                long startTime = System.currentTimeMillis();
                // The written value includes the chunk size: a partial update that does not
                // change the document is detected as a noop by Elasticsearch, so reusing the
                // same value for every chunk size would only time noops after the first pass.
                String updateData = "scroll更新" + dataSize + "-" + size;
                SearchRequest searchRequest = new SearchRequest(Constant.esIndex)
                        .scroll(new Scroll(SCROLL_KEEP_ALIVE))
                        .searchType(SearchType.DEFAULT)
                        .source(
                                new SearchSourceBuilder()
                                        .query(new MatchQueryBuilder("cName.keyword", "更新测试" + dataSize))
                                        .size(size)
                                        // Only the document ids are needed.
                                        .fetchSource(false)
                        );
                SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
                SearchHit[] hits = response.getHits().getHits();
                // Keep paging until a page comes back empty.
                while (hits.length > 0) {
                    upateByIdBath(hits, updateData);
                    String scrollId = response.getScrollId();
                    if (StringUtils.isEmpty(scrollId)) {
                        break;
                    }
                    SearchScrollRequest searchScrollRequest =
                            new SearchScrollRequest(scrollId).scroll(SCROLL_KEEP_ALIVE);
                    response = client.scroll(searchScrollRequest, RequestOptions.DEFAULT);
                    hits = response.getHits().getHits();
                }
                long endTime = System.currentTimeMillis();
                log.info("根据id更新,数据量{},更新块大小{},使用时间:{}", dataSize, size, endTime - startTime);
            }
        }
    }

    /**
     * Sends one bulk request that sets {@code cValue} on every hit of a result page.
     *
     * @author yxc
     * @date 2022/6/23 17:55
     * @param hits: the hits of one search/scroll page
     * @param updateData: the value to write into the cValue field
     * @return {@code true} if there was nothing to do or every item succeeded,
     *         {@code false} if at least one bulk item failed
     * @throws IOException if the bulk request cannot be executed
     **/
    boolean upateByIdBath(SearchHit[] hits, String updateData) throws IOException {
        if (hits == null || hits.length == 0) {
            // An empty bulk request is rejected by the client, so skip it.
            return true;
        }
        BulkRequest bulkRequest = new BulkRequest();
        for (SearchHit hit : hits) {
            Map<String, Object> doc = new HashMap<>();
            doc.put("cValue", updateData);
            bulkRequest.add(new UpdateRequest(Constant.esIndex, hit.getId()).doc(doc));
        }
        return executeBulk(bulkRequest);
    }

    /**
     * Times the update-by-query strategy for every data-set size: one request per data set
     * that rewrites {@code cValue} through a script.
     *
     * @author yxc
     * @date 2022/6/23 17:56
     * @throws IOException if the update-by-query request fails
     **/
    void updateByQuery() throws IOException {
        for (Integer dataSize : Constant.dataSizeList) {
            long startTime = System.currentTimeMillis();
            UpdateByQueryRequest updateByQueryRequest = new UpdateByQueryRequest(Constant.esIndex);
            updateByQueryRequest.setQuery(QueryBuilders.matchQuery("cName.keyword", "更新测试" + dataSize));
            updateByQueryRequest.setScript(new Script("ctx._source['cValue']='使用updateByQuery更新" + dataSize + "'"));
            // Actually execute the request; without this call only request construction is timed.
            client.updateByQuery(updateByQueryRequest, RequestOptions.DEFAULT);
            long endTime = System.currentTimeMillis();
            log.info("使用updateByQuery更新,数据量{},使用时间:{}", dataSize, endTime - startTime);
        }
    }

    /**
     * Executes a bulk request and reports item-level failures instead of dropping them.
     *
     * @param bulkRequest the request to send; must contain at least one action
     * @return {@code true} if every item succeeded, {@code false} otherwise
     * @throws IOException if the request cannot be executed
     */
    private boolean executeBulk(BulkRequest bulkRequest) throws IOException {
        BulkResponse response = client.bulk(bulkRequest, RequestOptions.DEFAULT);
        if (response.hasFailures()) {
            log.warn("bulk请求存在失败项:{}", response.buildFailureMessage());
            return false;
        }
        return true;
    }
}


测试结果

在这里插入最后测试结果

结果最后四行的日志内容打错了,应该是使用updateByQuery更新

结论

在较小数据量时,直接使用updateByQuery方法进行更新,逻辑比较简单,代码简洁。
如果数据量较大,又要求效率的话,推荐使用先查后改的方式,根据实际环境的内存大小与网络带宽,选择1000以上的分块。

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值