测试环境
- elasticsearch版本:7.17.1
{
"name" : "e1d11424cc66",
"cluster_name" : "elasticsearch",
"cluster_uuid" : "KxmWJ_YORs2lkZ_yuV0lXg",
"version" : {
"number" : "7.17.1",
"build_flavor" : "default",
"build_type" : "docker",
"build_hash" : "e5acb99f822233d62d6444ce45a4543dc1c8059a",
"build_date" : "2022-02-23T22:20:54.153567231Z",
"build_snapshot" : false,
"lucene_version" : "8.11.1",
"minimum_wire_compatibility_version" : "6.8.0",
"minimum_index_compatibility_version" : "6.0.0-beta1"
},
"tagline" : "You Know, for Search"
}
- java版本:jdk1.8(JDK 8 不支持 `--version` 参数,查看版本应使用 `java -version`;下面的报错输出正是因为使用了 `--version`)
C:\Users\yxc>java --version
Unrecognized option: --version
Error: Could not create the Java Virtual Machine.
Error: A fatal exception has occurred. Program will exit.
- springboot版本:2.5.4
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.5.4</version>
<relativePath/>
</parent>
测试方法
创建springboot项目,连接本地服务器上的单节点es;在springboot的CommandLineRunner中编写测试方法,打包后在本地服务器(与es同一台服务器)上运行,并用System.currentTimeMillis()方法获取时间戳,记录每种方式运行所需的时间。
测试过程
- 实体类准备
/**
 * Document entity used by the benchmark.
 *
 * <p>The class is annotated for the {@code demo} index and every field is declared as a
 * stored field. Getters, setters and both constructors are generated by Lombok.
 *
 * <p>NOTE(review): the benchmark code writes through the raw client into
 * {@code Constant.esIndex} ("ceshi"), not {@code demo} — confirm which index these
 * mapping annotations are expected to apply to.
 */
@Document(indexName = "demo")
@AllArgsConstructor
@NoArgsConstructor
@Data
public class EsDemoEntity {

    /** Identifier of the record; mapped as stored text. */
    @Field(store = true, type = FieldType.Text)
    private String cId;

    /** Name label of the record; mapped as stored text. */
    @Field(store = true, type = FieldType.Text)
    private String cName;

    /** Timestamp of the record as a long; presumably epoch milliseconds — confirm against writers. */
    @Field(store = true, type = FieldType.Long)
    private Long cTime;

    /** Value payload of the record; mapped as stored text. */
    @Field(store = true, type = FieldType.Text)
    private String cValue;
}
- 测试代码
常量类
/**
 * Constants shared by the benchmark code.
 *
 * <p>This is a pure constant holder and is not meant to be instantiated. The list
 * constants are unmodifiable so that no caller can alter the benchmark matrix at runtime.
 */
public class Constant {
    /**
     * Name of the Elasticsearch index the benchmark reads from and writes to.
     */
    public static final String esIndex = "ceshi";
    /**
     * Chunk sizes (documents handled per batch) tried by the update-by-id test.
     */
    public static final List<Integer> sizeList =
            java.util.Collections.unmodifiableList(Arrays.asList(100, 500, 1000, 5000, 10000));
    /**
     * Data-set sizes (number of documents) the benchmark is run against.
     */
    public static final List<Integer> dataSizeList =
            java.util.Collections.unmodifiableList(Arrays.asList(1000, 10000, 100000, 1000000));

    /** Prevents instantiation of this constant holder. */
    private Constant() {
    }
}
测试类
package com.yxc.elasticsearchdemo.task;
import com.alibaba.fastjson.JSON;
import com.yxc.elasticsearchdemo.entiey.Constant;
import com.yxc.elasticsearchdemo.entiey.EsDemoEntity;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.reindex.UpdateByQueryRequest;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.Scroll;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.HashMap;
import java.util.Map;
/**
 * Startup benchmark comparing two ways of updating many documents in Elasticsearch:
 * scroll-search followed by bulk update-by-id, and a single update-by-query request.
 *
 * <p>Executed once at application startup through {@link CommandLineRunner}.
 * Created: 2022/6/23 17:59
 *
 * @author yxc
 **/
@Slf4j
@Component
@Configuration
@Order(value=1)
public class TestTask implements CommandLineRunner {
    /** Maximum number of index actions sent in one bulk request while seeding data. */
    private static final int INSERT_BATCH_SIZE = 10000;

    @Autowired
    private RestHighLevelClient client;

    /**
     * Runs the three phases in order: seed the index, update by id, update by query.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if any Elasticsearch request fails
     */
    @Override
    public void run(String... args) throws Exception {
        insert();
        updateById();
        updateByQuery();
    }

    /**
     * Seeds the index with one group of documents per entry of
     * {@code Constant.dataSizeList}. Every document of a group carries the same
     * {@code cName} ("更新测试" + group size) so that the group can be selected later.
     *
     * @author yxc
     * @throws IOException if a bulk request cannot be executed
     **/
    void insert() throws IOException {
        for (Integer dataSize : Constant.dataSizeList) {
            BulkRequest bulkRequest = new BulkRequest();
            for (int i = 0; i < dataSize; i++) {
                EsDemoEntity esDemoEntity = new EsDemoEntity();
                esDemoEntity.setCName("更新测试" + dataSize);
                // Use the system clock directly: deriving the instant from LocalDateTime.now()
                // with a fixed +08:00 offset is only correct when the JVM default zone is UTC+8.
                esDemoEntity.setCTime(System.currentTimeMillis());
                esDemoEntity.setCValue("value");
                Map<String, Object> map = JSON.parseObject(JSON.toJSONString(esDemoEntity));
                bulkRequest.add(new IndexRequest(Constant.esIndex).source(map));
                if (bulkRequest.numberOfActions() >= INSERT_BATCH_SIZE) {
                    // Flush a full batch and start a new one.
                    executeBulk(bulkRequest);
                    bulkRequest = new BulkRequest();
                }
            }
            if (bulkRequest.numberOfActions() > 0) {
                // Flush the remaining documents of this group.
                executeBulk(bulkRequest);
            }
            log.info("插入了{}条数据", dataSize);
        }
    }

    /**
     * Benchmarks "search then update by id": for every data-set size and every chunk size,
     * scrolls over the matching documents and bulk-updates each page by document id, then
     * logs the elapsed wall-clock time.
     *
     * @author yxc
     * @throws Exception if a search, scroll or bulk request fails
     **/
    void updateById() throws Exception {
        // Iterate over the differently sized data sets.
        for (Integer dataSize : Constant.dataSizeList) {
            // Try each chunk size against the same data set.
            for (Integer size : Constant.sizeList) {
                long startTime = System.currentTimeMillis();
                // Only hit ids are consumed below.
                // NOTE(review): fetchSource("", "") passes an empty include and exclude pattern;
                // if the intent is to skip _source entirely, fetchSource(false) would state it
                // directly — confirm.
                SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
                        .query(new MatchQueryBuilder("cName.keyword", "更新测试" + dataSize))
                        .size(size)
                        .fetchSource("", "");
                SearchRequest searchRequest = new SearchRequest(Constant.esIndex)
                        .scroll(new Scroll(TimeValue.timeValueMinutes(1)))
                        .searchType(SearchType.DEFAULT)
                        .source(sourceBuilder);

                SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
                SearchHit[] hits = response.getHits().getHits();
                // The scroll cursor is only followed when the first page returned hits.
                String scrollId = hits.length > 0 ? response.getScrollId() : null;
                if (hits.length > 0) {
                    upateByIdBath(hits, "scroll更新" + dataSize);
                }
                // Keep pulling pages until one comes back empty.
                // NOTE(review): the scroll context is never cleared explicitly, so it stays open
                // until the 1-minute keep-alive lapses; consider a clear-scroll request here.
                while (StringUtils.isNotEmpty(scrollId)) {
                    SearchScrollRequest scrollRequest =
                            new SearchScrollRequest(scrollId).scroll(TimeValue.timeValueMinutes(1));
                    response = client.scroll(scrollRequest, RequestOptions.DEFAULT);
                    hits = response.getHits().getHits();
                    if (hits.length > 0) {
                        upateByIdBath(hits, "scroll更新" + dataSize);
                        scrollId = response.getScrollId();
                    } else {
                        scrollId = null;
                    }
                }
                long endTime = System.currentTimeMillis();
                // Three arguments need three placeholders; the elapsed time was previously dropped.
                log.info("根据id更新,数据量{},更新块大小{},使用时间:{}", dataSize, size, endTime - startTime);
            }
        }
    }

    /**
     * Bulk-updates the {@code cValue} field of every hit, addressing each document by id.
     *
     * @author yxc
     * @param hits hits of one search page whose documents should be updated
     * @param updateData new content of the {@code cValue} field
     * @return {@code true} if there was nothing to update or every item succeeded,
     *         {@code false} if the bulk response reported failures
     * @throws IOException if the bulk request cannot be executed
     **/
    boolean upateByIdBath(SearchHit[] hits, String updateData) throws IOException {
        if (hits == null || hits.length == 0) {
            // Nothing to update; skip the round trip.
            return true;
        }
        BulkRequest bulkRequest = new BulkRequest();
        for (SearchHit hit : hits) {
            Map<String, Object> doc = new HashMap<>();
            doc.put("cValue", updateData);
            bulkRequest.add(new UpdateRequest(Constant.esIndex, hit.getId()).doc(doc));
        }
        return executeBulk(bulkRequest);
    }

    /**
     * Benchmarks a single update-by-query request per data-set size and logs the elapsed
     * wall-clock time of the request.
     *
     * @author yxc
     * @throws IOException if the update-by-query request cannot be executed
     **/
    void updateByQuery() throws IOException {
        // Iterate over the differently sized data sets.
        for (Integer dataSize : Constant.dataSizeList) {
            long startTime = System.currentTimeMillis();
            UpdateByQueryRequest updateByQueryRequest = new UpdateByQueryRequest(Constant.esIndex);
            updateByQueryRequest.setQuery(QueryBuilders.matchQuery("cName.keyword", "更新测试" + dataSize));
            updateByQueryRequest.setScript(new Script("ctx._source['cValue']='使用updateByQuery更新" + dataSize + "'"));
            // Actually send the request; without this call nothing is updated and only
            // the construction of the request object would be timed.
            client.updateByQuery(updateByQueryRequest, RequestOptions.DEFAULT);
            long endTime = System.currentTimeMillis();
            log.info("使用updateByQuery更新,数据量{},使用时间:{}", dataSize, endTime - startTime);
        }
    }

    /**
     * Sends a bulk request and reports item-level failures instead of ignoring them.
     *
     * @param bulkRequest non-empty bulk request to execute
     * @return {@code true} if no item failed, {@code false} otherwise
     * @throws IOException if the request cannot be executed
     */
    private boolean executeBulk(BulkRequest bulkRequest) throws IOException {
        BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
        if (bulkResponse.hasFailures()) {
            log.warn("bulk请求存在失败项:{}", bulkResponse.buildFailureMessage());
            return false;
        }
        return true;
    }
}
测试结果
注:结果最后四行的日志前缀误写成了“使用updateById更新”,实际对应的是使用updateByQuery更新。
结论
在数据量较小时,直接使用updateByQuery方法进行更新,逻辑比较简单,代码简洁。
如果数据量较大,又要求效率的话,推荐使用先查后改的方式,根据实际环境的内存大小与网络带宽,选择1000以上的分块。