记一次es批量导入数据的惨痛经历(分享给缺少团队协作的苦逼人儿):
1、确认es版本及maven依赖es版本;
2、确认es启动内存、索引刷新规则、默认批量写入数据量大小;
3、分批次导入,每批数量1000-5000;
4、如使用多线程操作,计数器使用ThreadLocal
package com.config;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpHost;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.List;
import java.util.Map;
@Slf4j
@Component
public class ElasticsearchDataHandle {
@Value("${spring.elasticsearch.rest.nodes:127.0.0.1}")
private String host;
@Value("${spring.elasticsearch.rest.port:9200}")
private String port;
@Value("${spring.elasticsearch.rest.indexName:indexName}")
private String indexName;//索引名称
/**
* 创建连接 高级客户端
*/
public RestHighLevelClient restHighLevelClient() {
RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(new HttpHost(host, Integer.parseInt(port), "http")));
return client;
}
/**
* Elasticsearch数据导入
*/
public void addElasticsearchData(List<Map<String, Object>> addEsDataMapList) {
//获取连接
RestHighLevelClient client = restHighLevelClient();
try {
//创建请求
BulkRequest bulkRequest = new BulkRequest();
//创建index请求 千万注意,这个写在循环外侧,否则UDP协议会有丢数据的情况,看运气
IndexRequest requestData = null;
for (Map<String, Object> addEsDataMap : addEsDataMapList) {//添加数据
requestData = new IndexRequest(indexName, "_doc", addEsDataMap.get("id").toString()).source(addEsDataMap, XContentType.JSON);
bulkRequest.add(requestData);
}
log.info("es同步数据数量:{}", bulkRequest.numberOfActions());
//设置索引刷新规则
bulkRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
//分批次提交,数量控制
if (bulkRequest.numberOfActions() >= 1) {
BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
log.info("es同步数据结果:{}", bulkResponse.hasFailures());
}
} catch (Exception e) {
e.printStackTrace();
log.error("es同步数据执行失败:{}", addEsDataMapList);
} finally {
try {
client.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}