机器配置
Data nodes:i3.4xlarge.search * 3 16C 122G
master nodes:c5.2xlarge.search * 3 8C 16G
案例1-kafka
kafka consumer tps = 18k
bulk request batch size = 100 (程序单线程等待100个消息到达后提交,因此系统的吞吐量与batch无关)
数据量(cat命令):
green open admin_operate_log_v1_2021 1 0 432 0 183.9kb 183.9kb
green open admin_operate_log_v1_2022 1 0 38855695 0 10.9gb 10.9gb
green open admin_operate_log_v1_2023 1 0 15996192 0 4.5gb 4.5gb
索引配置
"settings": {
"index": {
"refresh_interval": "-1",
"number_of_shards": "1",
"number_of_replicas": "0"
}
}
ES负载
cpu: 写入期间<=49%,写入完成后短暂峰值:68-72%
内存:73%左右
性能
耗时:55-60分钟左右
案例2-reindex
数据量
health | status | index | uuid | pri | rep | docs.count | docs.deleted | store.size | pri.store.size |
---|---|---|---|---|---|---|---|---|---|
green | open | user_operation_log_v1_202211 | F7qtS9A3QxCFlIiZ8vCB0Q | 2 | 1 | 6471447 | 0 | 5.3gb | 2.6gb |
green | open | user_operation_log_v1_202210 | 9MWtw90pTLes7ZzsDo5zxw | 2 | 1 | 5479698 | 0 | 4.3gb | 2.1gb |
green | open | admin_operate_log_v1_2021 | cxmLz54CR2C2eQkWcftGdw | 1 | 0 | 432 | 0 | 183.9kb | 183.9kb |
green | open | admin_operate_log_v1_2022 | mD9B3fY3T5moY2iMoSJ6rA | 1 | 0 | 38855695 | 0 | 10.9gb | 10.9gb |
green | open | admin_operate_log_v1_2023 | Q8umgdzcR1idQncdPnUCbA | 1 | 0 | 15996192 | 0 | 4.5gb | 4.5gb |
green | open | user_operation_log_v1_202212 | 9Bo4HkB1T_G8oiOiJVcDow | 2 | 1 | 5714707 | 0 | 4.6gb | 2.3gb |
green | open | user_operation_log_v1_202208 | 8jMbqQPIQT-GI67lo72Xbg | 2 | 1 | 5070471 | 0 | 5gb | 2.5gb |
green | open | user_operation_log_v1_202207 | ppYBaM3NQkaPBkZowXqbdA | 2 | 1 | 5253290 | 0 | 5.1gb | 2.5gb |
green | open | user_operation_log_v1_202206 | A12fRvkhTOCcI12QPCzbxw | 2 | 1 | 5499146 | 0 | 5.6gb | 2.8gb |
green | open | user_operation_log_v1_202305 | ltM-SbzaQkaEmqVmV84iEQ | 2 | 1 | 1659596 | 50 | 1.1gb | 583mb |
green | open | user_operation_log_v1_202304 | 0W9kvl-jTlWizYltRq4yGQ | 2 | 1 | 7589405 | 44 | 4.7gb | 2.3gb |
green | open | user_operation_log_v1_202205 | KzLscm-gRSq-v7aps96M4Q | 2 | 1 | 7343020 | 0 | 7.5gb | 3.7gb |
green | open | user_operation_log_v1_202209 | DDMTU6mVTn-uF1NZPJ-x4Q | 2 | 1 | 4699022 | 0 | 4.3gb | 2.1gb |
green | open | user_operation_log_v1_202303 | f13sJ9wyRxagWYOCK0t07w | 2 | 1 | 7698296 | 0 | 5gb | 2.5gb |
green | open | token_sale_v1 | K7FFK24FSWi1oKlXrRuBug | 3 | 1 | 1157 | 22 | 550.2kb | 275.1kb |
green | open | user_operation_log_v1_202204 | grbxxC5iQie3ADnnyE5Erw | 2 | 1 | 8288019 | 0 | 8.3gb | 4.1gb |
green | open | user_operation_log_v1_202302 | oDOUznF5QKGgTmetRv2uRA | 2 | 1 | 6797596 | 0 | 4.4gb | 2.2gb |
green | open | user_operation_log_v1_202203 | aJLU9fhzT3uCNf2wVdb3Gw | 2 | 1 | 9291234 | 0 | 11.3gb | 5.6gb |
green | open | .kibana_1 | ErpcRNskT_-GLNVpeQd-1A | 1 | 1 | 1 | 0 | 10.1kb | 5kb |
green | open | user_operation_log_v1_202301 | BqpOEoDTTO2ue-uDfjUoCA | 2 | 1 | 6103653 | 152925 | 4.2gb | 2.1gb |
green | open | user_operation_log_v1_202202 | d9VaDToWSs2qbdyxJyDvdg | 2 | 1 | 9415513 | 0 | 12gb | 6gb |
green | open | user_operation_log_v1_202201 | FahUyKbeTYK_zYYc_UhofQ | 2 | 1 | 11594662 | 0 | 15.4gb | 7.7gb |
索引配置
# Base URL of the Elasticsearch cluster that receives every request below.
es_host="https://my_es_host"

# Source (v1) indices to reindex, one name per entry.
indices=(
  user_operation_log_v1_202301
  user_operation_log_v1_202302
)
# Register the index template "user_operation_log_template_v2" through the
# _template API on ${es_host}. It applies to indices matching
# "user_operation_log_v2*", which is the naming the reindex targets use.
#
# What the request body sets:
#   - mappings: "dynamic" is "false" and the fields are declared explicitly;
#     version_code, apikey, user_agent and response_time carry "index": false.
#   - settings: refresh_interval "-1", 2 primary shards, 0 replicas.
#   - aliases: every matching index is added to the alias user_operation_log_v2.
#
# NOTE(review): the custom normalizer is named "caseSensitive", but its only
# filter is "lowercase", so the keyword fields using it (operation, client_type,
# response_status) are lowercased. Confirm the name reflects the intent.
#
# The payload is single-quoted, so the shell expands nothing inside it.
curl --location --request POST "${es_host}/_template/user_operation_log_template_v2" \
--header 'Content-Type: application/json' \
--data-raw '{
"order": 0,
"index_patterns": [
"user_operation_log_v2*"
],
"mappings": {
"dynamic": "false",
"properties": {
"user_id": {
"type": "long"
},
"operation": {
"type": "keyword",
"normalizer": "caseSensitive"
},
"real_ip": {
"type": "keyword"
},
"full_ip": {
"type": "keyword"
},
"client_type": {
"type": "keyword",
"normalizer": "caseSensitive"
},
"version_code": {
"type": "keyword",
"index": false
},
"apikey": {
"type": "keyword",
"index": false
},
"user_agent": {
"type": "keyword",
"index": false
},
"request_time": {
"type": "long"
},
"response_time": {
"type": "long",
"index": false
},
"request": {
"type": "text"
},
"response": {
"type": "text"
},
"response_status": {
"type": "keyword",
"normalizer": "caseSensitive"
}
}
},
"settings": {
"index": {
"refresh_interval" : "-1",
"number_of_shards": "2",
"number_of_replicas": "0"
},
"analysis": {
"normalizer": {
"caseSensitive": {
"filter": "lowercase",
"type": "custom"
}
}
}
},
"aliases": {
"user_operation_log_v2": {}
}
}'
# Submit one reindex task per source index, copying each v1 index into its v2
# counterpart.
#
# Reads (defined earlier in this script):
#   es_host  - base URL of the Elasticsearch cluster
#   indices  - array of source index names
#
# The target name is the source name with its first "v1" replaced by "v2".
#
# NOTE: wait_for_completion=false makes _reindex return as soon as the task has
# been created. The "cost" printed per index is therefore only the time needed
# to submit the request, not the reindex duration. It also means every task in
# the list starts almost at once, so keep the list short enough for the cluster.
for ((i = 0; i < ${#indices[@]}; i++)); do
  target_index=${indices[i]/v1/v2}
  message="source = ${indices[i]}, target = ${target_index}"
  echo "${message}"
  start=$(date +"%s")
  # The index names are spliced into the single-quoted JSON body; they are
  # double-quoted so the shell never word-splits or globs them.
  curl -H 'Content-Type: application/json' -XPOST "${es_host}/_reindex?slices=2&requests_per_second=-1&wait_for_completion=false&pretty" -d'{
"source": {
"index": "'"${indices[i]}"'"
,"size": 5000
},
"dest": {
"index": "'"${target_index}"'"
}
}'
  end=$(date +"%s")
  echo "${message} cost $((end - start)) seconds"
done
ES负载
性能
耗时:10分钟左右
第一次提交2个索引reindex
{
"completed": true,
"task": {
"node": "b8mpzb7ARLau_iJVU4fkmg",
"id": 246335488,
"type": "transport",
"action": "indices:data/write/reindex",
"status": {
"total": 9415513,
"updated": 0,
"created": 9415513,
"deleted": 0,
"batches": 1884,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until_millis": 0,
"slices": [
{
"slice_id": 0,
"total": 4708452,
"updated": 0,
"created": 4708452,
"deleted": 0,
"batches": 942,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until_millis": 0
},
{
"slice_id": 1,
"total": 4707061,
"updated": 0,
"created": 4707061,
"deleted": 0,
"batches": 942,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until_millis": 0
}
]
},
"description": "reindex from [user_operation_log_v1_202202] to [user_operation_log_v2_202202][_doc]",
"start_time_in_millis": 1683784030867,
"running_time_in_nanos": 447756932234,
"cancellable": true,
"headers": {
}
},
"response": {
"took": 447740,
"timed_out": false,
"total": 9415513,
"updated": 0,
"created": 9415513,
"deleted": 0,
"batches": 1884,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled": "0s",
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until": "0s",
"throttled_until_millis": 0,
"slices": [
{
"slice_id": 0,
"total": 4708452,
"updated": 0,
"created": 4708452,
"deleted": 0,
"batches": 942,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled": "0s",
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until": "0s",
"throttled_until_millis": 0
},
{
"slice_id": 1,
"total": 4707061,
"updated": 0,
"created": 4707061,
"deleted": 0,
"batches": 942,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled": "0s",
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until": "0s",
"throttled_until_millis": 0
}
],
"failures": [
]
}
}
-
{
"completed": true,
"task": {
"node": "BrXZu22XQfi94P1l5ErOwA",
"id": 168840526,
"type": "transport",
"action": "indices:data/write/reindex",
"status": {
"total": 9291234,
"updated": 0,
"created": 9291234,
"deleted": 0,
"batches": 1859,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until_millis": 0,
"slices": [
{
"slice_id": 0,
"total": 4642972,
"updated": 0,
"created": 4642972,
"deleted": 0,
"batches": 929,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until_millis": 0
},
{
"slice_id": 1,
"total": 4648262,
"updated": 0,
"created": 4648262,
"deleted": 0,
"batches": 930,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until_millis": 0
}
]
},
"description": "reindex from [user_operation_log_v1_202203] to [user_operation_log_v2_202203][_doc]",
"start_time_in_millis": 1683784030916,
"running_time_in_nanos": 437625719457,
"cancellable": true,
"headers": {
}
},
"response": {
"took": 437607,
"timed_out": false,
"total": 9291234,
"updated": 0,
"created": 9291234,
"deleted": 0,
"batches": 1859,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled": "0s",
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until": "0s",
"throttled_until_millis": 0,
"slices": [
{
"slice_id": 0,
"total": 4642972,
"updated": 0,
"created": 4642972,
"deleted": 0,
"batches": 929,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled": "0s",
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until": "0s",
"throttled_until_millis": 0
},
{
"slice_id": 1,
"total": 4648262,
"updated": 0,
"created": 4648262,
"deleted": 0,
"batches": 930,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled": "0s",
"throttled_millis": 0,
"requests_per_second": -1.0,
"throttled_until": "0s",
"throttled_until_millis": 0
}
],
"failures": [
]
}
}
第二次提交14个索引reindex
202204-202305
日志过多省略,与第一次提交差别不大
总结
案例1
kafka:tps=18k
文档量:54851887
文档大小:15.4GB
ES节点:单个
耗时:55-60分钟
负载:cpu=50%左右,内存=70%左右
案例2
根据第二次提交的reindex分析(注:下列文档量与文档大小为202201-202305全部17个user_operation_log_v1索引的主分片合计)
文档量:113968775
文档大小:53.6GB
ES节点:3个
耗时:10分钟
负载:cpu=80%左右,内存=80%左右
正如开头描述的,系统单线程工作瓶颈明显,并发与吞吐量低,优点是限制了单个业务索引(多个物理索引按月分片)资源占用,避免拖垮系统
reindex风险
14个索引reindex的异步任务启动时间如下,可以看到几乎是同时启动的。因此批量操作时要控制并行任务的数量,并事先规划好slices参数和size参数的配置,避免将服务器跑垮
1683785201349 2023-05-11 14:06:41
1683785201371 2023-05-11 14:06:41
1683785201395
1683785201418
1683785201440
1683785201463
1683785201485
1683785201508
1683785201531
1683785201551
1683785201572
1683785201598
1683785201623
1683785201645