定时删除es中的数据
根据时间字段,删除指定时间之前的数据
#!/bin/sh
# Delete old documents from an Elasticsearch index with _delete_by_query.
#
# Every document whose <datefield> is earlier than 00:00:00 of the date
# <months> months before now is deleted.
#
# Usage:   sh delete_es_by_day.sh <index> <datefield> <months> [format_day]
# Example: sh delete_es_by_day.sh indexname datefield 3
#
# Arguments:
#   $1  index to delete from
#   $2  name of the date field used in the range filter
#   $3  retention period in MONTHS (non-negative integer)
#   $4  optional; still accepted so existing callers keep working, but it is
#       not used by this script
# Environment:
#   ES_HOST  host:port of Elasticsearch (default: 192.168.31.205:9200)
# Exit status:
#   0 on success, 1 on bad arguments or when the cutoff date cannot be
#   computed, otherwise the exit status of curl.
# Requires a `date` that supports -d with relative dates (e.g. GNU date).

index_name=$1
daycolumn=$2
savedays=$3

# All three values end up in a destructive request, so none may be empty.
if [ -z "$index_name" ] || [ -z "$daycolumn" ] || [ -z "$savedays" ]; then
  echo "the args is not right,please input again...." >&2
  echo "usage: sh $0 <index> <datefield> <months> [format_day]" >&2
  exit 1
fi

# Reject anything that is not a plain non-negative integer before it is
# handed to date.
case $savedays in
  *[!0-9]*)
    echo "the retention period must be a non-negative integer: ${savedays}" >&2
    exit 1
    ;;
esac

# Cutoff timestamp: the date <savedays> months ago with the time fixed to
# 00:00:00. To count in days instead, replace "month" with "day".
# NOTE(review): "+0800" is a literal suffix, not derived from the system time
# zone -- confirm the host clock runs in UTC+8.
cutoff=$(date -d "-${savedays} month" +'%Y-%m-%dT00:00:00.000+0800') || {
  echo "failed to compute the cutoff date" >&2
  exit 1
}

# Build the request body with printf so the field name and the timestamp are
# inserted as whole strings, without word splitting or globbing.
payload=$(printf '{"query":{"bool":{"must":[{"range":{"%s":{"lt":"%s"}}}]}}}' \
  "$daycolumn" "$cutoff")

es_host=${ES_HOST:-192.168.31.205:9200}

# slices=10 runs the delete as 10 tasks; scroll_size=10000 deletes 10000
# documents per batch (the default is 1000).
# -sS hides curl's progress meter but still prints curl errors.
curl -sS -H "Content-Type: application/json" -XPOST \
  "${es_host}/${index_name}/_delete_by_query?refresh&slices=10&scroll_size=10000" \
  -d "$payload"
status=$?

# Without --fail curl exits 0 on HTTP error responses, so this only catches
# transport-level failures; check the printed response for Elasticsearch
# errors.
if [ "$status" -ne 0 ]; then
  echo "delete_by_query request failed (curl exit status ${status})" >&2
  exit "$status"
fi

echo "ok"
然后使用
crontab -e
17 13 * * * sh /work/shell/test.sh qzwl logsdate 3 >> /work/shell/a.txt
refresh 和 slices=10 是两个独立的参数:refresh 表示删除完成后刷新受影响的分片,使删除结果立即对搜索可见;slices=10 表示把删除任务切分为10个子任务(task)并行执行,可以加快删除速度、减少超时问题。slices 不要设置太大,很耗费CPU
scroll_size=10000 这个参数是一次删除10000条数据,默认是1000
如何停止这个删除命令:
先使用下面的命令获取taskid
GET _tasks?detailed=true&actions=*/delete/byquery
返回参数
{
"nodes": {
"IbzpcKx0RwuGHjikxwvTCQ": {
"name": "ambari-5",
"transport_address": "192.168.31.204:9300",
"host": "192.168.31.204",
"ip": "192.168.31.204:9300",
"roles": [
"master",
"data",
"ingest"
],
"attributes": {
"ml.machine_memory": "16573603840",
"xpack.installed": "true",
"ml.max_open_jobs": "20",
"ml.enabled": "true"
},
"tasks": {
"IbzpcKx0RwuGHjikxwvTCQ:39574042": { //这个就是taskid
"node": "IbzpcKx0RwuGHjikxwvTCQ",
"id": 39574042,
"type": "transport",
"action": "indices:data/write/delete/byquery",
"status": {
"slice_id": 4,
"total": 1317212,
"updated": 0,
"created": 0,
"deleted": 0,
"batches": 1,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1,
"throttled_until_millis": 0
},
"description": "delete-by-query [qzwl_dcs][qzwl_dcs]",
"start_time_in_millis": 1567149223417,
"running_time_in_nanos": 16369962967,
"cancellable": true,
"parent_task_id": "IbzpcKx0RwuGHjikxwvTCQ:39574027",
"headers": {}
},
"description": "delete-by-query [qzwl_dcs][qzwl_dcs]",
"start_time_in_millis": 1567149223415,
"running_time_in_nanos": 16372180383,
"cancellable": true,
"parent_task_id": "IbzpcKx0RwuGHjikxwvTCQ:39574027",
"headers": {}
}
}
}
}
}
使用以下命令停止
POST _tasks/IbzpcKx0RwuGHjikxwvTCQ:39574028/_cancel