elasticsearch reindex task 小笔记

# 使用 kibana 的案例数据来做演示。把索引 kibana_sample_data_logs 的数据复制到索引 kibana_sample_data_logs_01 中
POST _reindex?requests_per_second=100&slices=3
{
  "source": {
    "index": "kibana_sample_data_logs"
  },
  "dest": {
    "index": "kibana_sample_data_logs_01"
  }
}

DELETE kibana_sample_data_logs_01
# 参数 track_total_hits=true 的作用是返回精确的命中总数,size 的作用是限制返回的文档条数
GET kibana_sample_data_logs_01/_search?track_total_hits=true&size=2

GET _cat/tasks
# 按功能进行查询
GET _tasks?actions=*/write/reindex

# 查看任务情况
GET _tasks/JhA2KHkJQOG5nH_JFKENIg:492068

# 可以动态调整参数
POST _reindex/JhA2KHkJQOG5nH_JFKENIg:492068/_rethrottle?requests_per_second=20

# 取消任务 POST _tasks/<task_id>/_cancel
POST _tasks/JhA2KHkJQOG5nH_JFKENIg:504352/_cancel

我们可以对一个索引中的数据进行分类隔离,使用 routing 来实现

# 取消任务 POST _tasks/<task_id>/_cancel
POST _tasks/JhA2KHkJQOG5nH_JFKENIg:504352/_cancel

# 首先需要设置分片
PUT kibana_sample_data_flights_001
{
  "settings": {
    "number_of_shards": 3
  }
}
# 添加自定义的 routing,注意自定义的 routing 值必须以 “=” 开头(例如 "=flight")
# 设置索引的 routing 的作用,就是做数据隔离。
POST _reindex
{
  "conflicts": "proceed", 
  "source": {
    "index": "kibana_sample_data_flights"
  },
  "dest": {
    "index": "kibana_sample_data_flights_001",
    "routing": "=flight"
  }
}
# 条件查询一下数据
GET kibana_sample_data_flights_001/_search
{
  "query": {
    "term": {
      "DestAirportID": {
        "value": "MAN"
      }
    }
  }
}
# 删除数据,重新复制一遍。添加条件
DELETE kibana_sample_data_flights_001

# 我们可以添加查询条件 DestAirportID 来复制数据
# 例如具有时间轴的数据,可以重建索引及设置 routing 来实现当前数据和历史数据的隔离
GET kibana_sample_data_flights/_mapping
POST _reindex
{
  "conflicts": "proceed", 
  "source": {
    "index": "kibana_sample_data_flights",
    "query": {
      "term": {
        "DestAirportID": {
          "value": "MAN"
        }
      }
    }
  },
  "dest": {
    "index": "kibana_sample_data_flights_001",
    "routing": "=flight"
  }
}
# 查看数据
GET kibana_sample_data_flights_001/_search?track_total_hits=true

 


# 还可以设置数量,在添加查询条件的同时,添加参数 max_docs 来限制数据的数量。
DELETE kibana_sample_data_flights_001
POST _reindex
{
  "conflicts": "proceed", 
  "max_docs": 100,
  "source": {
    "index": "kibana_sample_data_flights",
    "query": {
      "term": {
        "DestAirportID": {
          "value": "MAN"
        }
      }
    }
  },
  "dest": {
    "index": "kibana_sample_data_flights_001",
    "routing": "=flight"
  }
}

GET kibana_sample_data_flights_001/_search?track_total_hits=true&routing=flight
# 可以将多个索引的数据复制到同一个索引上
POST _reindex
{
  "conflicts": "proceed", 
  "max_docs": 100,
  "source": {
    "index": ["kibana_sample_data_flights","kibana_sample_data_logs"],
    "query": {
      "term": {
        "DestAirportID": {
          "value": "MAN"
        }
      }
    }
  },
  "dest": {
    "index": "kibana_sample_data_flights_002",
    "routing": "=flight"
  }
}

需要注意的是,如果 "kibana_sample_data_flights" 和 "kibana_sample_data_logs" 两个索引中存在 id 相同的文档,reindex 不会处理 id 冲突:最后写入的文档会覆盖先写入的文档,而写入顺序通常无法预测,因此不要依赖这一行为,可以通过脚本保证 id 唯一。

在复制数据到新索引中之前,我们应该先设置索引的 mapping 。

我们可以再做一个案例:

首先创建两个索引 student-001 和 worker-001

PUT student-001
{
  "settings": {
    "number_of_shards": 3
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "long"
      },
      "name": {
        "type": "text",
        "fields": {
          "key": {
            "type": "keyword"
          }
        }
      },
      "year": {
        "type": "integer",
        "fields": {
          "key": {
            "type": "keyword"
          }
        }
      }
    }
  }
}

PUT worker-001
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 2
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "integer",
        "fields": {
          "key": {
            "type": "keyword"
          }
        }
      },
      "name":{
        "type": "keyword"
      },
      "income":{
        "type": "integer"
      }
    }
  }
}
DELETE worker-001

添加数据

POST _bulk
{"index":{"_index":"worker-001","_id":1}}
{"id":1,"name":"wyf1","income":100}
{"index":{"_index":"worker-001","_id":2}}
{"id":2,"name":"wyf2","income":110}

PUT _bulk
{"index":{"_index":"student-001","_id":1}}
{"id": 1,"name": "student-1","year": 1}
{"index":{"_index":"student-001","_id":2}}
{"id": 2,"name": "student-2","year": 2}
{"index":{"_index":"student-001","_id":3}}
{"id": 3,"name": "student-3","year": 3}
{"index":{"_index":"student-001","_id":4}}
{"id": 4,"name": "student-4","year": 1}
{"index":{"_index":"student-001","_id":5}}
{"id": 5,"name": "student-5","year": 3}

进行数据复制,条件是 id 字段的值大于等于 0 且小于等于 3 的数据。

POST _reindex
{
  "source": {
    "index": ["worker-001","student-001"],
    "query": {
      "range": {
        "id": {
          "gte": 0,
          "lte": 3
        }
      }
    }
  },
  "dest": {
    "index": "wyf-001"
  },
  "script": {
    "source": """
      if (ctx._index=='student-001'){
        ctx._source.studentName=ctx._source.name;
        ctx._source.year=ctx._source.year;
      } else {
        ctx._source.workerName=ctx._source.name;
        ctx._source.income=ctx._source.income;
      }
      ctx._source.remove("name");
    """
  }
}

查看数据 GET wyf-001/_search

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值