reindex

问题:

aggregation_test索引由于个人误操作,被添加了两个无用字段query和sole,索引中也出现了对应的脏数据,现需要去除这两个字段及相关脏数据。

解决方案:

使用reindex方案解决。业务代码连接的是索引的别名而不是索引本身,因此代码无需任何改动:只需要把别名从原索引上删除,再设置到新索引上,就可以在不停用es的情况下新建索引或修改索引映射,并且不影响业务使用。

1、问题索引mapping(query、sole字段意外新增)+ 索引别名
#索引映射
GET aggregation_test/_mapping

{
  "aggregation_test": {
    "mappings": {
      "doc": {
        "properties": {
          "color": {
            "type": "keyword"
          },
          "make": {
            "type": "keyword"
          },
          "price": {
            "type": "integer"
          },
          "query": {#错误无用字段
            "properties": {
              "bool": {
                "properties": {
                  "must": {
                    "properties": {
                      "term": {
                        "properties": {
                          "id": {
                            "properties": {
                              "value": {
                                "type": "text",
                                "fields": {
                                  "keyword": {
                                    "type": "keyword",
                                    "ignore_above": 256
                                  }
                                }
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          },
          "sold": {
            "type": "date"
          },
          "sole": {#错误无用字段
            "type": "date"
          }
        }
      }
    }
  }
}


#原索引别名
GET aggregation_test/_alias

{
  "aggregation_test": {
    "aliases": {
      "aggregation_test_index": {}
    }
  }
}
2、查询原索引中的数据
GET aggregation_test/_search
{
  "size": 10, 
  "query": {
    "match_all": {}
  }
}

有两条是脏数据:
在这里插入图片描述

3、新建索引mapping
PUT aggregation_test-v2
{
  "settings": {
    "number_of_shards": "3",
    "number_of_replicas": 1
  },
  "mappings": {
    "doc": {
      "properties": {
        "color": {
          "type": "keyword"
        },
        "make": {
          "type": "keyword"
        },
        "price": {
          "type": "integer"
        },
        "sold": {
          "type": "date"
        }
      }
    }
  }
}
4、reindex第一次
POST _reindex
{
  "conflicts": "proceed",#遇到版本冲突时跳过并继续执行(只对版本冲突生效,脚本错误等其他错误仍会中止reindex)
  "source": {
    "index": "aggregation_test",#数据来源索引
    "size": 5,#每批次(batch)复制的文档数量
    "_source": [#源数据中需要的字段
      "price",
      "color",
      "make",
      "sold"
    ],
    "query": {#只取查询条件中的数据(可以自己参考添加条件)
      "match_all": {}
    }
  },
  "dest": {
    "index": "aggregation_test-v2",#数据目标索引
    "op_type": "create"#只创建新文档(以文档id来分辨,如果来源数据的id在目标索引中已存在,则产生版本冲突,配合conflicts=proceed时跳过)
  },
  "script": {#同步过程中可以对数据进行处理
    "source": "ctx._source.price += 99",#价格字段值加99
    "lang": "painless"
  }
}
结果:新索引中的数据少了,并且返回了脚本错误(reindex在脚本报错时会中止,之前已写入的批次不会回滚,所以只同步了部分数据;此处原为截图):
{
  "error": {
    "root_cause": [
      {
        "type": "script_exception",
        "reason": "runtime error",
        "script_stack": [
          "ctx._source.price += 99", 
          "                     ^---- HERE"
        ],
        "script": "ctx._source.price += 99",
        "lang": "painless"
      }
    ],
    "type": "script_exception",
    "reason": "runtime error",
    "script_stack": [
      "ctx._source.price += 99",
      "                     ^---- HERE"
    ],
    "script": "ctx._source.price += 99",
    "lang": "painless",
    "caused_by": {
      "type": "null_pointer_exception",#因为有数据该字段值为空
      "reason": null
    }
  },
  "status": 500
}
4.1 删除新索引中的数据重新reindex,不使用脚本,防止报错
#删除目标索引所有数据
POST aggregation_test-v2/_delete_by_query?refresh
{
  "query": {
    "match_all": {}
  }
}

#重新同步,不用脚本
POST _reindex
{
  "conflicts": "proceed",
  "source": {
    "index": "aggregation_test",
    "size": 5,
    "_source": [
      "price",
      "color",
      "make",
      "sold"
    ],
    "query": {
      "match_all": {}
    }
  },
  "dest": {
    "index": "aggregation_test-v2",
    "op_type": "create"
  }
}

#结果成功,同步数据9条
{
  "took": 1047,
  "timed_out": false,
  "total": 9,
  "updated": 0,
  "created": 9,
  "deleted": 0,
  "batches": 2,
  "version_conflicts": 0,
  "noops": 0,
  "retries": {
    "bulk": 0,
    "search": 0
  },
  "throttled_millis": 0,
  "requests_per_second": -1,
  "throttled_until_millis": 0,
  "failures": []
}

同步成功,但是新索引中仍然包含那两条脏数据。这个时候,我们可以在reindex的查询条件中把这些脏数据过滤掉。

在这里插入图片描述

4.2 重新reindex,不要脏数据(由于op_type为create,执行前需先按4.1的方式清空目标索引,否则已存在的文档只会产生版本冲突而被跳过)
POST _reindex
{
  "conflicts": "proceed",
  "source": {
    "index": "aggregation_test",
    "size": 5,
    "_source": [
      "price",
      "color",
      "make",
      "sold"
    ],
    "query": {
      "bool": {
        "must_not": [
          {
            "exists": {
              "field": "sole"
            }
          },
          {
            "exists": {
              "field": "query.bool.must.term.id.value"
            }
          }
        ]
      }
    }
  },
  "dest": {
    "index": "aggregation_test-v2",
    "op_type": "create"
  }
}

#响应
{
  "took": 880,
  "timed_out": false,
  "total": 7,
  "updated": 0,
  "created": 7,
  "deleted": 0,
  "batches": 2,
  "version_conflicts": 0,
  "noops": 0,
  "retries": {
    "bulk": 0,
    "search": 0
  },
  "throttled_millis": 0,
  "requests_per_second": -1,
  "throttled_until_millis": 0,
  "failures": []
}

结果 ,符合预期,去掉两条脏数据

在这里插入图片描述

5、将新索引添加别名:aggregation_test_index

#删除原索引别名(注意:先删除再添加不是原子操作,两步之间别名会短暂不可用;生产环境建议使用 POST _aliases 在同一个请求中同时完成 remove 和 add)
DELETE aggregation_test/_alias/aggregation_test_index

#给新索引添加别名
PUT aggregation_test-v2/_alias/aggregation_test_index
结果:符合预期

在这里插入图片描述

6、使用别名查询

#使用别名查询
POST aggregation_test_index/_search
{
  "query": {"match_all": {}}
}

#结果
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 7,
    "max_score": 1,
    "hits": [
      {
        "_index": "aggregation_test-v2",
        "_type": "doc",
        "_id": "BX7ab3YBwI6V-XWfWiJZ",
        "_score": 1,
        "_source": {
          "sold": "2014-07-02",
          "color": "blue",
          "price": 15000,
          "make": "toyota"
        }
      },
      {
        "_index": "aggregation_test-v2",
        "_type": "doc",
        "_id": "An7ab3YBwI6V-XWfWiJZ",
        "_score": 1,
        "_source": {
          "sold": "2014-10-28",
          "color": "red",
          "price": 10000,
          "make": "honda"
        }
      },
      {
        "_index": "aggregation_test-v2",
        "_type": "doc",
        "_id": "BH7ab3YBwI6V-XWfWiJZ",
        "_score": 1,
        "_source": {
          "sold": "2014-05-18",
          "color": "green",
          "price": 30000,
          "make": "ford"
        }
      },
      {
        "_index": "aggregation_test-v2",
        "_type": "doc",
        "_id": "B37ab3YBwI6V-XWfWiJZ",
        "_score": 1,
        "_source": {
          "sold": "2014-11-05",
          "color": "red",
          "price": 20000,
          "make": "honda"
        }
      },
      {
        "_index": "aggregation_test-v2",
        "_type": "doc",
        "_id": "CH7ab3YBwI6V-XWfWiJZ",
        "_score": 1,
        "_source": {
          "sold": "2014-01-01",
          "color": "red",
          "price": 80000,
          "make": "bmw"
        }
      },
      {
        "_index": "aggregation_test-v2",
        "_type": "doc",
        "_id": "Bn7ab3YBwI6V-XWfWiJZ",
        "_score": 1,
        "_source": {
          "sold": "2014-08-19",
          "color": "green",
          "price": 12000,
          "make": "toyota"
        }
      },
      {
        "_index": "aggregation_test-v2",
        "_type": "doc",
        "_id": "CX7ab3YBwI6V-XWfWiJZ",
        "_score": 1,
        "_source": {
          "sold": "2014-02-12",
          "color": "blue",
          "price": 25000,
          "make": "ford"
        }
      }
    ]
  }
}

7、回到script脚本有问题这里,修改脚本script

#先清空目标索引,再重新reindex;脚本中先判断price字段是否有值,有值才加99,即可执行成功
POST _reindex
{
  "conflicts": "proceed",
  "source": {
    "index": "aggregation_test",
    "size": 5,
    "_source": [
      "price",
      "color",
      "make",
      "sold"
    ],
    "query": {
      "bool": {
        "must_not": [
          {
            "exists": {
              "field": "sole"
            }
          },
          {
            "exists": {
              "field": "query.bool.must.term.id.value"
            }
          }
        ]
      }
    }
  },
  "dest": {
    "index": "aggregation_test-v2",
    "op_type": "create"
  },
  "script": {
    "source": "if(ctx._source.price != null){ctx._source.price += 99}",
    "lang": "painless"
  }
}

#响应
{
  "took": 964,
  "timed_out": false,
  "total": 7,
  "updated": 0,
  "created": 7,
  "deleted": 0,
  "batches": 2,
  "version_conflicts": 0,
  "noops": 0,
  "retries": {
    "bulk": 0,
    "search": 0
  },
  "throttled_millis": 0,
  "requests_per_second": -1,
  "throttled_until_millis": 0,
  "failures": []
}
结果:查看新索引中的price是否都已加上99

[外链图片转存中...(img-XY9PnP5p-1610008448620)]

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值