elasticsearch mapping 学习（parent-child）

最新推荐文章于 2023-02-02 09:13:10 发布

丹江怒潮

最新推荐文章于 2023-02-02 09:13:10 发布

阅读量1.5k

点赞数

分类专栏： elasticsearch

elasticsearch 专栏收录该内容

32 篇文章 0 订阅

订阅专栏

ES 父子文档查询

父子文档的特点

1. 父/子文档是完全独立的。

2. 父文档更新不会影响子文档。

3. 子文档更新不会影响父文档或者其它子文档。

父子文档的映射与索引

1. 父子关系必须在新建索引或 update-mapping 时通过 type 的映射确定好（注：下面的 `_parent` 映射写法适用于 ES 6.0 之前的版本，6.x 起改用 join 字段类型）

PUT /company
{
  "mappings": {
    "branch": {},             //父文档 type
    "employee": {             //子文档 type
      "_parent": {
        "type": "branch"      //指定 employee 的父文档 type 为 branch
      }
    }
  }
}

2. 父文档的索引和普通文档索引一样。

POST /company/branch/_bulk
{ "index": { "_id": "london" }}
{ "name": "London Westminster", "city": "London", "country": "UK" }

3. 子文档索引必须指定其对应的父文档 ID，作用：

建立父子文档之间的关联
确保子文档能够被索引到父文档所在分片（parent id 作为 routing 值）

PUT /company/employee/1?parent=london     //指定 id = london 的父文档
{
  "name":  "Alice Smith",
  "dob":   "1970-10-24",
  "hobby": "hiking"
}

4. 如果要更改文档的父文档，不能仅仅 update 或者 reindex 旧文档（新的父文档可能在不同分片上），需要先删除旧文档再重新索引。

父子关系的应用

看到 parent-child 关系，我们很容易想到的是像 SQL 那样的各种 JOIN 操作——比如查询某个文档并一并取回所有的父或子文档等。

然而，ES 中不支持类似的 JOIN 查询。即便 children aggregation 也不能做到像 SQL 那样的 JOIN 操作！

在 ES 中的 parent-child 关系基本可以理解为是一个过滤条件，如下：

//查询某文档，只有该文档有"父文档"且满足一定条件才算匹配
{"has_parent": {                //文档是否有 parent
      "type": "branch",         //其 parent 所在 type 必须是 branch
      "query": {                //其 parent 必须满足以下 query 条件
        "match": {
          "country": "UK"
        }
      }
    }                           //如果满足以上条件，hit 该文档
}
//查询某文档，只有该文档有"子文档"且满足一定条件才算匹配
{
"has_child": {                       //文档是否有 child
      "type":       "employee",      //其 child 所在 type 必须是 employee
      "query": {                     //其 child 必须满足以下 query 条件
        "match": {
          "name": "Alice Smith"
        }
      }
    }                                //如果满足以上条件，hit 该文档
}

1. has_child：基于子文档的内容，查找父文档

//请求 GET /company/branch/_search
{
  "query": {
    "has_child": {                                //基于 child 的内容，查询满足条件的 parent 文档
      "type":       "employee",
      "query": {                                   //在 child 中执行 match query操作
        "match": {
          "name": "Alice Smith"
        }
      }
    }
  }
}
//结果
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "company",
        "_type": "branch",                     //注意！！！返回的是 parent 的文档
        "_id": "london",
        "_score": 1,
        "_source": {
          "name": "London Westminster",
          "city": "London",
          "country": "UK"
        }
      }
    ]
  }
}

2. has_parent：基于父文档的内容，查找子文档

//请求 GET /company/employee/_search
{
  "query": {
    "has_parent": {                       //基于 parent 的内容，查询满足条件的 child 文档
      "type": "branch", 
      "query": {                             //在 parent 中执行 match query 查询
        "match": {
          "country": "UK"
        }
      }
    }
  }
}
//结果 
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "company",
        "_type": "employee",              //注意！！！返回的是 child 的文档
        "_id": "1",
        "_score": 1,
        "_routing": "london",
        "_parent": "london",
        "_source": {
          "name": "Alice Smith",
          "dob": "1970-10-24",
          "hobby": "hiking"
        }
      }
    ]
  }
}

3. children aggregation：对关联的 child 文档进行聚合操作

//请求 GET /company/branch/_search
{
  "size" : 0,
  "aggs": {
    "country": {
      "terms": { 
        "field": "country"                        //以不同的 country 来分组（分桶）
      },
      "aggs": {
        "employees": {
          "children": {                           //children aggregation，子 type 为 employee
            "type": "employee"
          },
          "aggs": {
            "hobby": {
              "terms": { 
                "field": "hobby"                  //以不同的 hobby 来分组（分桶）
              }
            }
          }
        }
      }
    }
  }
}
//结果
"aggregations": {
   "country": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [                                                //country 聚合结果
        {
          "key": "uk",                                 
          "doc_count": 2,
          "employees": {                                          //children aggregation 聚合
            "doc_count": 1,
            "hobby": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [                                       //hobby 聚合结果
                {
                  "key": "hiking",
                  "doc_count": 1
                }
              ]
            }
          }
        }
      ]
    }
}