Elasticsearch 的一些示例, 增删改查，映射，结构化查询，聚合

最新推荐文章于 2024-07-25 00:20:34 发布

今晚打酱油8

最新推荐文章于 2024-07-25 00:20:34 发布

阅读量1.9k

点赞数 2

分类专栏：全文索引

本文链接：https://blog.csdn.net/xj626852095/article/details/54343182

版权

全文索引专栏收录该内容

12 篇文章 0 订阅

订阅专栏

es版本 5.1.1

基本增删改操作

查询删除

POST /user_analysys/user_event/_delete_by_query
{
  "query": { 
    "match_all": {}
  }
}

PUT /megacorp/employee/1
{
    "first_name" : "John",
    "last_name" :  "Smith",
    "age" :        25,
    "about" :      "I love to go rock climbing",
    "interests": [ "sports", "music" ]
}

PUT /megacorp/employee/2
{
    "first_name" :  "Jane",
    "last_name" :   "Smith",
    "age" :         32,
    "about" :       "I like to collect rock albums",
    "interests":  [ "music" ]
}

PUT /megacorp/employee/3
{
    "first_name" :  "Douglas",
    "last_name" :   "Fir",
    "age" :         35,
    "about":        "I like to build cabinets",
    "interests":  [ "forestry" ]
}

GET /megacorp/employee/1
GET /megacorp/employee/_search
GET /megacorp/employee/1/_source

GET /megacorp/employee/_search?q=first_name:Jane

#局部更新
POST /megacorp/employee/1/_update
{
  "doc": {
    "about": "I love to go rock climbing xxx"
  } 
}

#更新不存在则插入
POST /megacorp/employee/4/_update
{
  "doc": {
    "about": "I love to go rock climbing xxx"
  },
  "upsert": {
    "first_name" : "xiang",
    "last_name" :  "kevin",
    "age" :        25,
    "about" :      "I love to go rock climbing zzz",
    "interests": [ "sports", "music" ]
  }
}


POST /megacorp/employee/_search
{
    "query": {
        "match": {
           "first_name": "John"
        }
    }
}

POST /megacorp/employee/_search
{
    "query": {
        "match": {
           "about": "rock climbing"
        }
    }
}

#match_phrase 确切的匹配若干个单词或者短语
POST /megacorp/employee/_search
{
    "query": {
        "match_phrase": {
           "about": "rock climbing"
        }    
    },
    "highlight": {
        "fields": {
            "about":{}
        }
    }
}

POST /megacorp/employee/_search  
{  
    "query": {  
      "bool": {
        "must": {
          "match": {  
           "last_name": "Smith"  
          }
        },
        "filter": {
          "range": {
            "age": {
              "gte": 10,
              "lte": 30
            }
          }
        }
        
      }
    }
}  

#https://www.elastic.co/guide/en/elasticsearch/reference/current/fielddata.html
GET /megacorp/employee/_mapping
PUT /megacorp/_mapping/employee
{
  "properties": {
    "interests": { 
      "type":     "text",
      "fielddata": true
    }
  }
}
  
#聚合,聚合也允许分级汇总。例如，让我们统计每种兴趣下职员的平均年龄  
POST /megacorp/employee/_search  
{     
    "query": {  
        "match": {  
           "last_name": "Smith"  
        }  
    },   
    "aggs" : {  
        "all_interests" : {  
            "terms" : {"field" : "interests"},  
            "aggs":{  
                "avg_age":{  
                    "avg" : {"field":"age"}  
                }  
            }  
              
        }  
    }  
}

分词和映射

GET /megacorp/employee/_search
GET /megacorp/employee/_mapping
#分词测试 
GET /_analyze?analyzer=standard&text=Text to analyze
GET /megacorp/_analyze?field=about&text=Black-cats

GET /gb/_mapping

DELETE /gb

# Create the index with an explicit mapping.
# The `string` type is deprecated in Elasticsearch 5.x (this article targets 5.1.1):
# analyzed full-text fields are declared as `text`.
PUT /gb
{
  "mappings": {
    "type_tweet" : {
      "properties" : {
        "tweet" : {
          "type" :    "text",
          "analyzer": "english"
        },
        "date" : {
          "type" :   "date"
        },
        "name" : {
          "type" :   "text"
        },
        "user_id" : {
          "type" :   "long"
        }
      }
    }
  }
}

# Adding a new field to an existing mapping is allowed.
# `keyword` is the 5.x replacement for a `string` field with "index": "not_analyzed":
# the value is indexed as a single exact term.
PUT /gb/_mapping/type_tweet
{
  "properties" : {
    "tag" : {
      "type" :    "keyword"
    }
  }
}

#不能修改映射 
PUT /gb/_mapping/type_tweet
{
  "properties" : {
    "tag" : {
      "type" :    "long"
    }
  }
}

结构化查询和结构化过滤

#结构化查询 和 结构化过滤
#原则上来说，全文搜索或其他需要进行相关性评分的场景使用查询语句，其余情况全部使用过滤语句
#一条过滤语句会询问每个文档的字段值是否包含着特定值，不会进行相关性分析和评分
#查询语句会询问每个文档的字段值与特定值的匹配程度如何？
#可以使用以下语句验证合法性和执行过程
#GET /megacorp/employee/_validate/query?explain
#结构如下：
GET /megacorp/employee/_search?explain
{
  "query": {
    "bool": {
      "must": [
        {"match": {
          "about": "rock"
        }},
        {"match": {
          "last_name": "Smith"
        }}
      ],
      "filter": {
        "range": {
          "age": {
            "gte": 10,
            "lte": 25
          }
        }
      }
    }
  }
}


GET /megacorp/employee/1
GET /megacorp/employee/_search


GET /megacorp/employee/_search
{
  "query": {
    "match": {
      "about": "rock climbing"
    }
  }
}

#multi_match查询允许你做match查询的基础上同时搜索多个字段
GET /megacorp/employee/_search
{
  "query": {
    "multi_match": {
      "query": "Smith",
      "fields": ["first_name","last_name"]
    }
  }
}

#match_phrase 用于短语匹配：要求文档同时包含所有词项，并且词项的顺序和相邻位置与查询短语一致
GET /megacorp/employee/_search
{
  "query": {
    "match_phrase": {
      "about": "rock climbing"
    }
  }
}

GET /megacorp/_analyze?field=about&text=I love to go rock climbing xxx 
#term是完全匹配，即不对搜索词进行分词，而是直接在倒排索引中查找整个词项。由于about字段在索引时已经被分词（rock、climbing等），索引中不存在"rock climbing"这个完整词项，所以下面的查询搜不出结果
GET /megacorp/employee/_search
{
  "query": {
    "term": {
      "about": "rock climbing"
    }
  }
}

#bool查询 must，must_not或者should
#must: 文档必须完全匹配条件
#should: should下面可以带一个或多个条件；当bool中没有must/filter时，至少要满足其中一个条件；当存在must或filter时（如下例），should只影响相关性评分，不满足也可以匹配（可用minimum_should_match调整）
#must_not: 文档必须不匹配条件
GET /megacorp/employee/_search
{
  "query": {
    "bool": {
      "must": [
        {"range": {
          "age": {
            "gte": 10,
            "lte": 25
          }
        }}
      ],
      "should": [
        {"match": {
          "last_name": "Smith"
        }}
      ],
      "must_not": [
        {"match": {
          "first_name": "xiang"
        }}
      ]
    }
  }
}

结构化搜索

## 结构化搜索

GET /megacorp/employee/_mapping

#过滤器的bool使用
GET /megacorp/employee/_search
{  
  "query": {  
    "bool": {  
      "must": [
        {"match": {
          "about": "rock climbing"
        }}
      ],
      "filter": {
        "bool": {
          "must" : [
            { "range": { "age": { "gte": 25 }}}
          ]
        }
      }
    }  
  }  
} 

-- sql 等价 ---
SELECT product
FROM   products
WHERE  (price = 20 OR productID = "XHDK-A-1293-#fJ3")
  AND  (price != 30)
  
"filter" : {
  "bool" : {
    "should" : [
       { "term" : {"price" : 20}}, 
       { "term" : {"productID" : "XHDK-A-1293-#fJ3"}}
    ],
    "must_not" : {
       "term" : {"price" : 30} 
    }
 }
}

SELECT document
FROM   products
WHERE  productID      = "KDKE-B-9947-#kL5"
  OR (     productID = "JODL-X-1937-#pV7"
       AND price     = 30 )
       
"filter" : {
  "bool" : {
    "should" : [
      { "term" : {"productID" : "KDKE-B-9947-#kL5"}}, 
      { "bool" : { 
        "must" : [
          { "term" : {"productID" : "JODL-X-1937-#pV7"}},
          { "term" : {"price" : 30}}
        ]
      }}
    ]
 }
}

# 文档是否存在某个字段 WHERE  tags IS NOT NULL
# exists 过滤器 <==> is not null 
# missing 过滤器 <==> is null （注意：missing 查询在 5.0 中已被移除，应改用 bool 的 must_not 包裹 exists 来实现）
GET /megacorp/employee/_search
{
  "query": {
    "bool": {
      "filter": {
        "exists": {
          "field": "age"
        }
      }
    }
  }
}

嵌套

### 嵌套对象

DELETE  /my_index

# Create the blog post index; `comments` is mapped as `nested` so each comment
# is indexed as its own hidden document and its fields stay correlated.
# The deprecated `string` type is replaced by `text` (analyzed full text) for 5.x.
PUT /my_index
{
  "mappings": {
    "blogpost": {
      "properties": {
        "title": { "type": "text" },
        "body":  { "type": "text" },
        "tags":  { "type": "text" },
        "comments": {
          "type": "nested",
          "properties": {
            "name":    { "type": "text"  },
            "comment": { "type": "text"  },
            "age":     { "type": "short" },
            "stars":   { "type": "short" },
            "date":    { "type": "date"  }
          }
        }
      }
    }
  }
}

GET /my_index/blogpost/_mapping
GET /my_index/blogpost/_search

PUT /my_index/blogpost/1
{
  "title": "Nest eggs",
  "body":  "Making your money work...",
  "tags":  [ "cash", "shares" ],
  "comments": [ 
    {
      "name":    "John Smith",
      "comment": "Great article",
      "age":     28,
      "stars":   4,
      "date":    "2014-09-01"
    },
    {
      "name":    "Alice White",
      "comment": "More like this please",
      "age":     31,
      "stars":   5,
      "date":    "2014-10-22"
    }
  ]
}
PUT /my_index/blogpost/2
{
  "title": "Investment secrets",
  "body":  "What they don't tell you ...",
  "tags":  [ "shares", "equities" ],
  "comments": [
    {
      "name":    "Mary Brown",
      "comment": "Lies, lies, lies",
      "age":     42,
      "stars":   1,
      "date":    "2014-10-18"
    },
    {
      "name":    "John Smith",
      "comment": "You're making it up!",
      "age":     28,
      "stars":   2,
      "date":    "2014-10-16"
    }
  ]
}



GET /my_index/blogpost/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "eggs" }},
        {
          "nested": {
            "path": "comments",
            "query": {
              "bool": {
                "must": [
                  { "match": { "comments.name": "john" }},
                  { "match": { "comments.age":  28     }}
                ]
        }}}}
      ]
}}}


# Fetch blog posts that received a comment in October, ordered by the lowest
# number of stars among each post's October comments.
# `nested_path` names the nested object the sort field belongs to; it is stated
# explicitly so the sort does not depend on the path being inferred.
# `nested_filter` repeats the query's date range so that only October comments
# contribute to the `min` value.
GET /my_index/blogpost/_search
{
  "query": {
    "nested": {
      "path": "comments",
      "query": {
        "bool": {
          "filter": {
            "range": {
              "comments.date": {
                "gte": "2014-10-01",
                "lt":  "2014-11-01"
              }
            }
          }
        }
      }
    }
  },
  "sort": {
    "comments.stars": {
      "order": "asc",
      "mode":  "min",
      "nested_path": "comments",
      "nested_filter": {
        "range": {
          "comments.date": {
            "gte": "2014-10-01",
            "lt":  "2014-11-01"
          }
        }
      }
    }
  }
}


#嵌套对象的聚合
GET /my_index/blogpost/_search
{
  "aggs": {
    "comments": { 
      "nested": {
        "path": "comments"
      },
      "aggs": {
        "by_month": {
          "date_histogram": { 
            "field":    "comments.date",
            "interval": "month",
            "format":   "yyyy-MM"
          },
          "aggs": {
            "avg_stars": {
              "avg": { 
                "field": "comments.stars"
              }
            }
          }
        }
      }
    }
  }
}

# Enable fielddata on `tags` so the analyzed field can be used in the terms
# aggregation below. In 5.x the field type is `text` (the `string` type is deprecated).
PUT /my_index/blogpost/_mapping
{
  "properties": {
    "tags": {
      "type":      "text",
      "fielddata": true
    }
  }
}

#<1> 共有四个评论
#<2> 有两个评论的发表者年龄介于20至30之间
#<3> 两个blog文章与这些评论相关
#<4> 这些blog文章的火红标签是shares、cash、equities
GET /my_index/blogpost/_search
{
  "aggs": {
    "comments": {
      "nested": { 
        "path": "comments"
      },
      "aggs": {
        "age_group": {
          "histogram": { 
            "field":    "comments.age",
            "interval": 10
          },
          "aggs": {
            "blogposts": {
              "reverse_nested": {}, 
              "aggs": {
                "tags": {
                  "terms": { 
                    "field": "tags"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

agg聚合

#es聚合

DELETE /user_analysys_little
PUT /user_analysys_little
# Mapping for the `user` type.
# Exact-value strings use `keyword` (the 5.x replacement for `string` with
# "index": "not_analyzed"). Numeric fields are never analyzed, and in 5.x their
# `index` option is a boolean, so the "not_analyzed" setting is dropped
# (fields are indexed by default).
PUT /user_analysys_little/_mapping/user
{
  "properties" : {
    "userId" : {
      "type" : "keyword"
    },
    "userName" : {
      "type" : "keyword"
    },
    "provinceId" : {
      "type" : "long"
    },
    "provinceName" : {
      "type" : "keyword"
    },
    "age" : {
      "type" : "long"
    }
  }
}

# Mapping for the `user_event` type.
# Exact-value strings use `keyword` (the 5.x replacement for `string` with
# "index": "not_analyzed"). Numeric and date fields are never analyzed, and in
# 5.x their `index` option is a boolean, so the "not_analyzed" setting is
# dropped (fields are indexed by default).
PUT /user_analysys_little/_mapping/user_event
{
  "properties" : {
    "userId" : {
      "type" : "keyword"
    },
    "userName" : {
      "type" : "keyword"
    },
    "provinceId" : {
      "type" : "long"
    },
    "provinceName" : {
      "type" : "keyword"
    },
    "age" : {
      "type" : "long"
    },
    "eventId" : {
      "type" : "long"
    },
    "eventName" : {
      "type" : "keyword"
    },
    "statDate" : {
      "type" : "date"
    },
    "productName" : {
      "type" : "keyword"
    }
  }
}


GET _cat/indices

GET /user_analysys_little/user/_search
GET /user_analysys_little/user_event/_search

# select userId from user_event where provinceName='青海' group by userId
# 如果省略 query部分就是相当于对全局结果做统计
GET /user_analysys_little/user_event/_search
{
  "size": 1, 
  "query": {
    "term": {
      "provinceName": {
        "value": "青海"
      }
    }
  }, 
  "aggs": {
    "group_userid": {
      "terms": {
        "field": "userId",
        "order": {
          "_count": "desc"
        }
      }
    }
  }
}


#全局桶, 将会忽略query的条件，对全局数据进行统计 
#青海的平均年龄和全国的平均年龄做比较
GET /user_analysys_little/user_event/_search
{
  "size": 1, 
  "query": {
    "term": {
      "provinceName": {
        "value": "青海"
      }
    }
  }, 
  "aggs": {
    "avg_age_青海": {
      "avg": {
        "field": "age"
      }
    },
    "all": {
      "global": {},
      "aggs": {
        "avg_age_全国": {
          "avg": {
            "field": "age"
          }
        }
      }
    }
  }
    
}

# 先按省份分组，然后统计省份的平均/最大/最小年龄，再嵌套统计各个年龄的分布情况
GET /user_analysys_little/user_event/_search
{
  "size": 1, 
  "aggs": {
    "group_province": {
      "terms": {
        "field": "provinceName"
      },
      "aggs": {
        "avg_age": {
          "avg": {
            "field": "age"
          }
        },
        "max_age": {
          "max": {
            "field": "age"
          }
        },
        "min_age": {
          "min": {
            "field": "age"
          }
        },
        "group_age":{
          "terms": {
            "field": "age"
          }
        }
      }
    }
  }
}


#直方图, 以年龄间隔为10的区间进行统计，例如 [10~19] [20~29] .. 这样分区间分组统计
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "histogram_age": {
      "histogram": {
        "field": "age",
        "interval": 10
      },
      "aggs": {
        "max_age": {
          "max": {
            "field": "age"
          }
        },
        "min_age":{
          "min": {
            "field": "age"
          }
        },
        "avg_age":{
          "avg": {
            "field": "age"
          }
        }
      }
    }
  }
}


#直方图，专门的时间统计， 按天统计, extended_bounds可以设定起始边界
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "date_histogram_statDate": {
      "date_histogram": {
        "field": "statDate",
        "interval": "day",
        "format": "yyyy-MM-dd",
        "time_zone":"+08:00",
        "min_doc_count" : 0,
        "extended_bounds" : { 
            "min" : "2016-11-28",
            "max" : "2016-12-31"
        }
      }
    }
  }
}



#过滤桶, 使用过滤桶在查询范围基础上应用过滤器
#即搜索针对全青海的人，但聚合统计针对青海年龄在10~50的人
GET /user_analysys_little/user_event/_search
{
  "size": 1, 
  "query": {
    "term": {
      "provinceName": {
        "value": "青海"
      }
    }
  }, 
  "aggs": {
    "avg_age_青海": {
      "filter": {
        "range": {
          "age": {
            "gte": 10,
            "lte": 50
          }
        }
      },
      "aggs": {
        "avg_age": {
          "avg": {
            "field": "age"
          }
        }
      }
    }
  }
    
}

#后过滤器 只过滤搜索结果，不过滤聚合结果
#即搜索针对青海年龄在10~50的人，但聚合统计针对全青海的人
GET /user_analysys_little/user_event/_search
{
  "size": 1, 
  "query": {
    "term": {
      "provinceName": {
        "value": "青海"
      }
    }
  },
  "post_filter": {
    "range": {
      "age": {
        "gte": 10,
        "lte": 50
      }
    }
  }, 
  "aggs": {
    "avg_age":{
      "avg": {
        "field": "age"
      }
    }
    
  }
    
}

#去重, 统计每天有多少个不重复的用户 即 dau
#注意： cardinality是通过算法来做的近似计算，不是100%精确
#可以通过设置precision_threshold来调节精度， 接受 0–40,000 之间的数字，更大的值还是会被当作 40,000 来处理。 示例会确保当字段唯一值在 100 以内时会得到非常准确的结果。尽管算法是无法保证这点的，但如果基数在阈值以下，几乎总是 100% 正确的。高于阈值的基数会开始节省内存而牺牲准确度，同时也会对度量结果带入误差。
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "date_histogram_statDate": {
      "date_histogram": {
        "field": "statDate",
        "interval": "day",
        "format": "yyyy-MM-dd",
        "time_zone":"+08:00",
        "min_doc_count" : 0,
        "extended_bounds" : { 
            "min" : "2016-11-28",
            "max" : "2016-12-31"
        }
      },
      "aggs": {
        "distinct_userId": {
          "cardinality": {
            "field": "userId",
            "precision_threshold": 100
          }
        }
      }
    }
  }
}