Elasticsearch python API教程

发呆的比目鱼

已于 2022-03-15 10:53:44 修改

阅读量815

点赞数 2

分类专栏：工具类　文章标签：elasticsearch

于 2021-08-14 15:38:50 首次发布

本文链接：https://blog.csdn.net/weixin_42486623/article/details/119701472

版权

工具类专栏收录该内容

13 篇文章 0 订阅

订阅专栏

Elasticsearch python API教程

安装

pip install elasticsearch
pip install elasticsearch[async] #支持异步

连接

# Work with Elasticsearch from Python.
from elasticsearch import Elasticsearch

# Create the client for a single node (placeholder address) with timeout=3600.
node = {"host": "xxx.xxx.xx.xxx", "port": 9200}
es = Elasticsearch([node], timeout=3600)

# If the cluster requires authentication, pass credentials instead:
# es = Elasticsearch(["xxx.xxx.xx.xxx"], http_auth=("xiao", "123456"), timeout=3600)

插入

# Index a document without giving an id; one is generated automatically.
body = dict(
    first_name="xiao",
    last_name="xiao",
    age=25,
    about="I love to go rock climbing",
    interests=["game", "play"],
)
es.index(index="megacorp", body=body)

# Index a second document under an explicit id.
body = dict(
    first_name="xiao1",
    last_name="wu1",
    age=22,
    about="I not love to go rock climbing",
    interests=["sleep", "eat", "apple"],
)
es.index(index="megacorp", id=4, body=body)

查询

# Search the index with a query that matches every document.
query = {"query": {"match_all": {}}}
es.search(index="megacorp", body=query)

使用DSL语句查询

term 过滤 – term 主要用于精确匹配某些值，比如数字、日期、布尔值或 not_analyzed 的字符串（未经切词的文本数据类型，即新版本中的 keyword 类型）

# Exact-term lookup on first_name.
term_clause = {"first_name": "xiao"}
query = {"query": {"term": term_clause}}
es.search(index="megacorp", body=query)

# Same lookup with a capitalised value; first_name may have been tokenized
# at index time.
term_clause = {"first_name": "Jane"}
query = {"query": {"term": term_clause}}
es.search(index="megacorp", body=query)

terms 过滤 – terms 跟 term 有点类似，但 terms 允许指定多个匹配条件。如果为某个字段指定了多个值，文档只要匹配其中任意一个值即可命中

# terms lookup on age with several candidate values.
accepted_ages = [32, 25]
query = {"query": {"terms": {"age": accepted_ages}}}
es.search(index="megacorp", body=query)

# first_name may have been tokenized at index time.
accepted_names = ["xiao", "John"]
query = {"query": {"terms": {"first_name": accepted_names}}}
es.search(index="megacorp", body=query)

range 过滤–按照指定范围查找一批数据
- gt : 大于
- gte : 大于等于
- lt : 小于
- lte : 小于等于

# Range filter: age greater than 22 ("gt").
age_bounds = {"gt": 22}
query = {"query": {"range": {"age": age_bounds}}}
es.search(index="megacorp", body=query)

exists 和 missing 过滤 – 查找文档中是否包含指定字段或没有某个字段，分别类似于 SQL 语句中的 IS NOT NULL 和 IS NULL 条件（missing 查询自 Elasticsearch 5.0 起已被移除，可以用 bool 查询的 must_not 包裹 exists 来代替）

# exists query on the first_name field.
query = {"query": {"exists": {"field": "first_name"}}}
es.search(index="megacorp", body=query)

bool 过滤–合并多个过滤条件查询结果的布尔逻辑
- must : 多个查询条件的完全匹配，相当于 and。
- must_not : 多个查询条件的相反匹配，相当于 not。
- should : 至少有一个查询条件匹配，相当于 or。

# Bool filter whose "must" clauses all have to match (logical AND).
# "must" is a list of clauses: written as a single dict with two "term"
# keys, Python keeps only the last key and silently drops the other clause.
query = {
    "query": {
        "bool": {
            "must": [
                {"term": {"first_name": "xiao1"}},
                {"term": {"age": 22}},
            ],
        },
    },
}
# Run the bool query built just above.
es.search(index="megacorp", body=query)

# Combine a required clause with an excluded one: a term clause on age under
# "must" and an exists clause on the "name" field under "must_not".
required = {"term": {"age": 32}}
excluded = {"exists": {"field": "name"}}
query = {"query": {"bool": {"must": required, "must_not": excluded}}}
es.search(index="megacorp", body=query)

match_all 查询–可以查询到所有文档，是没有查询条件下的默认语句。

# match_all carries no conditions.
query = dict(query={"match_all": {}})
es.search(index="megacorp", body=query)

match 查询–标准查询，不管你需要全文本查询还是精确查询基本上都要用到它。

# For exact-value lookups prefer a filter clause, since filter results can
# be cached.
# A match query searches one named field for one value, so make sure the
# field name is right to avoid errors.
match_clause = {"about": "rock"}
query = {"query": {"match": match_clause}}
es.search(index="megacorp", body=query)

multi_match 查询–match查询的基础上同时搜索多个字段，在多个字段中同时查一个

# Search for the same text in several fields at once.
searched_fields = ["about", "interests"]
multi_match = {"query": "music", "fields": searched_fields}
query = {"query": {"multi_match": multi_match}}
es.search(index="megacorp", body=query)

bool 查询–与 bool 过滤相似，用于合并多个查询子句。不同的是，bool 过滤可以直接给出是否匹配成功，而bool 查询要计算每一个查询子句的 _score （相关性分值）。

# A bool query is built from query clauses, a bool filter from filter clauses.
wanted = {"match": {"last_name": "Smith"}}
unwanted = {"exists": {"field": "name"}}
query = {"query": {"bool": {"must": wanted, "must_not": unwanted}}}
es.search(index="megacorp", body=query)

wildcard 查询 – 使用标准的 shell 通配符查询（* 匹配任意多个字符，? 匹配单个字符）

# Wildcard pattern applied to the about field.
pattern = "ro*"
query = {"query": {"wildcard": {"about": pattern}}}
es.search(index="megacorp", body=query)

regexp 查询

# Regular-expression pattern applied to the about field.
pattern = "ro.*"
query = {"query": {"regexp": {"about": pattern}}}
es.search(index="megacorp", body=query)

prefix 查询 – 以什么字符开头的

# Prefix lookup on the about field.
leading_text = "I"
query = {"query": {"prefix": {"about": leading_text}}}
es.search(index="megacorp", body=query)

短语匹配(Phrase Matching) – 寻找邻近的几个单词

# Phrase match on the about field.
phrase = {"about": "I love"}
query = {"query": {"match_phrase": phrase}}
es.search(index="megacorp", body=query)

统计查询功能

# Same phrase query, passed to count() instead of search().
phrase = {"about": "I love"}
query = {"query": {"match_phrase": phrase}}
es.count(index="megacorp", body=query)

删除数据

# Remove a single document by its id.
es.delete(index="megacorp", id="3fg8QHsBU301uoRTPrqs")

# delete_by_query: delete every document that satisfies the query; the
# query must be written in the query DSL.
name_match = {"match": {"first_name": "xiao"}}
query = {"query": name_match}
es.delete_by_query(index="megacorp", body=query)

更新

# Update by id, using a script that removes the age field from the source.
doc_body = {"script": "ctx._source.remove('age')"}
es.update(index="megacorp", id=4, body=doc_body)

# Add a field through a script.
doc_body = {"script": "ctx._source.address = '合肥'"}
es.update(index="megacorp", id=4, body=doc_body)

# Modify only some of the fields.
doc_body = {"doc": {"last_name": "xiao"}}
es.update(index="megacorp", id=4, body=doc_body)

# update_by_query: update every document that satisfies the query; the query
# part is written the same way as for the search and delete examples.
script_params = {"name": "wang", "age": 100}
painless_script = {
    "source": "ctx._source.last_name = params.name;ctx._source.age = params.age",
    "lang": "painless",
    "params": script_params,
}
query = {
    "query": {"match": {"last_name": "xiao"}},
    "script": painless_script,
}
es.update_by_query(index="megacorp", body=query)

欢迎关注公众号：

发呆的比目鱼

关注

2
点赞
踩
4

收藏

觉得还不错? 一键收藏
打赏
0
评论
Elasticsearch python API教程

Elasticsearch python API教程安装pip install elasticsearchpip install elasticsearch[async] #支持异步链接# 使用python操作ElasticSearchfrom elasticsearch import Elasticsearch# 连接ESes = Elasticsearch([{'host':'xxx.xxx.xx.xxx','port':9200}], timeout=3600)# 若需验证# e
复制链接

扫一扫