python操作ElasticSearch--文档增删改查

最新推荐文章于 2024-09-23 11:29:56 发布

Cocktail_py

最新推荐文章于 2024-09-23 11:29:56 发布

阅读量2.3k

点赞数 2

分类专栏： Elasticsearch 文章标签： elasticsearch

本文链接：https://blog.csdn.net/Cocktail_py/article/details/99740388

版权

Elasticsearch 专栏收录该内容

24 篇文章 2 订阅

订阅专栏

安装 6.0.0版本

## pip install elasticsearch==6.0.0
# 导包
import json
from elasticsearch import Elasticsearch

创建Index – PUT /news?pretty

# Connect to the cluster; "ip:port" is a placeholder for the real address.
es = Elasticsearch(hosts="ip:port")
# Create an index named "news". With ignore=400 an "already exists" response
# is returned as a dict (see the second sample output below) instead of raising.
result = es.indices.create(index='news', ignore=400)
print(result)
# Output on success:
# {'acknowledged': True, 'shards_acknowledged': True, 'index': 'news'}
# Running the code a second time returns status 400 because the index already exists:
# {'error': {'root_cause': [{'type': 'resource_already_exists_exception', 'reason': 'index [news/RfnIRUh9Qxi-5aYlNEYilg] already exists', 'index_uuid': 'RfnIRUh9Qxi-5aYlNEYilg', 'index': 'news'}], 'type': 'resource_already_exists_exception', 'reason': 'index [news/RfnIRUh9Qxi-5aYlNEYilg] already exists', 'index_uuid': 'RfnIRUh9Qxi-5aYlNEYilg', 'index': 'news'}, 'status': 400}

删除Index – DELETE /news?pretty

# Delete the "news" index; 400/404 responses are returned instead of raised.
result = es.indices.delete(index='news', ignore=[400, 404])
print(result)
# # Output on success:
# # {'acknowledged': True}
# # Running the code a second time returns status 404 because the index has already been deleted:
# # {'error': {'root_cause': [{'type': 'index_not_found_exception', 'reason': 'no such index', 'resource.type': 'index_or_alias', 'resource.id': 'news', 'index_uuid': '_na_', 'index': 'news'}], 'type': 'index_not_found_exception', 'reason': 'no such index', 'resource.type': 'index_or_alias', 'resource.id': 'news', 'index_uuid': '_na_', 'index': 'news'}, 'status': 404}

删除Index下的type

POST news/politics/_delete_by_query?conflicts=proceed
{
  "query": {
    "match_all": {}
  }
}
# Python equivalent of the REST request shown above: match every document.
body={
  "query": {
    "match_all": {}
  }
}
# Passed through as the ?conflicts=proceed query parameter of the request.
conflicts ="proceed"
# Delete every document of type "politics" in the "news" index.
result = es.delete_by_query(index="news", body=body, doc_type="politics",conflicts=conflicts)

快速检查集群的健康状况 GET /_cat/health?v

# Variant that waits for a given status with a request timeout:
# result = es.cluster.health(wait_for_status='yellow', request_timeout=1)
# Query the cluster health through the cluster API.
result = es.cluster.health()
# Alternatively, use the cat API (GET /_cat/health).
result = es.cat.health()
"""
了解集群的健康状况？green、yellow、red？
green：每个索引的primary shard和replica shard都是active状态的
yellow：每个索引的primary shard都是active状态的，但是部分replica shard不是active状态，处于不可用的状态
red：不是所有索引的primary shard都是active状态的，部分索引有数据丢失了

为什么现在会处于一个yellow状态？

本人一台服务器，就启动了一个es进程，相当于就只有一个node。现在es中有一个index，就是kibana自己内置建立的index。由于默认的配置是给每个index分配5个primary shard和5个replica shard，而且primary shard和replica shard不能在同一台机器上（为了容错）。现在kibana自己建立的index是1个primary shard和1个replica shard。当前就一个node，所以只有1个primary shard被分配了和启动了，但是一个replica shard没有第二台机器去启动。
"""

插入数据 – PUT /index/type/id

PUT /news/politics/1
{
	'title': '美国留给伊拉克的是个烂摊子吗', 
	'url': 'http://view.news.qq.com/zt2011/usa_iraq/index.htm'
}
# 在插入数据的时候可以直接插入结构化字典数据
# A plain structured dict can be inserted directly as the document.
data = {'title': '美国留给伊拉克的是个烂摊子吗', 'url': 'http://view.news.qq.com/zt2011/usa_iraq/index.htm'}
# index is the index name, doc_type the document type, body the document
# content, and id the unique identifier of the document.
result = es.create(index='news', doc_type='politics', id=1, body=data)
print(result)
# A 'result' value of 'created' means the document was inserted successfully:
# {'_index': 'news', '_type': 'politics', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
# index() can also be used to insert data. Unlike create(), which requires an
# explicit id, index() generates an id automatically when none is given.
# NOTE(review): the original author states that create() is a wrapper around
# index(); confirm against the installed client version.
# es.index(index='news', doc_type='politics', body=data)

查找 – GET /index/type/id

# 查询 index=news、type=politics 下的所有文档
GET /news/politics/_search
# 查询 index=news、type=politics、id=1 的单个文档
GET /news/politics/1
# Fetch the document with id=1 from index "news", type "politics".
result = es.get(index='news', doc_type='politics', id=1)
print(result)

更新数据 – POST /news/politics/1/_update

#修改：更新文档
POST /news/politics/1/_update
{
	"doc":{
    	'date': '2011-12-16'
	}
}
# Partial update: the fields to add or change are wrapped in a "doc" key.
data = {
	"doc":{
    	'date': '2011-12-16'
	}
}

# If the body has neither "doc" nor "script", the request is rejected with:
# Validation Failed: 1: script or doc is missing;
# Add a date field to the existing document by calling update().
result = es.update(index='news', doc_type='politics', id=1, body=data)
print(result)
# A 'result' value of 'updated' means the update succeeded. _version is the
# version number after the update: the initial insert was version 1 (see the
# earlier sample output), this update makes it 2, and each further update
# increments it by 1.
# #{'_index': 'news', '_type': 'politics', '_id': '1', '_version': 2, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 2}

#修改：替换文档
PUT /news/politics/1
# Full replacement document: every field must be supplied again, because the
# stored document is overwritten as a whole.
data = {
    'title': '美国留给伊拉克的是个烂摊子吗',
    'url': 'http://view.news.qq.com/zt2011/usa_iraq/index.htm',
    'date': '2011-12-16'
}
# index() covers both cases: it inserts the document when the id does not
# exist yet and overwrites the stored document when it does.
# es.index(index='news', doc_type='politics', body=data, id=1)

删除相应的数据 – DELETE /news/politics/1

# Delete the document with id=1 from index "news", type "politics".
result = es.delete(index='news', doc_type='politics', id=1)
print(result)
# A 'result' value of 'deleted' means the deletion succeeded; _version became 3 (incremented by 1 again).
# {'_index': 'news', '_type': 'politics', '_id': '1', '_version': 3, 'result': 'deleted', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 2}
# Delete every document that matches a query (here: a match on _id).
query = {'query': {'match': {"_id": "BxCklGwBt0482SoSeXuE"}}}
result = es.delete_by_query(index='news',body=query, doc_type='politics')
print(result)

对于中文来说，需要安装一个分词插件，这里使用的是 elasticsearch-analysis-ik（GitHub 链接：https://github.com/medcl/elasticsearch-analysis-ik ）。我们使用 Elasticsearch 自带的另一个命令行工具 elasticsearch-plugin 来安装，安装的版本是 6.0.0。

在安装这个分词插件之后,需要重启elasticsearch

进入到安装elasticsearch的目录 cd /opt/elasticsearch-6.0.0/bin

 ./elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.0.0/elasticsearch-analysis-ik-6.0.0.zip

# Create a new index and declare which field should be tokenized.
# The mapping sets the type of the "title" field to text and uses ik_max_word
# (the Chinese analysis plugin installed above) as both analyzer and
# search_analyzer. Without this setting the default analyzer is used.
mapping = {
   'properties': {
       'title': {
           'type': 'text',
           'analyzer': 'ik_max_word',
           'search_analyzer': 'ik_max_word'
       }
   }
}
# Drop the previous index first, recreate it, then apply the mapping.
es.indices.delete(index='news', ignore=[400, 404])
es.indices.create(index='news', ignore=400)
result = es.indices.put_mapping(index='news', doc_type='politics', body=mapping)

插入数据操作

# Sample product documents used by the bulk-insert and search examples below.
datas = [
   {
       "name": "gaolujie yagao",
       "desc": "gaoxiao meibai",
       "price": 30,
       "producer": "gaolujie producer",
       "tags": ["meibai", "fangzhu"]
   }
   ,
   {
       "name": "jiajieshi yagao",
       "desc": "youxiao fangzhu",
       "price": 25,
       "producer": "jiajieshi producer",
       "tags": ["fangzhu"]
   }
   ,
   {
       "name": "zhonghua yagao",
       "desc": "caoben zhiwu",
       "price": 40,
       "producer": "zhonghua producer",
       "tags": ["qingxin"]
   }
   ,
   {
       "name": "special yagao",
       "desc": "special meibai",
       "price": 50,
       "producer": "special yagao producer",
       "tags": ["qingxin"]
   }
   ,

]


# 批量插入
def gendata():
    """Yield one bulk action per entry of the module-level ``datas`` list.

    Every action targets index "ecommerce" and type "product"; the ``_id``
    is the 1-based position of the entry in ``datas`` and ``_source`` is the
    entry itself.
    """
    for position, document in enumerate(datas, start=1):
        action = {
            "_index": "ecommerce",
            "_type": "product",
            "_id": position,
            "_source": document,
        }
        yield action
# Alternative: insert the new documents one at a time.
# for data in datas:
#     es.index(index='ecommerce', doc_type='product', body=data)
# Bulk insert: helpers.bulk is given the actions yielded by gendata().
from elasticsearch import helpers
result = helpers.bulk(es, gendata())
print(result)



# A problem that may occur during bulk inserts:
# 'reason': 'blocked by: [FORBIDDEN/12/index read-only / allow delete (api)];'},
# Work around it by switching off index.blocks.read_only_allow_delete.
# NOTE(review): no index argument is passed, so this presumably applies to
# all indices; confirm before running against a shared cluster.
body = {
         "index.blocks.read_only_allow_delete":"false"
}
result = es.indices.put_settings(body=body)
print(result)

全文检索

# Full-text query: match "zhonghua" against the "name" field.
dsl = {
   'query': {
       'match': {
           'name': 'zhonghua'
       }
   }
}


result = es.search(index='ecommerce', doc_type='product', body=dsl)
# ensure_ascii=False keeps non-ASCII characters readable in the printed JSON.
print(json.dumps(result, indent=2, ensure_ascii=False))

# Query several fields at once with multi_match.
aim_kw = "zhonghua"
query = {
  "query":{
          "multi_match":{
              "query": aim_kw,
              "fields": ["name","producer"]
          }
  }
}
result = es.search(index='ecommerce', doc_type='product', body=query)
print(json.dumps(result, indent=2, ensure_ascii=False))

扩展

如何获取_id的最大值,注意_id为string类型

GET /index/doc_type/_search
{
  "stored_fields": [
    "_id"
  ],
  "query": {
    "match_all": {}
  },
  "sort": {
    "_id": "desc"
  },
  "size": 1
}

"""
参考链接 https://cuiqingcai.com/6214.html
参考链接 https://www.cnblogs.com/liuxiaoming123/p/8124969.html
参考链接 https://blog.csdn.net/xuezhangjun0121/article/details/80745575
参考链接 https://blog.csdn.net/liuzemeeting/article/details/80708035
参考链接 https://www.jianshu.com/p/969e70782d1a
"""