Python 操作 Elasticsearch
插入
from elasticsearch import Elasticsearch
# Open a client against the local node, authenticating with basic auth.
es = Elasticsearch(
    hosts="127.0.0.1",
    port=9200,
    http_auth=('elastic', '123456'),
    timeout=3600,
)

# Sample document to store; no document id is passed to index().
person = {
    "first_name": "xiao",
    "last_name": "xiao",
    'age': 25,
    'about': 'I love to go rock climbing',
    'interests': ['game', 'play'],
}

# Index the document into "test" and show the raw response.
result = es.index(index="test", body=person)
print(result)
查询
from elasticsearch import Elasticsearch
# 1. Fetch a single document by its id.
data = es.get(index='test', id="4HX_G3kBA30Zdm11StfX")

# 2. Filter on whether a key is present, using a bool query.
#    `must_not` + `exists` (shown here) selects documents that do NOT have
#    the field; put the same `exists` clause under `must` to require it.
missing_flag = {'exists': {"field": "is_download"}}
doc = {
    'size': 10000,
    'query': {'bool': {'must_not': missing_flag}},
}
data = es.search(index="test", body=doc)
遍历
import json

from elasticsearch import Elasticsearch

# Walk every matching document of the "image_info" index with the scroll API
# and push one entry per document onto the Redis list "download:image:es.all".
#
# NOTE(review): `redis_cluster_proxy` is not defined in this snippet; it must
# be created (a Redis client exposing `lpush`) before this code runs.
es = Elasticsearch(hosts="192.168.10.175", port=9200, http_auth=('test', 'test'), timeout=3600)

get_size = 5000  # number of hits requested per scroll page
earlist_time = "2021-04-27 16:19:08"  # inclusive lower bound on record_date

body = {
    "size": get_size,
    "query": {
        "range": {
            "record_date.keyword": {
                "gte": earlist_time,
                # Add an "lte" entry here to also bound the range from above.
            }
        }
    },
    "sort": {
        "record_date.keyword": "asc"
    },
}

# Restrict the first response to the fields that are actually consumed below.
filter_path = [
    "_scroll_id",
    "hits.total",
    "hits.hits._id",
    "hits.hits._source.record_date",
    "hits.hits._source.relocal_url",
    "hits.hits._source.website",
]

results = es.search(index="image_info", body=body, filter_path=filter_path, scroll='5m')
sid = results.get('_scroll_id')

# `hits.total` is read as an object with a "value" key; a plain number is
# accepted as well so a differently shaped response does not crash here.
total = results.get("hits", {}).get("total")
if isinstance(total, dict):
    total = total.get("value")

# With filter_path applied the "hits.hits" key may be absent when nothing
# matched, so fall back to an empty page instead of raising KeyError.
hits = results.get('hits', {}).get('hits', [])
scroll_size = len(hits)
total_size = scroll_size

try:
    while scroll_size > 0:
        items = []
        for result in hits:
            print(result)
            source = result.get("_source", {})
            # NOTE(review): the original extracted these four fields but never
            # appended anything to `items`; the JSON payload layout below is an
            # assumption -- confirm it against the consumer of the Redis list.
            items.append(json.dumps(
                {
                    "id": result.get("_id"),
                    "url": source.get("relocal_url"),
                    "record_date": source.get("record_date"),
                    "website": source.get("website"),
                },
                ensure_ascii=False,
            ))

        # `items` is non-empty here because the page had at least one hit.
        redis_cluster_proxy.lpush("download:image:es.all", *items)
        # Progress: documents fetched so far vs. total reported by the search.
        print(total_size, total)

        # Fetch the next page; the scroll id may change between calls.
        results = es.scroll(scroll_id=sid, scroll='5m')
        sid = results['_scroll_id']
        hits = results.get('hits', {}).get('hits', [])
        scroll_size = len(hits)
        total_size += scroll_size
finally:
    # Release the server-side scroll context instead of waiting for the
    # 5-minute keep-alive to expire.
    if sid:
        es.clear_scroll(scroll_id=sid)
更新
新增字段更新
新增 is_download 字段
# Add an `is_download` field to an existing document: the update request
# carries a script that assigns ctx._source.is_download = 1.
doc_body = {'script': "ctx._source.is_download = 1"}
es.update(
    index='test',
    id="4HX_G3kBA30Zdm11StfX",
    body=doc_body,
)