# Bulk actions, in the order they are submitted.
actions = [
    # Partial update of one document: "doc" holds only the fields to change.
    {
        "_op_type": "update",
        "_index": "index_name",
        "_type": "_doc",
        "_id": "doc_id",
        "doc": {"name": "lilei", "age": 10},
    },
    # Insert a new document: "_source" holds every field of the document.
    {
        "_index": "index_name",
        "_type": "_doc",
        "_id": "doc_id",
        "_source": {"name": "hanmeimei", "age": 12, "sex": "female"},
    },
    # Update the document that was just inserted.
    {
        "_op_type": "update",
        "_index": "index_name",
        "_type": "_doc",
        "_id": "doc_id",
        "doc": {"name": "hanmeimei2", "age": 14},
    },
]
import time

from elasticsearch import Elasticsearch, helpers
# Elasticsearch client setup: connection options are gathered in one place
# and then passed to the constructor.
es_hosts = ['']
client_options = {
    "hosts": es_hosts,
    "http_auth": ('admin', 'admin'),
    "timeout": 180,
    "max_retries": 10,
    "retry_on_timeout": True,
    "maxsize": 100,
}
es = Elasticsearch(**client_options)

# Send all queued actions through the bulk helper.
helpers.bulk(client=es, actions=actions)
更新已存在的数据和插入新数据可以放在同一批 actions 中一起提交;但建议单批同步的数据量不要过大,否则可能会出现问题(例如请求超时或请求体过大)。
对于同一批数据(在使用少量数据测试时观察到):bulk 中的 actions 会按顺序依次执行,其中某条数据执行失败不会影响后续数据的操作。
下面是使用 scroll 游标循环从 ES 中查询出全部数据的示例:
# Fetch every document matching a query by paging through a scroll cursor.
# After this block runs, `results` holds all matching hits.

# The query to run.
body = {
    "query": {
        "term": {
            "id": 'xxxx'
        }
    }
}
query = es.search(index='index_name', body=body, scroll='5m', size=100)
results = query['hits']['hits']  # first page of hits
total = query['hits']['total']['value']  # reported hit count
scroll_id = query['_scroll_id']  # cursor used to fetch the remaining pages
if not total:
    print('total is 0')
else:
    # Page until the cursor returns an empty page instead of deriving a page
    # count from `total`: Elasticsearch may report a capped total (10,000 by
    # default unless track_total_hits is set), which would truncate results.
    while True:
        # The scroll parameter must be given on every call, otherwise the
        # request fails.
        page = es.scroll(scroll_id=scroll_id, scroll='5m')
        # Always continue from the most recently returned scroll id.
        scroll_id = page['_scroll_id']
        hits = page['hits']['hits']
        if not hits:
            break
        results.extend(hits)
        time.sleep(0.1)  # brief pause between pages
# Release the server-side scroll context rather than waiting for it to expire.
es.clear_scroll(scroll_id=scroll_id)