from elasticsearch import Elasticsearch
from elasticsearch import helpers
import datetime
from ES import setting
import time
# Build the Elasticsearch client connection.
def conn_to_es():
    """Create and return an Elasticsearch client from ES.setting host/port/credentials."""
    url = "http://{0}:{1}".format(setting.ES_HOST, setting.ES_PORT)
    credentials = (setting.ES_USER, setting.ES_PASSWORD)
    return Elasticsearch(hosts=[url], http_auth=credentials)


# Module-level client shared by the rest of this script.
es = conn_to_es()
# Read documents from Elasticsearch with a scrolling scan.
def read_es(index, query=""):
    """Scan every document of *index* that matches *query*.

    Parameters
    ----------
    index : str
        Name of the Elasticsearch index to read.
    query : dict or falsy, default ""
        Elasticsearch query body. Any falsy value (the legacy ``""``
        default, or ``None``) falls back to a match_all query.

    Returns
    -------
    generator
        Hits yielded by ``helpers.scan``; each hit is a dict.
    """
    # Fail fast when the cluster is unreachable.
    if es.ping():
        print("Successfully connect!")
    else:
        print("Failed.....")
        exit()
    # Generalized from `query == ""` so callers may also pass None.
    if not query:
        query = {
            "query": {
                "match_all": {}
            },
        }
    # scroll="20m" keeps the search context alive while paging through all hits.
    return helpers.scan(es, index=index, scroll="20m", query=query)
# Connect to the cluster; a subset of nodes is enough.
# es = Elasticsearch(["172.16.208.149:9200"])
# Bulk-load rows from a '#'-delimited text file into Elasticsearch.
# Expected line layout: sid#sname#sage#score
file_name = 'test.txt'
actions = []
# `with` guarantees the file handle is closed (original left it open).
with open(file_name, 'r', encoding='utf-8', errors='ignore') as wbfile:
    for line in wbfile:
        fields = line.split('#')
        action = {
            "_index": "estest_2019_04_19",
            "_type": "_doc",
            "_id": int(fields[0]),
            "_source": {
                "sid": int(fields[0]),
                # Fixed timestamp applied to every document.
                "c@receive_time": datetime.datetime.strptime("2020-04-19 16:30:16", "%Y-%m-%d %H:%M:%S"),
                "sname": fields[1],
                "sage": int(fields[2]),
                "score": int(fields[3]),
            },
        }
        actions.append(action)
        # Flush to Elasticsearch every 1000 actions.
        if len(actions) == 1000:
            helpers.bulk(es, actions)
            actions = []
            # Was "insert 10000": the message contradicted the batch size above.
            print("insert 1000")
if actions:
    # Flush whatever remains after the loop (final partial batch).
    helpers.bulk(es, actions)
print("finish")
time.sleep(10)
# Read the freshly loaded index back and dump every document.
data = read_es("estest_2019_04_19", query="")
for doc in data:
    # Each item yielded by the scan is a dict.
    print(doc)