1.批量写入
- 如果有大量的数据,逐条插入效率太低。我们可以从 elasticsearch 模块导入 helpers,通过 helpers.bulk 来批量写入大量数据。首先将所有数据定义成字典(action)组成的列表,各字段含义如下:`_index` 为目标索引名,`_type` 为文档类型,`_source` 为要写入的文档内容。
from elasticsearch import Elasticsearch, helpers
# NOTE(review): `es_settings`, `index`, `doc_type` and `item_dict0..2` are
# placeholders that must be defined by the surrounding code before this runs.
es = Elasticsearch(**es_settings)

# One action dict per document: `_index` names the target index, `_type` the
# document type, and `_source` carries the document body itself.
actions = [
    {'_index': index, '_type': doc_type, '_source': item_dict0},
    {'_index': index, '_type': doc_type, '_source': item_dict1},
    {'_index': index, '_type': doc_type, '_source': item_dict2},
    # ... one action per remaining document
]

# The helper is `helpers.bulk` (the original `bluk` spelling raises
# AttributeError). Per the elasticsearch-py docs, stats_only=True makes it
# return counts instead of per-item error details, and the two raise_on_*
# flags set to False make it report failures rather than raise on them.
helpers.bulk(es, actions, stats_only=True,
             raise_on_error=False, raise_on_exception=False)
2.单例模式写入
from elasticsearch import Elasticsearch, helpers
class SingleES(object):
    """Singleton Elasticsearch writer that buffers documents and bulk-indexes them.

    Documents passed to :meth:`storage` are wrapped into bulk actions and kept
    in ``self.actions``. Once ``actions_length`` actions have accumulated they
    are sent with a single ``helpers.bulk`` call. Call :meth:`flush` before
    shutdown to send a partially filled final batch.

    This variant is NOT safe for concurrent use from multiple threads.
    """

    _instance = None
    client = None
    # Class-level default kept for backward compatibility only; ``__init__``
    # gives every instance its own list so the shared one is never mutated.
    actions = []

    def __init__(self):
        """Create the client and an empty per-instance buffer."""
        self.client = Elasticsearch(hosts=['10.10.10.165:9200'], timeout=60)
        # Number of buffered actions that triggers a bulk request.
        self.actions_length = 50
        self.actions = []

    @staticmethod
    def instance():
        """Return the shared ``SingleES`` object, creating it on first use."""
        if SingleES._instance is None:
            SingleES._instance = SingleES()
        return SingleES._instance

    def storage(self, item_dict):
        """Buffer ``item_dict`` for indexing; flush when the buffer is full.

        Args:
            item_dict: The document body, stored as the action's ``_source``.
        """
        action = {
            '_index': 'index',
            '_type': 'doc_type',
            '_source': item_dict,
        }
        self.actions.append(action)
        if len(self.actions) >= self.actions_length:
            self.flush()

    def flush(self):
        """Send all buffered actions in one bulk request and empty the buffer.

        Does nothing when the buffer is empty. The buffer is emptied even if
        the request fails (best-effort delivery, as before), so a persistent
        outage cannot make the buffer grow without bound.
        """
        if not self.actions:
            return

        # Imported here because the surrounding file never imports `logging`.
        import logging

        # Detach the batch first so the buffer is reset on every outcome.
        batch, self.actions = self.actions, []
        try:
            # With stats_only=True the second element is the failure count.
            _, failed = helpers.bulk(self.client, batch, stats_only=True,
                                     raise_on_error=False,
                                     raise_on_exception=False)
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate; the traceback is logged as well.
            logging.exception("ElasticSearch bulk data fail!")
        else:
            if failed:
                logging.error("ElasticSearch bulk rejected %d of %d documents",
                              failed, len(batch))
3.多线程写入,使用线程锁
from elasticsearch import Elasticsearch, helpers
import threading
class SingleES(object):
    """Thread-safe singleton Elasticsearch writer with a bulk buffer.

    Documents passed to :meth:`storage` are wrapped into bulk actions and kept
    in ``self.actions``. Every read or write of that buffer happens while
    holding ``_instance_lock``, so documents appended by one thread cannot be
    lost when another thread resets the buffer. Call :meth:`flush` before
    shutdown to send a partially filled final batch.
    """

    _instance = None
    client = None
    # Class-level default kept for backward compatibility only; ``__init__``
    # gives every instance its own list so the shared one is never mutated.
    actions = []
    # Guards both singleton creation and the action buffer. It is a plain
    # (non-reentrant) lock, so it must never be acquired while already held.
    _instance_lock = threading.Lock()

    def __init__(self):
        """Create the client and an empty per-instance buffer."""
        self.client = Elasticsearch(hosts=['10.10.10.165:9200'], timeout=60)
        # Number of buffered actions that triggers a bulk request.
        self.actions_length = 50
        self.actions = []

    @staticmethod
    def instance():
        """Return the shared ``SingleES`` object, creating it on first use."""
        if SingleES._instance is None:
            with SingleES._instance_lock:
                # Re-check under the lock: another thread may have created
                # the instance between the test above and acquiring the lock.
                if SingleES._instance is None:
                    SingleES._instance = SingleES()
        return SingleES._instance

    def storage(self, item_dict):
        """Buffer ``item_dict`` for indexing; send a batch when the buffer is full.

        Args:
            item_dict: The document body, stored as the action's ``_source``.
        """
        action = {
            '_index': 'index',
            '_type': 'doc_type',
            '_source': item_dict,
        }
        with SingleES._instance_lock:
            self.actions.append(action)
            if len(self.actions) < self.actions_length:
                return
            # Detach the full batch while still holding the lock, so no
            # concurrent append can land in a list that is about to be dropped.
            batch, self.actions = self.actions, []
        # Network I/O runs outside the lock so other threads keep buffering.
        # NOTE(review): relies on the Elasticsearch client being safe to share
        # across threads, as its documentation states -- confirm for the
        # installed version.
        self._bulk_write(batch)

    def flush(self):
        """Send whatever is currently buffered, even if below the threshold."""
        with SingleES._instance_lock:
            batch, self.actions = self.actions, []
        if batch:
            self._bulk_write(batch)

    def _bulk_write(self, batch):
        """Send ``batch`` in one bulk request, logging failures without raising."""
        # Imported here because the surrounding file never imports `logging`.
        import logging

        try:
            # With stats_only=True the second element is the failure count.
            _, failed = helpers.bulk(self.client, batch, stats_only=True,
                                     raise_on_error=False,
                                     raise_on_exception=False)
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate; the batch is dropped (best-effort).
            logging.exception("ElasticSearch bulk data fail!")
        else:
            if failed:
                logging.error("ElasticSearch bulk rejected %d of %d documents",
                              failed, len(batch))
if __name__ == '__main__':
    # Demo driver: feed 10000 sample documents through the shared writer.
    sample_count = 10000
    writer = SingleES.instance()
    for _ in range(sample_count):
        writer.storage({'name': 'zhang', 'age': 123})