![cfdca6e85382752b52d59b0c49bf22f4.png](https://img-blog.csdnimg.cn/img_convert/cfdca6e85382752b52d59b0c49bf22f4.png)
最近在项目中使用 ES（Elasticsearch）作为召回工具，现将实践经验分享给大家。
1、ES安装和启动:
请参考博文:
《MacOS中Elasticsearch的安装「借助Homebrew」》 - niceyoo - 博客园（www.cnblogs.com）![15f3a37af35ee9595e7ce2108b5f3495.png](https://img-blog.csdnimg.cn/img_convert/15f3a37af35ee9595e7ce2108b5f3495.png)
2、创建索引:
from elasticsearch import Elasticsearch

# Connect to the local Elasticsearch node. The explicit "http://" scheme is
# accepted by 7.x clients and required by 8.x clients.
es = Elasticsearch(['http://127.0.0.1:9200'])

# Index definition:
#   sp_id             - stored but not searchable ("index": False)
#   product_name      - analyzed full-text field used for recall queries
#   third_category_id - numeric field, searchable
mappings = {
    "mappings": {
        "properties": {
            "sp_id": {
                "type": "long",
                # Boolean, consistent with the other fields (was the string "false").
                "index": False
            },
            "product_name": {
                "type": "text",
                "index": True
            },
            "third_category_id": {
                "type": "long",
                "index": True
            }
        }
    }
}

# NOTE: create() raises an error if 'cate_index' already exists.
res = es.indices.create(index='cate_index', body=mappings)

# Verify that the index was actually created.
# `index` is passed by keyword so the call works on both 7.x and 8.x clients.
if es.indices.exists(index="cate_index"):
    print('索引存在')
else:
    print('索引不存在')
3、数据写入ES:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

# Connect to the local Elasticsearch node. The explicit "http://" scheme is
# accepted by 7.x clients and required by 8.x clients.
es = Elasticsearch(['http://127.0.0.1:9200'])

# Bulk actions, one per valid input row.
ACTIONS = []

# The input is expected to be tab-separated: sp_id \t product_name \t third_category_id
# NOTE(review): assumes the data file is UTF-8 encoded - adjust `encoding` if not.
with open("数据.txt", encoding="utf-8") as file:
    for line in file:
        # Drop the line terminator, then split into columns on TAB.
        # (The original had "n"/"t" here: the backslashes were lost.)
        line = line.strip("\n")
        line_arr = line.split("\t")
        print(line_arr)
        # Skip blank/short rows (would otherwise raise IndexError) and rows
        # whose category id is the literal 'NULL'.
        if len(line_arr) < 3 or line_arr[2] == 'NULL':
            continue
        action = {
            "_index": "cate_index",
            "_source": {
                "sp_id": line_arr[0],
                "product_name": line_arr[1],
                "third_category_id": line_arr[2]
            }
        }
        ACTIONS.append(action)

# Send all documents through the bulk helper; raise_on_error=True makes any
# failed document raise instead of being silently dropped.
res, _ = bulk(es, ACTIONS, index="cate_index", raise_on_error=True)
注：使用 bulk 批量写入可以显著提高写入速度，比逐条调用 index 写入大约快一个数量级。
4、ES查询:
from elasticsearch import Elasticsearch

# Connect to the local Elasticsearch node. The explicit "http://" scheme is
# accepted by 7.x clients and required by 8.x clients.
es = Elasticsearch(['http://127.0.0.1:9200'])

# NOTE(review): `product_name` was never defined in the original snippet
# (NameError). This is an example value only - replace it with the text to search for.
product_name = "手机"

# Order hits by relevance score, highest first.
sort = {
    "_score": {
        "order": "desc"
    }
}

# Full-text match on the analyzed `product_name` field.
# The sort clause lives in the request body: the client's `sort=` keyword is a
# URL parameter on 7.x that expects "field:direction" strings, not a dict.
body = {
    "query": {
        "match": {
            "product_name": product_name
        }
    },
    "sort": [sort]
}

# Return at most the top 50 hits.
res = es.search(index="cate_index", body=body, size=50)
print(res)