步骤与代码参考:https://github.com/Hironsan/bertsearch
https://github.com/hanxiao/bert-as-service
https://towardsdatascience.com/semantics-at-scale-bert-elasticsearch-be5bce877859
待测试:使用中文预训练bert模型进行相似度搜索的准确度
以下为参考以上文献进行英文句子相似度搜索:
1.docker启动es7与bertservice
此步参考 https://github.com/Hironsan/bertsearch 中的 docker-compose.yaml,并执行 docker-compose up
# Docker Compose stack for the sentence-similarity search demo:
#   - elasticsearch: single-node Elasticsearch 7.7.1, data kept in the
#     named volume `es-data`.
#   - bertserving: image built from ./bertserving; the model directory given
#     by PATH_MODEL on the host is bind-mounted into the container at /model.
# PATH_MODEL must be set in the environment (or an .env file) before running
# `docker-compose up`, because it is interpolated below.
# NOTE(review): with file format 3.x the `deploy.resources` limits are
# presumably only enforced in swarm mode or with `docker-compose
# --compatibility` - confirm for the Compose version in use.
version: '3.7'
services:
  # Optional web front-end, currently disabled.
  # web:
  #   build: ./web
  #   ports:
  #     - "5000:5000"
  #   environment:
  #     - INDEX_NAME
  #   depends_on:
  #     - elasticsearch
  #     - bertserving
  #   deploy:
  #     resources:
  #       limits:
  #         memory: 500M
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.7.1
    ports:
      - "9200:9200"
    volumes:
      - es-data:/usr/share/elasticsearch/data
    tty: true
    environment:
      # Run as a one-node cluster (no discovery of other nodes).
      discovery.type: single-node
    deploy:
      resources:
        limits:
          memory: 1G
  bertserving:
    build: ./bertserving
    ports:
      # presumably the bert-as-service client in/out ports - TODO confirm
      - "5555:5555"
      - "5556:5556"
    environment:
      # Interpolated from the host environment; must stay on one line.
      - "PATH_MODEL=${PATH_MODEL}"
    volumes:
      - "${PATH_MODEL}:/model"
    deploy:
      resources:
        limits:
          memory: 8G  # bert-service needs a large amount of memory to run
volumes:
  es-data:
    driver: local
export PATH_MODEL=./cased_L-12_H-768_A-12  # bert模型路径