我使用的是elasticsearch5.2.1版本的,对应的ik和pinyin也是5.2.1;
1,下载并安装elasticsearch;
见:https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html
2,下载并安装mongo-connector;
见:https://github.com/mongodb-labs/mongo-connector
pip install mongo-connector
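Target System | Install Command |
---|---|
MongoDB | pip install mongo-connector |
Elasticsearch 1.x | pip install 'mongo-connector[elastic]' |
Amazon Elasticsearch 1.x Service | pip install 'mongo-connector[elastic-aws]' |
Elasticsearch 2.x | pip install 'mongo-connector[elastic2]' |
Amazon Elasticsearch 2.x Service | pip install 'mongo-connector[elastic2-aws]' |
Elasticsearch 5.x | pip install 'mongo-connector[elastic5]' |
Solr | pip install 'mongo-connector[solr]' |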
3,安装对应版本的doc_manager
Elasticsearch 1.x: https://github.com/mongodb-labs/elastic-doc-manager
Elasticsearch 2.x and 5.x: https://github.com/mongodb-labs/elastic2-doc-manager
使用配置文件启动mongo-connector,配置文件(如/etc/mongo-connector-conf.json)内容如下:
{
"mainAddress": "localhost:27018",
"oplogFile": "/var/log/oplog.timestamp",
"noDump": false,
"batchSize": -1,
"verbosity": 0,
"continueOnError": false,
"logging": {
"type": "file",
"filename": "/var/log/mongo-connector.log",
"__format": "%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s",
"__rotationWhen": "D",
"__rotationInterval": 1,
"__rotationBackups": 10,
"__type": "syslog",
"__host": "localhost:514"
},
"namespaces": {
"database.doc1": true,
"database.doc2": true
},
"docManagers": [
{
"docManager": "elastic2_doc_manager",
"targetURL": "user_name:pass@127.0.0.1:9200",
"__bulkSize": 1000,
"__uniqueKey": "_id",
"__autoCommitInterval": null,
"args": {
"clientOptions": {"timeout": 200}
}
}
]
}
配置完成后,在supervisord中配置启动命令:
mongo-connector -c /etc/mongo-connector-conf.json
supervisord的安装与完整配置见下文第7步"安装supervisord,配置启动项"。
4,安装ik和pinyin插件并创建映射;
sudo wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v5.2.1/elasticsearch-analysis-ik-5.2.1.zip
sudo wget https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v5.2.1/elasticsearch-analysis-pinyin-5.2.1.zip
具体安装见:https://github.com/medcl/elasticsearch-analysis-ik;
没有安装mvn请安装mvn;这里不做详细描述;
创建映射:
需注意:synonyms_path的当前位置为elasticsearch/config/
//若安装了认证插件x-pack,操作时需带上用户名和密码
curl -XPUT 'username:pass@localhost:9200/my_index?pretty' -H 'Content-Type: application/json' -d'
{
"settings": {
"analysis": {
"analyzer": {
"ik_smart_analyzer": {
"type": "custom",
"tokenizer": "ik_smart",
"filter" : ["lowercase","synonym"]
},
"ik_max_analyzer": {
"type": "custom",
"tokenizer": "ik_max_word",
"filter" : ["lowercase","synonym"]
},
"pinyin_analyzer": {
"type": "custom",
"tokenizer": "ik_smart",
"filter" : ["my_pinyin","word_delimiter"]
}
},
"filter" : {
"synonym" : {
"type" : "synonym",
"ignore_case":true,
"synonyms_path" : "analysis/synonym.txt"
},
"my_pinyin": {
"type": "pinyin",
"first_letter" : "none",
"padding_char": " "
}
}
}
}
}
'
analysis/synonym.txt文件:
js,javascript
番茄,西红柿
为字段应用分词:先在fulltext类型中为字段设置pinyin子字段。注意:使用mongo-connector同步后,它创建的type并不会自动带上pinyin子字段的映射,因此需要为该type(下面第二条命令中的my_type)再创建一次同样的映射;
curl -XPOST http://username:pass@localhost:9200/my_index/fulltext/_mapping -d'
{
"fulltext": {
"_all": {
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"term_vector": "no",
"store": "false"
},
"properties": {
"title": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
},
"tags": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
}
}
}
}'
curl -XPOST http://username:pass@localhost:9200/my_index/my_type/_mapping -d'
{
"_all": {
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"term_vector": "no",
"store": "false"
},
"properties": {
"title": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
},
"tags": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
}
}
}'
5,在浏览器中测试分词效果
测试ik和同义词
http://localhost:9200/my_index/_analyze?analyzer=ik_smart_analyzer&text=今天天气真好
http://localhost:9200/my_index/_analyze?analyzer=ik_smart_analyzer&text=js
测试拼音:
http://localhost:9200/my_index/_analyze?analyzer=pinyin_analyzer&text=今天天气真好
6,启用mongo副本集,此命令只为本地测试用:
//创建副本集
sudo mongod --replSet rs0 --port 27018 --dbpath /data/mongodb/data27018
//启动副本集,此为示例,后面使用supervisord统一管理
sudo mongod --port 27018 --dbpath /data/mongodb/data27018 --replSet rs0
副本集启动成功后,进入mongo shell,初始化副本集
config = { _id:"rs0", members:[
{_id:0,host:"127.0.0.1:27018"}
]};
rs.initiate(config)
可使用rs.help()查看帮助。
7,安装supervisord,配置启动项:
安装好supervisord后,配置supervisord.conf:
;elasticsearch进程管理
[program:elasticsearch]
command = /usr/local/elasticsearch-5.2.1/bin/elasticsearch
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/es.err.log
stdout_logfile=/var/log/es.out.log
;mongo副本集进程管理
[program:mongo-replset]
command =/usr/bin/mongod --port 27018 --dbpath /data/mongodb/data27018 --replSet rs0
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/mongo_replset.err.log
stdout_logfile=/var/log/mongo_replset.out.log
;mongo-connector进程管理
[program:mongo-connector]
command =mongo-connector -c /etc/mongo-connector-conf.json
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/mongo_connector.err.log
stdout_logfile=/var/log/mongo_connector.out.log
;主程序
[program:myapp]
command = node server.js
environment=NODE_ENV=development ;环境变量:开发环境
user = lele
directory = /Users/wo/WebstormProjects/myapp
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/myapi.err.log
stdout_logfile=/var/log/myapi.out.log
注意:配置文件中的log文件需要手动创建;
启动supervisord:
//启动守护程序
sudo supervisord -c /etc/supervisord.conf
//启动所有服务
sudo supervisorctl restart all
//启动单个服务
sudo supervisorctl restart elasticsearch
若启动失败,可查看supervisor日志排错;
启动成功后,可根据elasticsearch的log文件查看运行状态以及是否同步;
8,测试查询
curl -XGET 'http://localhost:9200/database/doc1/_search?q=tags:text'
9,node端的查询
安装elasticsearch-js:https://github.com/elastic/elasticsearch-js
elasticsearch-js API:
https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/api-reference.html
连接elasticsearch
// Load the Elasticsearch client library for Node.js (the elasticsearch-js package).
var elasticsearch = require('elasticsearch');

// Connection settings, kept in a named object so they are easy to find and adjust.
var clientOptions = {
  host: 'localhost:9200',
  log: 'trace'
};

// Client instance that the search code issues its requests through.
var client = new elasticsearch.Client(clientOptions);
一个例子:
exports.search = function (word,size, callback) {
//查询 status==100 AND (visible==v1 OR visible==v2)
//^表示elasticsearch boost运算符
//~表示elasticsearch fuzzy运算符
var query_str = "status:100 AND (visible:v1 OR visible:v2) AND 西红柿^20好吃吗 ~1 NOT _id:00001"
var body = {
size:size,
query: {
query_string: {
//查询的字段
fields: ["title^20", "tags^15","title.pinyin", "tags.pinyin"],
query: query_str,
use_dis_max: true
}
},
//设置返回
_source: ["title", "tags", "digest", "top"],
//设置排序
sort:[
"_score",
{ "top" : "desc" }
],
//设置高亮
highlight: {
pre_tags: ['<em>'],
post_tags: ['</em>'],
fields: {
title: {},
condition: {},
digest: {}
},
require_field_match: false
}
};
client.search({
index: 'my_index',
type: 'my_type',
body: body
}).then(function (resp) {
callback(resp.hits);
}, function (err) {
console.trace(err.message);
callback(null);
});
};
写的比较简单,以代码为主,给大家一个参考;
参考资料:
elasticsearch query string:
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
elasticsearch-js API:
https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/api-reference.html
supervisord:http://supervisord.org
mongodb 最新版本高可用解决方案-replica sets副本集部署详细过程
使用 Elasticsearch 实现博客站内搜索: