Nodejs:mongo-connector同步mongo数据;使用elasticsearch搜索
我使用的是elasticsearch5.2.1版本的,对应的ik和pinyin也是5.2.1;
下载并安装elasticsearch;
下载并安装mongo-connector;
安装对应版本的doc_manager
配置mongo-connector的文件启动;
{
"mainAddress": "localhost:27018",
"oplogFile": "/var/log/oplog.timestamp",
"noDump": false,
"batchSize": -1,
"verbosity": 0,
"continueOnError": false,
"logging": {
"type": "file",
"filename": "/var/log/mongo-connector.log",
"__format": "%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s",
"__rotationWhen": "D",
"__rotationInterval": 1,
"__rotationBackups": 10,
"__type": "syslog",
"__host": "localhost:514"
},
"namespaces": {
"database.doc1": true,
"database.doc2": true
},
"docManagers": [
{
"docManager": "elastic2_doc_manager",
"targetURL": "user_name:pass@127.0.0.1:9200",
"__bulkSize": 1000,
"__uniqueKey": "_id",
"__autoCommitInterval": null,
"args": {
"clientOptions": {"timeout": 200}
}
}
]
}
配置完成后,在supervisord中配置启动命令:
mongo-connector -c /etc/mongo-connector-conf.json
安装ik和pinyin插件并创建映射;
sudo wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v5.2.1/elasticsearch-analysis-ik-5.2.1.zip
sudo wget https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v5.2.1/elasticsearch-analysis-pinyin-5.2.1.zip
具体安装见:elasticsearch-analysis-ik;
没有安装mvn请安装mvn;这里不做详细描述;
创建映射:
需注意:synonyms_path是相对于elasticsearch/config/目录的路径,即下面的analysis/synonym.txt对应elasticsearch/config/analysis/synonym.txt
//若安装了认证插件x-pack,操作时需带上用户名和密码
curl -XPUT 'username:pass@localhost:9200/my_index?pretty' -H 'Content-Type: application/json' -d'
{
"settings": {
"analysis": {
"analyzer": {
"ik_smart_analyzer": {
"type": "custom",
"tokenizer": "ik_smart",
"filter" : ["lowercase","synonym"]
},
"ik_max_analyzer": {
"type": "custom",
"tokenizer": "ik_max_word",
"filter" : ["lowercase","synonym"]
},
"pinyin_analyzer": {
"type": "custom",
"tokenizer": "ik_smart",
"filter" : ["my_pinyin","word_delimiter"]
}
},
"filter" : {
"synonym" : {
"type" : "synonym",
"ignore_case":true,
"synonyms_path" : "analysis/synonym.txt"
},
"my_pinyin": {
"type": "pinyin",
"first_letter" : "none",
"padding_char": " "
}
}
}
}
}
'
analysis/synonym.txt文件:
js,javascript
番茄,西红柿
为字段应用分词。注意:在fulltext中为字段设置的pinyin子字段,在使用mongo-connector同步数据后,并不会自动映射到同步所用的type中,因此需要为该type(下例中的my_type)再创建一次相同的映射;
curl -XPOST http://username:pass@localhost:9200/my_index/fulltext/_mapping -d'
{
"fulltext": {
"_all": {
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"term_vector": "no",
"store": "false"
},
"properties": {
"title": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
},
"tags": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
}
}
}
}'
curl -XPOST http://username:pass@localhost:9200/my_index/my_type/_mapping -d'
{
"_all": {
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"term_vector": "no",
"store": "false"
},
"properties": {
"title": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
},
"tags": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
}
}
}'
在浏览器中测试分词效果
测试ik和同义词
http://localhost:9200/my_index/_analyze?analyzer=ik_smart_analyzer&text=今天天气真好
http://localhost:9200/my_index/_analyze?analyzer=ik_smart_analyzer&text=js
测试拼音:
http://localhost:9200/my_index/_analyze?analyzer=pinyin_analyzer&text=今天天气真好
启用mongo副本集,此命令只为本地测试用:
//创建副本集
sudo mongod --replSet rs0 --port 27018 --dbpath /data/mongodb/data27018
//启动副本集,此为示例,后面使用supervisord统一管理
sudo mongod --port 27018 --dbpath /data/mongodb/data27018 --replSet rs0
副本集启动成功后,进入mongo shell,初始化副本集
config = { _id:"rs0", members:[
{_id:0,host:"127.0.0.1:27018"}
]};
rs.initiate(config)
可使用rs.help()查看帮助。
安装supervisord,配置启动项:
详见:supervisord
安装好supervisord后,配置supervisord.conf:
;Elasticsearch process management
[program:elasticsearch]
command = /usr/local/elasticsearch-5.2.1/bin/elasticsearch
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/es.err.log
stdout_logfile=/var/log/es.out.log
;MongoDB replica set process management (single mongod on port 27018, replica set rs0)
[program:mongo-replset]
command =/usr/bin/mongod --port 27018 --dbpath /data/mongodb/data27018 --replSet rs0
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/mongo_replset.err.log
stdout_logfile=/var/log/mongo_replset.out.log
;mongo-connector process management (started with the JSON config file passed via -c)
[program:mongo-connector]
command =mongo-connector -c /etc/mongo-connector-conf.json
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/mongo_connector.err.log
stdout_logfile=/var/log/mongo_connector.out.log
;Main application (Node.js server run from the project directory)
[program:myapp]
command = node server.js
environment=NODE_ENV=development ;environment variable: development environment
user = lele
directory = /Users/wo/WebstormProjects/myapp
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/myapi.err.log
stdout_logfile=/var/log/myapi.out.log
注意:配置文件中的log文件需要手动创建;
启动守护程序
sudo supervisord -c /etc/supervisord.conf
启动所有服务
sudo supervisorctl restart all
//启动单个服务
sudo supervisorctl restart elasticsearch
若启动失败,可查看supervisor日志排错;
启动成功后,可根据elasticsearch的log文件查看运行状态以及是否同步;
测试查询
curl -XGET 'http://localhost:9200/database/doc1/_search?q=tags:text'
node端的查询
安装 elasticsearch-js
API文档:参见elasticsearch-js官方API文档;
一个例子:
//连接elasticsearch
var elasticsearch = require('elasticsearch');
var client = new elasticsearch.Client({
host: 'localhost:9200',
log: 'trace'
});
exports.search = function (word,size, callback) {
//查询 status==100 AND (visible==v1 OR visible==v2)
//^表示elasticsearch boost运算符
//~表示elasticsearch fuzzy运算符
var query_str = "status:100 AND (visible:v1 OR visible:v2) AND 西红柿^20好吃吗 ~1 NOT _id:00001"
var body = {
size:size,
query: {
query_string: {
//查询的字段
fields: ["title^20", "tags^15","title.pinyin", "tags.pinyin"],
query: query_str,
use_dis_max: true
}
},
//设置返回
_source: ["title", "tags", "digest", "top"],
//设置排序
sort:[
"_score",
{ "top" : "desc" }
],
//设置高亮
highlight: {
pre_tags: ['<em>'],
post_tags: ['</em>'],
fields: {
title: {},
condition: {},
digest: {}
},
require_field_match: false
}
};
client.search({
index: 'my_index',
type: 'my_type',
body: body
}).then(function (resp) {
callback(resp.hits);
}, function (err) {
console.trace(err.message);
callback(null);
});
};
写的比较简单,以代码为主,给大家一个参考;