Nodejs:mongo-connector同步mongo数据;使用elasticsearch搜索

我使用的是elasticsearch5.2.1版本的,对应的ik和pinyin也是5.2.1;

1,下载并安装elasticsearch;

    见:https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html

2,下载并安装mongo-connector;

    见:https://github.com/mongodb-labs/mongo-connector

pip install mongo-connector

| Target System | Install Command |
| --- | --- |
| MongoDB | pip install mongo-connector |
| Elasticsearch 1.x | pip install 'mongo-connector[elastic]' |
| Amazon Elasticsearch 1.x Service | pip install 'mongo-connector[elastic-aws]' |
| Elasticsearch 2.x | pip install 'mongo-connector[elastic2]' |
| Amazon Elasticsearch 2.x Service | pip install 'mongo-connector[elastic2-aws]' |
| Elasticsearch 5.x | pip install 'mongo-connector[elastic5]' |
| Solr | pip install 'mongo-connector[solr]' |

3,安装对应版本的doc_manager

Elasticsearch 1.x: https://github.com/mongodb-labs/elastic-doc-manager

Elasticsearch 2.x and 5.x: https://github.com/mongodb-labs/elastic2-doc-manager

创建mongo-connector的配置文件(例如 /etc/mongo-connector-conf.json),之后通过该配置文件启动mongo-connector;

{

    "mainAddress": "localhost:27018",
    "oplogFile": "/var/log/oplog.timestamp",
    "noDump": false,
    "batchSize": -1,
    "verbosity": 0,
    "continueOnError": false,

    "logging": {
        "type": "file",
        "filename": "/var/log/mongo-connector.log",
        "__format": "%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s",
        "__rotationWhen": "D",
        "__rotationInterval": 1,
        "__rotationBackups": 10,

        "__type": "syslog",
        "__host": "localhost:514"
    },

    "namespaces": {
        "database.doc1": true,
        "database.doc2": true
    },

    "docManagers": [
        {
            "docManager": "elastic2_doc_manager",
            "targetURL": "user_name:pass@127.0.0.1:9200",
            "__bulkSize": 1000,
            "__uniqueKey": "_id",
            "__autoCommitInterval": null,
            "args": {
                "clientOptions": {"timeout": 200}
            }
        }
    ]
}

配置完成后,在supervisord中配置启动命令:

mongo-connector -c /etc/mongo-connector-conf.json

见:7,安装supervisord,配置启动项:

 

4,安装ik和pinyin插件并创建映射;

sudo wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v5.2.1/elasticsearch-analysis-ik-5.2.1.zip
sudo wget https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v5.2.1/elasticsearch-analysis-pinyin-5.2.1.zip

具体安装见:https://github.com/medcl/elasticsearch-analysis-ik

没有安装mvn请安装mvn;这里不做详细描述;

创建映射:

    需注意:synonyms_path是相对于elasticsearch/config/目录的路径,例如下面的"analysis/synonym.txt"对应elasticsearch/config/analysis/synonym.txt

//若安装了认证插件x-pack,操作时需带上用户名和密码
curl -XPUT 'username:pass@localhost:9200/my_index?pretty' -H 'Content-Type: application/json' -d'
{
    "settings": {
        "analysis": {
            "analyzer": {
                "ik_smart_analyzer": {
                    "type":      "custom",
                    "tokenizer": "ik_smart",
                    "filter" : ["lowercase","synonym"]
                },
                "ik_max_analyzer": {
                    "type":      "custom",
                    "tokenizer": "ik_max_word",
                    "filter" : ["lowercase","synonym"]
                },
                "pinyin_analyzer": {
                    "type":      "custom",
                    "tokenizer": "ik_smart",
                    "filter" : ["my_pinyin","word_delimiter"]
                }
            },
            "filter" : {
                "synonym" : {
                    "type" : "synonym",
                     "ignore_case":true,  
                    "synonyms_path" : "analysis/synonym.txt"
                },
                "my_pinyin": {
                    "type": "pinyin",
                    "first_letter" : "none",
                    "padding_char": " "
                }
            }
        }
    }
}
'

analysis/synonym.txt文件:

js,javascript
番茄,西红柿

为字段应用分词:先在fulltext中为字段设置pinyin子字段;但使用mongo-connector同步后,同步生成的type并不会自动映射pinyin子字段,因此需要再为该type(下面的my_type)创建一次同样的映射;


curl -XPOST http://username:pass@localhost:9200/my_index/fulltext/_mapping -d'
{
    "fulltext": {
         "_all": {
            "analyzer": "ik_smart_analyzer",
            "search_analyzer": "ik_smart_analyzer",
            "term_vector": "no",
            "store": "false"
        },
        "properties": {
            "title": {
                "type": "text",
                "analyzer": "ik_smart_analyzer",
                "search_analyzer": "ik_smart_analyzer",
                "include_in_all": "true",
                "boost": 10,
                "fields": {
                    "pinyin": {
                        "type": "text",
                        "store": "no",
                        "term_vector": "with_offsets",
                        "analyzer": "pinyin_analyzer",
                        "boost": 10
                    }
                }
            },
            "tags": {
                "type": "text",
                "analyzer": "ik_smart_analyzer",
                "search_analyzer": "ik_smart_analyzer",
                "include_in_all": "true",
                "boost": 10,
                "fields": {
                    "pinyin": {
                        "type": "text",
                        "store": "no",
                        "term_vector": "with_offsets",
                        "analyzer": "pinyin_analyzer",
                        "boost": 10
                    }
                }
            }
        }
    }
}'

curl -XPOST http://username:pass@localhost:9200/my_index/my_type/_mapping -d'
{
    "_all": {
        "analyzer": "ik_smart_analyzer",
        "search_analyzer": "ik_smart_analyzer",
        "term_vector": "no",
        "store": "false"
    },
    "properties": {
        "title": {
            "type": "text",
            "analyzer": "ik_smart_analyzer",
            "search_analyzer": "ik_smart_analyzer",
            "include_in_all": "true",
            "boost": 10,
            "fields": {
                "pinyin": {
                    "type": "text",
                    "store": "no",
                    "term_vector": "with_offsets",
                    "analyzer": "pinyin_analyzer",
                    "boost": 10
                }
            }
        },
        "tags": {
            "type": "text",
            "analyzer": "ik_smart_analyzer",
            "search_analyzer": "ik_smart_analyzer",
            "include_in_all": "true",
            "boost": 10,
            "fields": {
                "pinyin": {
                    "type": "text",
                    "store": "no",
                    "term_vector": "with_offsets",
                    "analyzer": "pinyin_analyzer",
                    "boost": 10
                }
            }
        }
    }
}'

5,在浏览器中测试分词效果

测试ik和同义词

http://localhost:9200/my_index/_analyze?analyzer=ik_smart_analyzer&text=今天天气真好

http://localhost:9200/my_index/_analyze?analyzer=ik_smart_analyzer&text=js

测试拼音:

http://localhost:9200/my_index/_analyze?analyzer=pinyin_analyzer&text=今天天气真好

6,启用mongo副本集,此命令只为本地测试用:

//创建副本集
sudo mongod --replSet rs0 --port 27018 --dbpath /data/mongodb/data27018

//启动副本集,此为示例,后面使用supervisord统一管理
sudo mongod --port 27018 --dbpath  /data/mongodb/data27018 --replSet rs0

副本集启动成功后,进入mongo shell,初始化副本集

config = { _id:"rs0", members:[
 {_id:0,host:"127.0.0.1:27018"}
]};

 

rs.initiate(config)

可使用rs.help()查看帮助。

7,安装supervisord,配置启动项:

详见:http://supervisord.org

安装好supervisord后,配置supervisord.conf:

;elasticsearch进程管理
[program:elasticsearch]
command = /usr/local/elasticsearch-5.2.1/bin/elasticsearch
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/es.err.log
stdout_logfile=/var/log/es.out.log

;mongo副本集进程管理
[program:mongo-replset]
command =/usr/bin/mongod --port 27018 --dbpath  /data/mongodb/data27018 --replSet rs0
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/mongo_replset.err.log
stdout_logfile=/var/log/mongo_replset.out.log

;mongo-connector进程管理
[program:mongo-connector]
command =mongo-connector -c /etc/mongo-connector-conf.json
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/mongo_connector.err.log
stdout_logfile=/var/log/mongo_connector.out.log

;主程序
[program:myapp]
command = node server.js
environment=NODE_ENV=development   ;环境变量:开发环境
user = lele
directory = /Users/wo/WebstormProjects/myapp
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/myapi.err.log
stdout_logfile=/var/log/myapi.out.log

 

注意:配置文件中的log文件需要手动创建;

启动supervisord:

//启动守护程序
sudo supervisord -c /etc/supervisord.conf
//启动所有服务
sudo supervisorctl restart all

//启动单个服务
sudo supervisorctl restart elasticsearch

若启动失败,可查看supervisor日志排错;

启动成功后,可根据elasticsearch的log文件查看运行状态以及是否同步;

8,测试查询

curl -XGET 'http://localhost:9200/database/doc1/_search?q=tags:text'

9,node端的查询

安装elasticsearch-js:https://github.com/elastic/elasticsearch-js

elasticsearch-js API:

https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/api-reference.html

连接elasticsearch

// Load the Elasticsearch client library for Node.js.
var elasticsearch = require('elasticsearch');

// Connection settings: the node address to talk to and the client's log level.
var clientOptions = {
  host: 'localhost:9200',
  log: 'trace'
};

// Shared client instance used by the query code below.
var client = new elasticsearch.Client(clientOptions);

一个例子:

exports.search = function (word,size, callback) {

    //查询 status==100 AND (visible==v1 OR visible==v2)
    //^表示elasticsearch boost运算符
    //~表示elasticsearch fuzzy运算符
    var query_str = "status:100 AND (visible:v1 OR visible:v2) AND 西红柿^20好吃吗 ~1 NOT _id:00001"
    
    var body = {
        size:size,
        query: {
            query_string: {
                //查询的字段
                fields: ["title^20", "tags^15","title.pinyin", "tags.pinyin"],
                query: query_str,
                use_dis_max: true
            }
        },
        //设置返回
        _source: ["title", "tags", "digest", "top"],
        //设置排序
        sort:[
            "_score",
            { "top" : "desc" }
        ],
        //设置高亮
        highlight: {
            pre_tags: ['<em>'],
            post_tags: ['</em>'],
            fields: {
                title: {},
                condition: {},
                digest: {}
            },
            require_field_match: false
        }
    };

    client.search({
        index: 'my_index',
        type: 'my_type',
        body: body
    }).then(function (resp) {
        callback(resp.hits);
    }, function (err) {
        console.trace(err.message);
        callback(null);
    });
};

写的比较简单,以代码为主,给大家一个参考;

参考资料:

elasticsearch query string:

https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html

elasticsearch-js API:

https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/api-reference.html

supervisord:http://supervisord.org

mongodb 最新版本高可用解决方案-replica sets副本集部署详细过程

ElasticSearch5中文分词(IK)

使用 Elasticsearch 实现博客站内搜索:

https://imququ.com/post/elasticsearch.html

转载于:https://my.oschina.net/tianlele/blog/848860

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值