Elasticsearch 同义词(dynamic-synonym)远程数据库加载

说明

  • Elasticsearch 版本7.2.0
  • 同义词插件:elasticsearch-analysis-dynamic-synonym
  • 无停机动态远程更新同义词
1、下载同义词插件

下载地址:

https://github.com/bells/elasticsearch-analysis-dynamic-synonym

dynamic synonym version | ES version
master | 7.x -> master
6.1.4 | 6.1.4
5.2.0 | 5.2.0
5.1.1 | 5.1.1
2.3.0 | 2.3.0
2.2.0 | 2.2.0
2.1.0 | 2.1.0
2.0.0 | 2.0.0
1.6.0 | 1.6.X

Elasticsearch 的插件版本号必须与 Elasticsearch 版本一致,所以下载同义词插件后,需要重新进行编译:
修改 pom.xml
在这里插入图片描述

2、重写远程词库加载类
2.1 新建 DBRemoteSynonymFile.java 文件

说明:这里主要是对 LocalSynonymFile 及 RemoteSynonymFile 类进行仿写
主要有三个方法:

  • reloadSynonymMap 重新加载同义词
  • isNeedReloadSynonymMap 重新加载同义词的条件
  • getReader 同义词的来源
package com.bellszhu.elasticsearch.plugin.synonym.analysis;

import com.bellszhu.elasticsearch.plugin.DynamicSynonymPlugin;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.env.Environment;

import java.io.*;
import java.nio.file.Path;
import java.sql.*;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Properties;

/**
 * {@link SynonymFile} implementation that reads synonym rules from a relational
 * database over JDBC instead of a local file or an HTTP endpoint.
 *
 * <p>Connection settings and the two SQL statements are read from
 * {@code config/jdbc-reload.properties}, resolved relative to the directory that
 * contains the plugin jar. The keys used are {@code jdbc.driver}, {@code jdbc.url},
 * {@code jdbc.user}, {@code jdbc.password}, {@code jdbc.reload.synonym.sql} and
 * {@code jdbc.lastModified.synonym.sql}.
 *
 * <p>NOTE(review): instances are not synchronized; this assumes a single polling
 * thread per instance - confirm against the caller.
 *
 * @date 2019/8/27
 */
public class DBRemoteSynonymFile implements SynonymFile {

    // Name of the JDBC settings file inside the plugin's "config" directory.
    private final static String DB_PROPERTIES = "jdbc-reload.properties";
    private static final Logger logger = LogManager.getLogger("dynamic-synonym");

    private final String format;

    private final boolean expand;

    private final Analyzer analyzer;

    // Kept for parity with the other SynonymFile implementations; not read in this class.
    private final Environment env;

    // Value of the synonyms_path setting; only used in log messages here.
    private final String location;

    // Largest last_modify_dt (epoch millis) seen so far; 0 until the first successful check.
    private long lastModified;

    // Lazily opened and reused between polls; discarded after any JDBC failure.
    private Connection connection = null;

    private final Properties props;

    // Absolute path of jdbc-reload.properties (despite the name, this is the file, not the directory).
    private final Path conf_dir;

    /**
     * Loads the JDBC settings and records the current last-modified timestamp.
     *
     * @param env      Elasticsearch environment (stored, not otherwise used here)
     * @param analyzer analyzer handed to the synonym parser
     * @param expand   expand flag handed to the synonym parser
     * @param format   rule format handed to the synonym parser
     * @param location value of the synonyms_path setting, used for logging
     */
    DBRemoteSynonymFile(Environment env, Analyzer analyzer,
                        boolean expand, String format, String location) {
        this.analyzer = analyzer;
        this.expand = expand;
        this.format = format;
        this.env = env;
        this.location = location;
        this.props = new Properties();

        // Resolve <directory containing the plugin jar>/config/jdbc-reload.properties.
        Path filePath = PathUtils.get(new File(DynamicSynonymPlugin.class.getProtectionDomain().getCodeSource()
                .getLocation().getPath())
                .getParent(), "config")
                .toAbsolutePath();
        this.conf_dir = filePath.resolve(DB_PROPERTIES);

        // try-with-resources so the stream is closed even when load() fails.
        try (InputStream input = new FileInputStream(conf_dir.toFile())) {
            props.load(input);
        } catch (FileNotFoundException e) {
            logger.info("jdbc-reload.properties not find. " + e);
        } catch (IOException e) {
            logger.error("fail to load the jdbc-reload.properties", e);
        }

        // Records the current last-modified timestamp; the boolean result is intentionally ignored.
        isNeedReloadSynonymMap();
    }

    /**
     * Builds a fresh {@link SynonymMap} from the rules currently stored in the database.
     *
     * @return the newly built synonym map
     * @throws IllegalArgumentException if the rules cannot be read or parsed
     */
    @Override
    public SynonymMap reloadSynonymMap() {
        logger.info("start reload db synonym from {}.", location);
        try (Reader rulesReader = getReader()) {
            SynonymMap.Builder parser = RemoteSynonymFile.getSynonymParser(rulesReader, format, expand, analyzer);
            return parser.build();
        } catch (Exception e) {
            // The Throwable must be the LAST argument, otherwise it is consumed by the
            // {} placeholder and the stack trace is lost.
            logger.error("reload db synonym {} error!", location, e);
            throw new IllegalArgumentException(
                    "could not reload db synonyms to build synonyms", e);
        }
    }

    /**
     * Reports whether the database holds newer rules than the ones last seen, and if so
     * remembers the new timestamp.
     *
     * @return {@code true} when the last-modified timestamp advanced; {@code false}
     *         otherwise, including when the timestamp could not be determined
     */
    @Override
    public boolean isNeedReloadSynonymMap() {
        try {
            Long lastModify = getLastModify();
            // null means "unknown" (query failed or returned no timestamp): do not reload.
            if (lastModify != null && lastModified < lastModify) {
                lastModified = lastModify;
                return true;
            }
        } catch (Exception e) {
            logger.error("check last modified time of db synonym failed", e);
        }

        return false;
    }

    /**
     * Runs the {@code jdbc.lastModified.synonym.sql} query and reads the
     * {@code last_modify_dt} column.
     *
     * @return the timestamp in epoch milliseconds, or {@code null} when the query fails,
     *         returns no row, or the column is SQL NULL
     */
    public Long getLastModify() {
        Long lastModifyMillis = null;
        try (Statement stmt = openConnection().createStatement();
             ResultSet resultSet = stmt.executeQuery(requireProperty("jdbc.lastModified.synonym.sql"))) {
            while (resultSet.next()) {
                Timestamp lastModifyDt = resultSet.getTimestamp("last_modify_dt");
                // An aggregate such as max() yields SQL NULL on an empty table.
                if (lastModifyDt != null) {
                    lastModifyMillis = lastModifyDt.getTime();
                }
            }
        } catch (ClassNotFoundException | SQLException e) {
            logger.error("query last modified time of db synonym failed", e);
            // Drop the cached connection so the next poll reconnects instead of reusing a broken one.
            resetConnection();
        }
        return lastModifyMillis;
    }

    /**
     * Runs the {@code jdbc.reload.synonym.sql} query and collects the {@code words} column.
     *
     * @return the non-null rule lines read so far; empty (or partial) when the query fails
     */
    public ArrayList<String> getDBData() {
        ArrayList<String> rules = new ArrayList<>();
        try (Statement stmt = openConnection().createStatement();
             ResultSet resultSet = stmt.executeQuery(requireProperty("jdbc.reload.synonym.sql"))) {
            while (resultSet.next()) {
                String theWord = resultSet.getString("words");
                // Skip SQL NULL so it is not later appended as the literal text "null".
                if (theWord != null) {
                    rules.add(theWord);
                }
            }
        } catch (ClassNotFoundException | SQLException e) {
            logger.error("query db synonym failed", e);
            resetConnection();
        }
        return rules;
    }

    /**
     * Exposes the database rules as a character stream, one rule per line.
     *
     * @return a reader over the rules; empty when nothing could be loaded
     */
    @Override
    public Reader getReader() {
        StringBuilder sb = new StringBuilder();
        try {
            for (String rule : getDBData()) {
                logger.info("load the synonym from db,{}", rule);
                sb.append(rule).append(System.lineSeparator());
            }
        } catch (Exception e) {
            logger.error("reload synonym from db failed", e);
        }
        return new StringReader(sb.toString());
    }

    /** Returns the cached JDBC connection, opening it on first use or after a reset. */
    private Connection openConnection() throws ClassNotFoundException, SQLException {
        if (connection == null) {
            Class.forName(requireProperty("jdbc.driver"));
            connection = DriverManager.getConnection(
                    requireProperty("jdbc.url"),
                    props.getProperty("jdbc.user"),
                    props.getProperty("jdbc.password")
            );
        }
        return connection;
    }

    /** Closes and forgets the cached connection; close failures are only logged. */
    private void resetConnection() {
        if (connection == null) {
            return;
        }
        try {
            connection.close();
        } catch (SQLException e) {
            logger.warn("close db synonym connection failed", e);
        } finally {
            connection = null;
        }
    }

    /** Returns the configured value for {@code key}, failing with a clear message when it is absent. */
    private String requireProperty(String key) throws SQLException {
        String value = props.getProperty(key);
        if (value == null) {
            throw new SQLException(key + " is not configured in " + conf_dir);
        }
        return value;
    }
}
2.2 修改 DynamicSynonymTokenFilterFactory 类

说明:DynamicSynonymTokenFilterFactory 是对词库的路径进行选择,通过不同的参数设置,调用不同路径下的词库:

主要是通过 synonyms_path 这个参数进行设置

在这里插入图片描述
新添一个路径,代码如下:

		// Pick the synonym source implementation from the configured location (synonyms_path).
		SynonymFile synonymFile;
		
		// The marker value "fromDB" selects the database-backed source; the literal can be customised.
        if (location.equals("fromDB")) {
            synonymFile = new DBRemoteSynonymFile(env, analyzer, expand, format,
                    location);
        } else if (location.startsWith("http://") || location.startsWith("https://")) {
            // An http(s) URL selects the remote source.
            synonymFile = new RemoteSynonymFile(env, analyzer, expand, format,
                    location);
        } else {
            // Any other value is handed to the local-file source.
            synonymFile = new LocalSynonymFile(env, analyzer, expand, format,
                    location);
        }
        // Build the initial synonym map from the selected source.
        synonymMap = synonymFile.reloadSynonymMap();
2.3 创建配置文件

在工程的同级目录下新建一个 config/jdbc-reload.properties 配置文件,便于用户修改数据库连接信息及查询 SQL。

##数据库相关配置
jdbc.url=jdbc:postgresql://192.168.***.***:5432/search
jdbc.user=***
jdbc.password=***
jdbc.reload.synonym.sql=SELECT words FROM public.sys_synonym_t where is_vaild = true
jdbc.lastModified.synonym.sql=SELECT max(last_modify_dt) as last_modify_dt FROM public.sys_synonym_t
jdbc.driver=org.postgresql.Driver
2.4 修改 plugin.xml 文件

在这里插入图片描述

2.5 编译并打包

在这里插入图片描述

2.6 上传至服务器

2.6.1 在 ES 的安装路径下的 plugins 文件夹下,新建 analyzer-synonym 文件夹

[root@console plugins]# pwd
${ELASTIC_HOME}/plugins
[root@console plugins]# ll
total 8
drwxrwxr-x 3 elastic elastic 4096 Aug 29 16:49 analyzer-synonym
drwxrwxr-x 3 elastic elastic 4096 Aug 29 17:38 ik-analysis

2.6.2 解压并修改用户所属组

[root@console analyzer-synonym]# ls
elasticsearch-analysis-dynamic-synonym-7.2.0.zip
[root@console analyzer-synonym]# unzip elasticsearch-analysis-dynamic-synonym-7.2.0.zip
Archive:  elasticsearch-analysis-dynamic-synonym-7.2.0.zip
   creating: config/
  inflating: config/jdbc-reload.properties
  inflating: plugin-descriptor.properties
  inflating: plugin-security.policy
  inflating: httpclient-4.4.1.jar
  inflating: httpcore-4.4.1.jar
  inflating: commons-logging-1.2.jar
  inflating: commons-codec-1.9.jar
  inflating: postgresql-9.4.1212.jar
  inflating: mysql-connector-java-5.1.47.jar
  inflating: elasticsearch-analysis-dynamic-synonym-7.2.0.jar
[root@console analyzer-synonym]# rm -rf elasticsearch-analysis-dynamic-synonym-7.2.0.zip
[root@console analyzer-synonym]# chown -R elastic:elastic ./*

2.6.3 重启 Elasticsearch 服务
在这里插入图片描述
IK分词器的远程词典热词加载方式如下:https://blog.csdn.net/weixin_43315211/article/details/99650363

3、测试

新建一个索引(包含 settings 与 mapping)

PUT synonyms_index
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1,
      "analysis": {
        "analyzer": {
          "synonym": {
            "type":"custom",
            "tokenizer": "ik_smart_custom",
            "filter": ["synonym_custom"]
          }
        },
        "filter": {
          "synonym_custom": {
            "type": "dynamic_synonym",
            "synonyms_path": "fromDB"
          }
        }
    }
  },
  "mappings": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "synonym"
        }
      }
  }
}

测试:

GET /synonyms_index/_analyze
{
  "text": "开心",
  "analyzer": "synonym"
}

{
  "tokens" : [
    {
      "token" : "开心",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "CN_WORD",
      "position" : 0
    },
    {
      "token" : "高兴",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "SYNONYM",
      "position" : 0
    }
  ]
}

可以明显的看出,已经进行了同义词分词。

对同义词库进行新增同义词
在这里插入图片描述
查看 elasticsearch 服务器日志
在这里插入图片描述
可以看到同义词库已经进行更新

GET /synonyms_index/_analyze
{
  "text": "开心",
  "analyzer": "synonym"
}

{
  "tokens" : [
    {
      "token" : "开心",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "CN_WORD",
      "position" : 0
    },
    {
      "token" : "高兴",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "SYNONYM",
      "position" : 0
    },
    {
      "token" : "开森",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "SYNONYM",
      "position" : 0
    }
  ]
}

后续对删除也进行了测试,同样可以实现。

问题解决
[2019-09-02T14:18:17,613][ERROR][o.w.a.d.Monitor          ] [master01] erorr
org.postgresql.util.PSQLException: Your security policy has prevented the connection from being attempted.  You probably need to grant the connect java.net.SocketPermission to the database server host and port that you wish to connect to.
        at org.postgresql.Driver.connect(Driver.java:287) ~[postgresql-9.4.1212.jar:9.4.1212]
        at java.sql.DriverManager.getConnection(DriverManager.java:664) ~[?:1.8.0_191]
        at java.sql.DriverManager.getConnection(DriverManager.java:247) ~[?:1.8.0_191]
        at org.wltea.analyzer.dic.Dictionary.loadDBStopWordsDict(Dictionary.java:573) [elasticsearch-analysis-ik-7.2.0.jar:?]
        at org.wltea.analyzer.dic.Dictionary.access$300(Dictionary.java:61) [elasticsearch-analysis-ik-7.2.0.jar:?]
        at org.wltea.analyzer.dic.Dictionary$StopDictReloadThread.run(Dictionary.java:718) [elasticsearch-analysis-ik-7.2.0.jar:?]
        at java.lang.Thread.run(Thread.java:748) [?:1.8.0_191]
Caused by: java.security.AccessControlException: access denied ("java.net.SocketPermission" "192.168.108.126:5432" "connect,resolve")
        at java.security.AccessControlContext.checkPermission(AccessControlContext.java:472) ~[?:1.8.0_191]
        at java.security.AccessController.checkPermission(AccessController.java:884) ~[?:1.8.0_191]
        at java.lang.SecurityManager.checkPermission(SecurityManager.java:549) ~[?:1.8.0_191]
        at java.lang.SecurityManager.checkConnect(SecurityManager.java:1051) ~[?:1.8.0_191]
        at java.net.Socket.connect(Socket.java:584) ~[?:1.8.0_191]
        at org.postgresql.core.PGStream.<init>(PGStream.java:61) ~[postgresql-9.4.1212.jar:9.4.1212]
        at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:144) ~[postgresql-9.4.1212.jar:9.4.1212]
        at org.postgresql.core.ConnectionFactory.openConnection(ConnectionFactory.java:52) ~[postgresql-9.4.1212.jar:9.4.1212]
        at org.postgresql.jdbc.PgConnection.<init>(PgConnection.java:216) ~[postgresql-9.4.1212.jar:9.4.1212]
        at org.postgresql.Driver.makeConnection(Driver.java:404) ~[postgresql-9.4.1212.jar:9.4.1212]
        at org.postgresql.Driver.connect(Driver.java:272) ~[postgresql-9.4.1212.jar:9.4.1212]

主要报错:

Caused by: java.security.AccessControlException: access denied ("java.net.SocketPermission" "192.168.108.126:5432" "connect,resolve")

java权限问题,需要在java中添加相应的权限:

[elastic@master01 bin]$ sudo vim $JAVA_HOME/jre/lib/security/java.policy

	
// Standard extensions get all permissions by default

grant codeBase "file:${{java.ext.dirs}}/*" {
        permission java.security.AllPermission;
};

// default permissions granted to all domains

grant {
        // Allows any thread to stop itself using the java.lang.Thread.stop()
        // method that takes no argument.
        // Note that this permission is granted by default only to remain
        // backwards compatible.
        // It is strongly recommended that you either remove this permission
        // from this policy file or further restrict it to code sources
        // that you specify, because Thread.stop() is potentially unsafe.
        // See the API specification of java.lang.Thread.stop() for more
        // information.
        permission java.lang.RuntimePermission "stopThread";

        // allows anyone to listen on dynamic ports
        permission java.net.SocketPermission "localhost:0", "listen";
        //添加对应的权限
        permission java.net.SocketPermission "*", "connect,resolve";

  • 2
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
elasticsearch-analysis-dynamic-synonym-master.zip是一个压缩文件,其中包含用于Elasticsearch动态同义词分析插件。Elasticsearch是一个开源的分布式搜索引擎,用于快速检索和分析大量数据。 这个插件的作用是允许用户根据需要动态更新和管理同义词,以提高搜索的准确性和覆盖范围。在搜索引擎中,同义词是指具有相似意义的不同词语,如"汽车"和"车辆"。通过使用同义词分析插件,搜索引擎可以将搜索词与同义词匹配,从而拓宽搜索结果的范围。 要使用这个插件,首先需要将压缩文件解压缩到适当的目录中。然后,将插件添加到Elasticsearch的插件目录中,并重新启动Elasticsearch服务。一旦插件安装完成,就可以开始配置和使用动态同义词分析器。 在使用这个插件时,用户可以定义一个同义词文件,其中包含需要用于匹配的同义词对。插件将定期(可以通过设置来调整时间间隔)扫描这个同义词文件,并将其加载到内存中。然后,当用户执行搜索操作时,分析器将根据这些同义词对修改搜索词,并在搜索过程中考虑到它们。 动态同义词分析插件在大型搜索平台和电子商务领域中非常有用。它可以通过改进搜索引擎的召回率和精确度来提供更好的搜索体验。同义词动态管理也使得系统可以灵活应对新的同义词和词语变化。 总之,elasticsearch-analysis-dynamic-synonym-master.zip是一个用于Elasticsearch动态同义词分析插件,它可以帮助用户优化搜索结果,提高搜索准确性和广度,并灵活应对同义词和词语变化。
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值