改造analysis-dynamic-synonym源码访问远程数据库

7 篇文章 0 订阅

1、改造analysis-dynamic-synonym源码访问远程数据库

以上方案还有什么不足呢?
需要新增一个java服务,链路变长,维护工作随之变大,这个该如何解决呢?

2、为了解决维护链路变长的问题,我们考虑将java服务的内容加到同义词插件中,并且重写部分代码

  • 源码分析:在dynamic-synonym插件代码中有一个接口是 SynonymFile,这个接口为我们提供了扩展性,里面有两个实现,分别是LocalSynonymFile类和RemoteSynonymFile类,其作用分别是读取本地文件获取同义词数据 和 请求远程同义词数据的;
    在这里插入图片描述
    在这里插入图片描述
    在这里插入图片描述
    在这里插入图片描述

  • 基于这一特性,我们可以新增一个类MySqlRemoteSynonymFile,实现SynonymFile接口并重写其方法;
    在这里插入图片描述
    在这里插入图片描述

  • 实现代码:
package com.bellszhu.elasticsearch.plugin.synonym.analysis;

import com.bellszhu.elasticsearch.plugin.DynamicSynonymPlugin;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.env.Environment;

import java.io.*;
import java.nio.file.Path;
import java.sql.*;
import java.util.ArrayList;
import java.util.Properties;

/**
 * {@link SynonymFile} implementation that reads synonym rules and a synonym
 * "version" number from a database over JDBC.
 *
 * <p>JDBC settings and the two SQL statements are loaded from
 * {@code jdbc-reload.properties}, looked up in a {@code config} directory next
 * to the jar that contains {@link DynamicSynonymPlugin}. The keys read are
 * {@code jdbc.url}, {@code jdbc.user}, {@code jdbc.password},
 * {@code jdbc.reload.swith.synonym.version} and {@code jdbc.reload.synonym.sql}.
 *
 * <p>Every query opens its own connection and closes it again. The earlier
 * version tried to cache a connection in a field, but the field was never
 * assigned (the helper only reassigned its own parameter), so each call leaked
 * a new connection and statement.
 */
public class MySqlRemoteSynonymFile implements SynonymFile {

    /** Name of the database settings file inside the plugin's {@code config} directory. */
    private static final String DB_PROPERTIES = "jdbc-reload.properties";

    private static final Logger logger = LogManager.getLogger("dynamic-synonym");

    /** Property key: JDBC URL. */
    private static final String JDBC_URL = "jdbc.url";
    /** Property key: database user name. */
    private static final String JDBC_USER = "jdbc.user";
    /** Property key: database password. */
    private static final String JDBC_PASSWORD = "jdbc.password";
    /** Property key: SQL returning the current synonym version in column {@code swith_state}. */
    private static final String VERSION_SQL_KEY = "jdbc.reload.swith.synonym.version";
    /** Property key: SQL returning one synonym rule per row in column {@code words}. */
    private static final String SYNONYM_SQL_KEY = "jdbc.reload.synonym.sql";

    private final String format;

    private final boolean expand;

    private final boolean lenient;

    private final Analyzer analyzer;

    /** Stored from the constructor; not read anywhere in this class. */
    private final Environment env;

    /** The configured synonym location; only used in log messages here. */
    private final String location;

    /** Settings loaded from {@link #DB_PROPERTIES}; stays empty if the file cannot be read. */
    private final Properties props;

    /** Absolute path of the settings file. */
    private final Path conf_dir;

    /** Synonym version most recently observed by this node; -1 until the first successful check. */
    private long thisSynonymVersion = -1L;

    /**
     * Loads the JDBC settings and records the current database synonym version.
     *
     * @param env      Elasticsearch environment
     * @param analyzer analyzer handed to the synonym parser
     * @param expand   expand flag handed to the synonym parser
     * @param lenient  lenient flag handed to the synonym parser
     * @param format   synonym format handed to the synonym parser
     * @param location configured synonym location (used for logging)
     */
    MySqlRemoteSynonymFile(Environment env, Analyzer analyzer,
                           boolean expand, boolean lenient, String format, String location) {
        this.analyzer = analyzer;
        this.expand = expand;
        this.format = format;
        this.lenient = lenient;
        this.env = env;
        this.location = location;
        this.props = new Properties();

        // Resolve <directory containing the plugin jar>/config/jdbc-reload.properties.
        Path configDir = PathUtils.get(new File(DynamicSynonymPlugin.class.getProtectionDomain().getCodeSource()
                .getLocation().getPath())
                .getParent(), "config")
                .toAbsolutePath();
        this.conf_dir = configDir.resolve(DB_PROPERTIES);

        // try-with-resources so the stream is closed even when load() fails.
        try (InputStream input = new FileInputStream(conf_dir.toFile())) {
            props.load(input);
        } catch (FileNotFoundException e) {
            logger.info("jdbc-reload.properties 数据库配置文件没有找到, " + e);
        } catch (IOException e) {
            logger.error("数据库配置文件 jdbc-reload.properties 加载失败,", e);
        }
        // Records the current database version so later checks only fire on a newer one.
        isNeedReloadSynonymMap();
    }

    /**
     * Builds a fresh {@link SynonymMap} from the rules currently stored in the database.
     *
     * @return the newly built synonym map
     * @throws IllegalArgumentException if the rules cannot be parsed or built
     */
    @Override
    public SynonymMap reloadSynonymMap() {
        logger.info("start reload local synonym from {}.", location);
        try (Reader rulesReader = getReader()) {
            SynonymMap.Builder parser =
                    RemoteSynonymFile.getSynonymParser(rulesReader, format, expand, lenient, analyzer);
            return parser.build();
        } catch (Exception e) {
            // The throwable must be the last argument so that log4j prints the
            // location in the placeholder and the stack trace after the message.
            logger.error("reload local synonym {} error!", location, e);
            throw new IllegalArgumentException(
                    "could not reload local synonyms file to build synonyms", e);
        }
    }

    /**
     * Checks whether the database reports a newer synonym version than the one
     * this node has seen, and if so remembers the new version.
     *
     * @return {@code true} when a newer version was found, {@code false}
     *         otherwise or when the check itself failed
     */
    @Override
    public boolean isNeedReloadSynonymMap() {
        try {
            long mysqlVersion = getMySqlSynonymVersion();
            if (thisSynonymVersion < mysqlVersion) {
                // NOTE(review): the version is advanced before the reload runs, so a
                // reload that fails afterwards is not retried until the version
                // changes again - confirm this is acceptable.
                thisSynonymVersion = mysqlVersion;
                return true;
            }
        } catch (Exception e) {
            logger.error("failed to check the synonym version", e);
        }
        return false;
    }

    /**
     * Queries the synonym version number from the database; it decides whether
     * the synonyms have to be reloaded.
     *
     * @return the value of column {@code swith_state} from the last row returned
     *         by the configured version SQL, or {@code 0} when the SQL is not
     *         configured, returns no rows, or the query fails
     */
    public Long getMySqlSynonymVersion() {
        Long mysqlSynonymVersion = 0L;
        String sql = props.getProperty(VERSION_SQL_KEY);
        if (sql == null || sql.trim().isEmpty()) {
            logger.error("property {} is not configured in {}", VERSION_SQL_KEY, conf_dir);
            return mysqlSynonymVersion;
        }
        try (Connection conn = openConnection();
             Statement stmt = conn.createStatement();
             ResultSet resultSet = stmt.executeQuery(sql)) {
            while (resultSet.next()) {
                mysqlSynonymVersion = resultSet.getLong("swith_state");
                logger.info("当前MySql同义词版本号为:{}, 当前节点同义词库版本号为:{}", mysqlSynonymVersion, thisSynonymVersion);
            }
        } catch (SQLException e) {
            logger.error("failed to query the synonym version", e);
        }
        return mysqlSynonymVersion;
    }

    /**
     * Queries the synonym rules from the database.
     *
     * @return the non-blank values of column {@code words}, one entry per row;
     *         empty when the SQL is not configured or the query fails
     */
    public ArrayList<String> getDBData() {
        ArrayList<String> rules = new ArrayList<>();
        String sql = props.getProperty(SYNONYM_SQL_KEY);
        if (sql == null || sql.trim().isEmpty()) {
            logger.error("property {} is not configured in {}", SYNONYM_SQL_KEY, conf_dir);
            return rules;
        }
        logger.info("正在执行SQL查询同义词列表,SQL:{}", sql);
        try (Connection conn = openConnection();
             Statement stmt = conn.createStatement();
             ResultSet resultSet = stmt.executeQuery(sql)) {
            while (resultSet.next()) {
                String theWord = resultSet.getString("words");
                // A NULL column would otherwise be appended as the literal text "null"
                // and end up as a synonym rule.
                if (theWord != null && !theWord.trim().isEmpty()) {
                    rules.add(theWord);
                }
            }
        } catch (SQLException e) {
            logger.error("failed to query the synonym rules", e);
        }
        return rules;
    }

    /**
     * Exposes the database rules as a reader, one rule per line. Each rule is a
     * group of words separated by commas.
     *
     * @return a reader over the rules; empty when nothing could be loaded
     */
    @Override
    public Reader getReader() {
        StringBuilder sb = new StringBuilder();
        try {
            for (String rule : getDBData()) {
                logger.info("正在加载同义词:{}", rule);
                sb.append(rule).append(System.lineSeparator());
            }
        } catch (Exception e) {
            logger.error("同义词加载失败", e);
        }
        return new StringReader(sb.toString());
    }

    /**
     * Opens a new JDBC connection from the loaded settings. The caller owns the
     * connection and must close it.
     *
     * <p>NOTE(review): the previous code carried a commented-out
     * {@code Class.forName(props.getProperty("jdbc.driver"))}; this relies on the
     * driver registering itself with {@link DriverManager} - confirm that this
     * works under the plugin class loader.
     *
     * @return an open connection
     * @throws SQLException if the connection cannot be established
     */
    private Connection openConnection() throws SQLException {
        return DriverManager.getConnection(
                props.getProperty(JDBC_URL),
                props.getProperty(JDBC_USER),
                props.getProperty(JDBC_PASSWORD));
    }
}

  • 完成这些后,找到DynamicSynonymTokenFilterFactory类的getSynonymFile(Analyzer analyzer)方法,对其稍加改动,自定义一个类型,触发调用MySql数据库的查询
    在这里插入图片描述
  • 相关代码:
    /**
     * Picks the {@link SynonymFile} implementation that matches {@code location}
     * and, when no monitor task has been scheduled yet, schedules one for it.
     *
     * <p>{@code "fromMySql"} selects {@link MySqlRemoteSynonymFile}, a location
     * starting with {@code http://} or {@code https://} selects
     * {@link RemoteSynonymFile}, and anything else selects {@link LocalSynonymFile}.
     *
     * @param analyzer analyzer handed to the chosen synonym file
     * @return the synonym file created for {@code location}
     * @throws IllegalArgumentException if anything fails while creating the
     *         synonym file or scheduling the monitor
     */
    SynonymFile getSynonymFile(Analyzer analyzer) {
        try {
            final SynonymFile source;
            if ("fromMySql".equals(location)) {
                source = new MySqlRemoteSynonymFile(environment, analyzer, expand, lenient, format, location);
            } else {
                final boolean overHttp = location.startsWith("http://") || location.startsWith("https://");
                if (overHttp) {
                    source = new RemoteSynonymFile(environment, analyzer, expand, lenient, format, location);
                } else {
                    source = new LocalSynonymFile(environment, analyzer, expand, lenient, format, location);
                }
            }

            logger.info("scheduledFuture 1 " + scheduledFuture);
            if (scheduledFuture != null) {
                // A monitor task already exists; do not schedule a second one.
                return source;
            }
            scheduledFuture = pool.scheduleAtFixedRate(
                    new Monitor(source), interval, interval, TimeUnit.SECONDS);
            return source;
        } catch (Exception e) {
            logger.error("failed to get synonyms: " + location, e);
            throw new IllegalArgumentException("failed to get synonyms : " + location, e);
        }
    }
  • 引入jdbc驱动的依赖包
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.38</version>
</dependency>
  • 最后一步,我们在进行编译打包前,修改下plugin.xml,新增config目录下的打包配置

在这里插入图片描述

  • 改动后的整个文件如下:
<?xml version="1.0"?>
<!--
  Maven assembly descriptor for the plugin package: produces a zip without a
  base directory that contains the config directory, the two plugin resource
  files and the selected dependencies.
-->
<assembly>
    <id>-</id>
    <formats>
        <format>zip</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>

    <!-- Copies the project's config directory into "config" inside the zip. -->
    <fileSets>
        <fileSet>
            <directory>${project.basedir}/config</directory>
            <outputDirectory>config</outputDirectory>
        </fileSet>
    </fileSets>

    <!-- Plugin descriptor and security policy; both are filtered while packaging. -->
    <files>
        <file>
            <source>${project.basedir}/src/main/resources/plugin-descriptor.properties</source>
            <filtered>true</filtered>
        </file>
        <file>
            <source>${project.basedir}/src/main/resources/plugin-security.policy</source>
            <filtered>true</filtered>
        </file>
    </files>
    <dependencySets>
        <!-- Project artifact plus dependencies, with org.elasticsearch:elasticsearch excluded. -->
        <dependencySet>
            <useProjectArtifact>true</useProjectArtifact>
            <useTransitiveFiltering>true</useTransitiveFiltering>
            <excludes>
                <exclude>org.elasticsearch:elasticsearch</exclude>
            </excludes>
        </dependencySet>
        <!-- Explicitly includes org.apache.httpcomponents:httpclient. -->
        <dependencySet>
            <useProjectArtifact>true</useProjectArtifact>
            <useTransitiveFiltering>true</useTransitiveFiltering>
            <includes>
                <include>org.apache.httpcomponents:httpclient</include>
            </includes>
        </dependencySet>
    </dependencySets>
</assembly>

  • 以上,插件代码已经修改完毕,接下来我们进行编译打包,将版本号改为和ES一致,然后执行maven命令:clean、compile、package
  • 将打包好的zip包,复制到指定文件夹(elasticsearch-7.10.0/plugins/),解压并将解压后的目录改名为“dynamic-synonym”
    在这里插入图片描述
  • 在数据库同义词表中新增数据,并修改同义词词库版本号(博主使用的update_time字段控制版本号)
    在这里插入图片描述
  • 创建表SQL脚本:
-- Test table for this walkthrough. Per the article, update_time is used as the
-- synonym version.
-- update_time only has DEFAULT CURRENT_TIMESTAMP (no ON UPDATE clause), so it
-- has to be set explicitly whenever a row is changed.
-- NOTE(review): the plugin code reads result columns named `words` and
-- `swith_state`; presumably the SQL configured in jdbc-reload.properties aliases
-- columns of this table to those names - confirm.
CREATE TABLE `tb_test` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT '主键id',
  `name` varchar(500) NOT NULL DEFAULT '0' COMMENT '名字',
  `create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `update_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '更新时间',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=13 DEFAULT CHARSET=utf8mb4 COMMENT='测试表';

3、验证

  • 启动ES服务,cd到elasticsearch-7.10.0/bin/目录下,执行“./elasticsearch”,可以看到运行时,成功加载了ik分词器插件
    在这里插入图片描述
  • 运行es-head插件,进入可视化页面,执行创建索引操作,并指定同义词配置;注意创建索引时,synonyms_path 必须指定为“fromMySql”,这样才会触发从MySql数据库加载同义词
PUT http://localhost:9200/test03_index
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "index": {
      "analysis": {
        "filter": {
          "mysql_synonym": {
            "type": "dynamic_synonym",
            "synonyms_path": "fromMySql",
            "interval": 30
          }
        },
        "analyzer": {
          "ik_syno": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "filter": [
              "mysql_synonym"
            ]
          },
          "ik_syno_max": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": [
              "mysql_synonym"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "ik_syno_max"
      },
      "remark": {
        "type": "text",
        "analyzer": "ik_syno_max"
      }
    }
  }
}
  • 创建完索引后,数据库中的同义词配置会立即被加载到ES中,之后每隔30秒检查一次,当发现版本号有更新时,进行配置同步(下方图片中的日志打印仅为举例,实际打印文案有出入)
    在这里插入图片描述

数据验证步骤,同第一种方式(传送门:https://blog.csdn.net/u012888052/article/details/125016668)

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Happy王子乐

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值