1、改造analysis-dynamic-synonym源码访问远程数据库
以上方案还有什么不足呢?
需要新增一个java服务,链路变长,维护工作随之变大,这个该如何解决呢?
2、为了解决维护链路变长的问题,我们考虑将java服务的内容加到同义词插件中,并且重写部分代码
-
源码分析:在dynamic-synonym插件代码中有一个接口是 SynonymFile,这个接口为我们提供了扩展性,里面有两个实现,分别是LocalSynonymFile类和RemoteSynonymFile类,其作用分别是读取本地文件获取同义词数据 和 请求远程同义词数据的;
-
基于这一特性,我们可以新增一个类MySqlRemoteSynonymFile,实现SynonymFile接口并重写其方法;
- 实现代码:
package com.bellszhu.elasticsearch.plugin.synonym.analysis;
import com.bellszhu.elasticsearch.plugin.DynamicSynonymPlugin;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.env.Environment;
import java.io.*;
import java.nio.file.Path;
import java.sql.*;
import java.util.ArrayList;
import java.util.Properties;
/**
 * {@link SynonymFile} implementation that reads synonym rules from a database over JDBC.
 *
 * <p>Connection settings and the two SQL statements (version query and synonym query) are read
 * from {@code config/jdbc-reload.properties}, resolved relative to the directory that contains
 * the plugin jar. A reload is requested whenever the version returned by the database is greater
 * than the version last seen by this instance.
 *
 * <p>Every query opens its own connection and closes it when done, so no JDBC resources are held
 * between polls.
 */
public class MySqlRemoteSynonymFile implements SynonymFile {

    /** Name of the database configuration file. */
    private static final String DB_PROPERTIES = "jdbc-reload.properties";

    private static final Logger logger = LogManager.getLogger("dynamic-synonym");

    /** Property key: JDBC URL. */
    private static final String jdbcUrl = "jdbc.url";
    /** Property key: database user name. */
    private static final String jdbcUser = "jdbc.user";
    /** Property key: database password. */
    private static final String jdbcPassword = "jdbc.password";
    /** Property key: SQL that returns the current synonym version (column {@code swith_state}). */
    private static final String VERSION_SQL_KEY = "jdbc.reload.swith.synonym.version";
    /** Property key: SQL that returns the synonym rules (column {@code words}). */
    private static final String SYNONYM_SQL_KEY = "jdbc.reload.synonym.sql";

    private final String format;
    private final boolean expand;
    private final boolean lenient;
    private final Analyzer analyzer;
    private final Environment env;
    /** The configured synonyms location; only used in log messages here. */
    private final String location;
    /** Synonym version last seen by this node; {@code -1} until the first successful check. */
    private long thisSynonymVersion = -1L;
    private final Properties props;
    /** Absolute path of the {@code jdbc-reload.properties} file. */
    private final Path conf_dir;

    /**
     * Loads the JDBC configuration and records the current database version so that the first
     * scheduled check only reports a reload when the version has changed since construction.
     */
    MySqlRemoteSynonymFile(Environment env, Analyzer analyzer,
                           boolean expand, boolean lenient, String format, String location) {
        this.analyzer = analyzer;
        this.expand = expand;
        this.format = format;
        this.lenient = lenient;
        this.env = env;
        this.location = location;
        this.props = new Properties();
        // Resolve "<directory containing the plugin jar>/config".
        Path filePath = PathUtils.get(new File(DynamicSynonymPlugin.class.getProtectionDomain().getCodeSource()
                .getLocation().getPath())
                .getParent(), "config")
                .toAbsolutePath();
        this.conf_dir = filePath.resolve(DB_PROPERTIES);
        loadDbProperties(conf_dir.toFile());
        isNeedReloadSynonymMap();
    }

    /** Reads the properties file into {@link #props}; a missing or unreadable file is only logged. */
    private void loadDbProperties(File configFile) {
        // try-with-resources: the stream used to be left open.
        try (InputStream input = new FileInputStream(configFile)) {
            props.load(input);
        } catch (FileNotFoundException e) {
            logger.info("jdbc-reload.properties 数据库配置文件没有找到, " + e);
        } catch (IOException e) {
            logger.error("数据库配置文件 jdbc-reload.properties 加载失败,", e);
        }
    }

    /**
     * Builds a {@link SynonymMap} from the synonym rules currently stored in the database.
     *
     * @return the freshly built synonym map
     * @throws IllegalArgumentException if the rules cannot be parsed or the map cannot be built
     */
    @Override
    public SynonymMap reloadSynonymMap() {
        logger.info("start reload local synonym from {}.", location);
        try (Reader rulesReader = getReader()) {
            SynonymMap.Builder parser = RemoteSynonymFile.getSynonymParser(rulesReader, format, expand, lenient, analyzer);
            return parser.build();
        } catch (Exception e) {
            // The throwable must be the last argument, otherwise it is consumed by the "{}" placeholder.
            logger.error("reload local synonym {} error!", location, e);
            throw new IllegalArgumentException(
                    "could not reload local synonyms file to build synonyms", e);
        }
    }

    /**
     * Checks whether the database holds a newer synonym version than this node.
     * When it does, the local version is advanced to the database version.
     *
     * @return {@code true} if a reload is needed, {@code false} otherwise or on any error
     */
    @Override
    public boolean isNeedReloadSynonymMap() {
        try {
            Long mysqlVersion = getMySqlSynonymVersion();
            if (thisSynonymVersion < mysqlVersion) {
                thisSynonymVersion = mysqlVersion;
                return true;
            }
        } catch (Exception e) {
            logger.error("同义词版本号检查失败", e);
        }
        return false;
    }

    /**
     * Queries the database for the current synonym version.
     *
     * @return the value of column {@code swith_state} from the last row returned by the
     *         configured version query, or {@code 0} if the query is missing, fails or
     *         returns no rows
     */
    public Long getMySqlSynonymVersion() {
        Long mysqlSynonymVersion = 0L;
        String sql = props.getProperty(VERSION_SQL_KEY);
        if (sql == null) {
            logger.error("{} 中缺少配置项 {}", DB_PROPERTIES, VERSION_SQL_KEY);
            return mysqlSynonymVersion;
        }
        try (Connection conn = getConnection(props);
             Statement stmt = conn.createStatement();
             ResultSet resultSet = stmt.executeQuery(sql)) {
            while (resultSet.next()) {
                mysqlSynonymVersion = resultSet.getLong("swith_state");
                logger.info("当前MySql同义词版本号为:{}, 当前节点同义词库版本号为:{}", mysqlSynonymVersion, thisSynonymVersion);
            }
        } catch (SQLException e) {
            logger.error("查询MySql同义词版本号失败", e);
        }
        return mysqlSynonymVersion;
    }

    /**
     * Queries the database for the synonym rules.
     *
     * @return the value of column {@code words} of every row returned by the configured synonym
     *         query; empty if the query is missing or fails
     */
    public ArrayList<String> getDBData() {
        ArrayList<String> arrayList = new ArrayList<>();
        String sql = props.getProperty(SYNONYM_SQL_KEY);
        if (sql == null) {
            logger.error("{} 中缺少配置项 {}", DB_PROPERTIES, SYNONYM_SQL_KEY);
            return arrayList;
        }
        logger.info("正在执行SQL查询同义词列表,SQL:{}", sql);
        try (Connection conn = getConnection(props);
             Statement stmt = conn.createStatement();
             ResultSet resultSet = stmt.executeQuery(sql)) {
            while (resultSet.next()) {
                arrayList.add(resultSet.getString("words"));
            }
        } catch (SQLException e) {
            logger.error("查询MySql同义词列表失败", e);
        }
        return arrayList;
    }

    /**
     * Returns the synonym rules as text, one database row per line.
     * Each row is expected to hold one group of words separated by commas.
     *
     * @return a reader over the rules; empty if nothing could be loaded
     */
    @Override
    public Reader getReader() {
        StringBuilder sb = new StringBuilder();
        try {
            String lineSeparator = System.getProperty("line.separator");
            for (String words : getDBData()) {
                logger.info("正在加载同义词:{}", words);
                sb.append(words).append(lineSeparator);
            }
        } catch (Exception e) {
            logger.error("同义词加载失败", e);
        }
        return new StringReader(sb.toString());
    }

    /**
     * Opens a new JDBC connection using the URL, user and password found in {@code props}.
     * The caller is responsible for closing it.
     *
     * @param props loaded database configuration
     * @return an open connection
     * @throws SQLException if the connection cannot be established
     */
    private static Connection getConnection(Properties props) throws SQLException {
        return DriverManager.getConnection(
                props.getProperty(jdbcUrl),
                props.getProperty(jdbcUser),
                props.getProperty(jdbcPassword));
    }
}
- 完成这些后,找到DynamicSynonymTokenFilterFactory类的getSynonymFile(Analyzer analyzer)方法,对其稍加改动,自定义一个类型,触发调用MySql数据库的查询
- 相关代码:
/**
 * Picks the {@link SynonymFile} implementation that matches the configured location and makes
 * sure the periodic reload monitor is scheduled.
 *
 * <ul>
 *   <li>location equal to {@code "fromMySql"}: {@code MySqlRemoteSynonymFile}</li>
 *   <li>location starting with {@code http://} or {@code https://}: {@code RemoteSynonymFile}</li>
 *   <li>anything else: {@code LocalSynonymFile}</li>
 * </ul>
 *
 * @param analyzer analyzer handed to the synonym file
 * @return the synonym file for the configured location
 * @throws IllegalArgumentException if the synonym file cannot be created or scheduled
 */
SynonymFile getSynonymFile(Analyzer analyzer) {
    try {
        final SynonymFile source;
        if ("fromMySql".equals(location)) {
            source = new MySqlRemoteSynonymFile(environment, analyzer, expand, lenient, format, location);
        } else {
            boolean overHttp = location.startsWith("http://") || location.startsWith("https://");
            source = overHttp
                    ? new RemoteSynonymFile(environment, analyzer, expand, lenient, format, location)
                    : new LocalSynonymFile(environment, analyzer, expand, lenient, format, location);
        }

        logger.info("scheduledFuture 1 " + scheduledFuture);
        // Schedule the monitor only if none has been scheduled yet.
        if (scheduledFuture == null) {
            Monitor monitor = new Monitor(source);
            scheduledFuture = pool.scheduleAtFixedRate(monitor, interval, interval, TimeUnit.SECONDS);
        }
        return source;
    } catch (Exception e) {
        logger.error("failed to get synonyms: " + location, e);
        throw new IllegalArgumentException("failed to get synonyms : " + location, e);
    }
}
- 引入jdbc驱动的依赖包
<!-- MySQL JDBC driver; MySqlRemoteSynonymFile opens its connections through java.sql.DriverManager. -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.38</version>
</dependency>
- 最后一步,我们在进行编译打包前,修改下plugin.xml,新增config目录下的打包配置
- 改动后的整个文件如下:
<?xml version="1.0"?>
<!-- Assembly descriptor for the plugin distribution; the output format is a zip archive. -->
<assembly>
<id>-</id>
<formats>
<format>zip</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<!-- Copies the project's config directory into "config" inside the archive.
     MySqlRemoteSynonymFile looks for jdbc-reload.properties in a "config" directory next to the plugin jar. -->
<fileSets>
<fileSet>
<directory>${project.basedir}/config</directory>
<outputDirectory>config</outputDirectory>
</fileSet>
</fileSets>
<!-- Plugin descriptor and security policy, both with property filtering enabled. -->
<files>
<file>
<source>${project.basedir}/src/main/resources/plugin-descriptor.properties</source>
<filtered>true</filtered>
</file>
<file>
<source>${project.basedir}/src/main/resources/plugin-security.policy</source>
<filtered>true</filtered>
</file>
</files>
<dependencySets>
<!-- Project artifact plus dependencies, with the Elasticsearch artifact itself excluded. -->
<dependencySet>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<excludes>
<exclude>org.elasticsearch:elasticsearch</exclude>
</excludes>
</dependencySet>
<!-- Explicitly includes Apache HttpClient. -->
<dependencySet>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<includes>
<include>org.apache.httpcomponents:httpclient</include>
</includes>
</dependencySet>
</dependencySets>
</assembly>
- 以上,插件代码已经修改完毕,接下来我们进行编译打包,将版本号改为和ES一致,然后执行maven命令:clean、compile、package
- 将打包好的zip包,复制到指定文件夹(elasticsearch-7.10.0/plugins/),解压并改名为“dynamic-synonym”
- 在数据库同义词表中新增数据,并修改同义词词库版本号(博主使用的update_time字段控制版本号)
- 创建表SQL脚本:
-- Test table used as the synonym source in this walkthrough.
-- NOTE(review): MySqlRemoteSynonymFile reads result columns named `words` and `swith_state`,
-- which this table does not define; presumably the SQL in jdbc-reload.properties aliases them - confirm.
-- NOTE(review): `update_time` has no ON UPDATE CURRENT_TIMESTAMP clause, so it only changes
-- when a statement sets it explicitly - confirm this is intended if it serves as the version.
CREATE TABLE `tb_test` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT '主键id',
`name` varchar(500) NOT NULL DEFAULT '0' COMMENT '名字',
`create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`update_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=13 DEFAULT CHARSET=utf8mb4 COMMENT='测试表';
3、验证
- 启动ES服务,cd到elasticsearch-7.10.0/bin/目录下,执行“./elasticsearch”,可以看到运行时,成功加载了ik分词器插件
- 运行es-head插件,进入可视化页面,执行创建索引操作,并指定同义词配置;注意创建索引时,synonyms_path 需指定为“fromMySql”
PUT http://localhost:9200/test03_index
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"index": {
"analysis": {
"filter": {
"mysql_synonym": {
"type": "dynamic_synonym",
"synonyms_path": "fromMySql",
"interval": 30
}
},
"analyzer": {
"ik_syno": {
"type": "custom",
"tokenizer": "ik_smart",
"filter": [
"mysql_synonym"
]
},
"ik_syno_max": {
"type": "custom",
"tokenizer": "ik_max_word",
"filter": [
"mysql_synonym"
]
}
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "text",
"analyzer": "ik_syno_max"
},
"remark": {
"type": "text",
"analyzer": "ik_syno_max"
}
}
}
}
- 创建完索引后,数据库中的同义词配置会立即被加载到ES中,之后每隔30秒检查一次,当发现版本号有更新时,进行配置同步(下方图片中的日志打印仅为举例,实际打印文案有出入)
数据验证步骤,同第一种方式(传送门:https://blog.csdn.net/u012888052/article/details/125016668)