文章目录
1. 地址
下载与 elasticsearch 版本对应的 elasticsearch-analysis-ik 插件源码包(后续步骤需要修改源码并重新打包),本文使用的 elasticsearch 版本是 7.17.0
链接: elasticsearch-analysis-ik
2. 导入依赖和修改es版本到对应的安装版本
<properties>
<elasticsearch.version>7.17.0</elasticsearch.version>
</properties>
<!--mysql驱动-->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.29</version>
</dependency>
3. 创建数据库,新建主词典和停用词典
-- Extra words for the main dictionary. Rows are soft-deleted through is_deleted.
-- The index on update_time serves the incremental polling query
-- (WHERE update_time > ? ORDER BY update_time), which would otherwise scan the whole table.
CREATE TABLE `es_extra_main` (
  `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
  `word` varchar(255) CHARACTER SET utf8mb4 NOT NULL COMMENT '词',
  `is_deleted` tinyint(1) NOT NULL DEFAULT '0' COMMENT '是否已删除',
  `update_time` timestamp(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) ON UPDATE CURRENT_TIMESTAMP(6) COMMENT '更新时间',
  PRIMARY KEY (`id`),
  KEY `idx_update_time` (`update_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Extra stopwords. Rows are soft-deleted through is_deleted.
-- The index on update_time serves the incremental polling query
-- (WHERE update_time > ? ORDER BY update_time), which would otherwise scan the whole table.
CREATE TABLE `es_extra_stopword` (
  `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '主键',
  `word` varchar(255) CHARACTER SET utf8mb4 NOT NULL COMMENT '词',
  `is_deleted` tinyint(1) NOT NULL DEFAULT '0' COMMENT '是否已删除',
  `update_time` timestamp(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) ON UPDATE CURRENT_TIMESTAMP(6) COMMENT '更新时间',
  PRIMARY KEY (`id`),
  KEY `idx_update_time` (`update_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
4. 在config下新建jdbc.properties配置相关数据库属性
jdbc.url=jdbc:mysql://192.168.100.231:3306/es?useAffectedRows=true&characterEncoding=UTF-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull&useUnicode=true&serverTimezone=GMT%2B8&allowMultiQueries=true
jdbc.username=root
jdbc.password=root
jdbc.driver=com.mysql.cj.jdbc.Driver
jdbc.update.main.dic.sql=SELECT * FROM `es_extra_main` WHERE update_time > ? order by update_time asc
jdbc.update.stopword.sql=SELECT * FROM `es_extra_stopword` WHERE update_time > ? order by update_time asc
jdbc.update.interval=10
5. 打包配置
修改src/main/assemblies/plugin.xml 将 MySQL 驱动的依赖写入,否则打成 zip 后会没有 MySQL 驱动的 jar 包。
<include>mysql:mysql-connector-java</include>
6.修改权限
在 src/main/resources/plugin-security.policy 中添加 permission java.lang.RuntimePermission "setContextClassLoader";,否则插件运行时会因为权限不足而抛出异常。修改后的文件内容如下:
grant {
// needed because of the hot reload functionality
permission java.net.SocketPermission "*", "connect,resolve";
permission java.lang.RuntimePermission "setContextClassLoader";
};
7. 新建DatabaseMonitor实现类
- 1.lastUpdateTimeOfMainDic、lastUpdateTimeOfStopword 记录上次处理的最后一条的updateTime
- 2.查出上次处理之后新增或删除的记录
- 3.循环判断 is_deleted 字段,为 true 则删除(屏蔽)词条,为 false 则添加词条
在 org.wltea.analyzer.dic 包下创建 DatabaseMonitor 类
package org.wltea.analyzer.dic;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.SpecialPermission;
import org.wltea.analyzer.help.ESPluginLoggerFactory;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.sql.*;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
/**
 * Updates the IK main dictionary and stopword dictionary from MySQL.
 *
 * <p>Each {@link #run()} opens a connection, executes the two configured SQL statements
 * with the timestamp of the last processed row bound as their first parameter, and then
 * adds or disables every returned word depending on its {@code is_deleted} column.
 *
 * @author gblfy
 * @date 2021-11-21
 * @WebSite gblfy.com
 */
public class DatabaseMonitor implements Runnable {
    private static final Logger logger = ESPluginLoggerFactory.getLogger(DatabaseMonitor.class.getName());

    /** File name of the JDBC configuration file. */
    public static final String PATH_JDBC_PROPERTIES = "jdbc.properties";

    private static final String JDBC_URL = "jdbc.url";
    private static final String JDBC_USERNAME = "jdbc.username";
    private static final String JDBC_PASSWORD = "jdbc.password";
    private static final String JDBC_DRIVER = "jdbc.driver";
    private static final String SQL_UPDATE_MAIN_DIC = "jdbc.update.main.dic.sql";
    private static final String SQL_UPDATE_STOPWORD = "jdbc.update.stopword.sql";

    /**
     * Property key of the update interval.
     */
    public final static String JDBC_UPDATE_INTERVAL = "jdbc.update.interval";

    /** Timestamp bound to the query while no row has been processed yet (2020-01-01T00:00). */
    private static final Timestamp DEFAULT_LAST_UPDATE = Timestamp.valueOf(LocalDateTime.of(LocalDate.of(2020, 1, 1), LocalTime.MIN));

    /** {@code update_time} of the last processed main-dictionary row; {@code null} before the first one. */
    private static Timestamp lastUpdateTimeOfMainDic = null;

    /** {@code update_time} of the last processed stopword row; {@code null} before the first one. */
    private static Timestamp lastUpdateTimeOfStopword = null;

    /** @return the value of the {@code jdbc.url} property */
    public String getUrl() {
        return Dictionary.getSingleton().getProperty(JDBC_URL);
    }

    /** @return the value of the {@code jdbc.username} property */
    public String getUsername() {
        return Dictionary.getSingleton().getProperty(JDBC_USERNAME);
    }

    /** @return the value of the {@code jdbc.password} property */
    public String getPassword() {
        return Dictionary.getSingleton().getProperty(JDBC_PASSWORD);
    }

    /** @return the value of the {@code jdbc.driver} property */
    public String getDriver() {
        return Dictionary.getSingleton().getProperty(JDBC_DRIVER);
    }

    /** @return the value of the {@code jdbc.update.main.dic.sql} property */
    public String getUpdateMainDicSql() {
        return Dictionary.getSingleton().getProperty(SQL_UPDATE_MAIN_DIC);
    }

    /** @return the value of the {@code jdbc.update.stopword.sql} property */
    public String getUpdateStopwordSql() {
        return Dictionary.getSingleton().getProperty(SQL_UPDATE_STOPWORD);
    }

    /**
     * Loads the configured JDBC driver class. A missing driver is logged, not rethrown.
     */
    public DatabaseMonitor() {
        SpecialPermission.check();
        AccessController.doPrivileged((PrivilegedAction<Void>) () -> {
            try {
                Class.forName(getDriver());
            } catch (ClassNotFoundException e) {
                logger.error("mysql jdbc driver not found", e);
            }
            return null;
        });
    }

    /**
     * Opens a connection, refreshes the main dictionary and then the stopwords, and
     * always closes the connection afterwards.
     */
    @Override
    public void run() {
        SpecialPermission.check();
        AccessController.doPrivileged((PrivilegedAction<Void>) () -> {
            Connection conn = getConnection();
            if (conn == null) {
                // getConnection() has already logged the failure; nothing to do this round.
                return null;
            }
            try {
                // Update the main dictionary
                updateMainDic(conn);
                // Update the stopwords
                updateStopword(conn);
            } catch (RuntimeException e) {
                // When this task is run by a ScheduledExecutorService, an exception escaping
                // run() would suppress every later execution, so log it and carry on.
                logger.error("failed to update dictionaries from database", e);
            } finally {
                closeConnection(conn);
            }
            return null;
        });
    }

    /**
     * Opens a JDBC connection using the configured url, username and password.
     *
     * @return the connection, or {@code null} when it could not be opened (the error is logged)
     */
    public Connection getConnection() {
        Connection connection = null;
        try {
            connection = DriverManager.getConnection(getUrl(), getUsername(), getPassword());
        } catch (SQLException e) {
            logger.error("failed to get connection", e);
        }
        return connection;
    }

    /**
     * Closes the connection, logging instead of propagating any {@link SQLException}.
     *
     * @param conn connection to close; {@code null} is ignored
     */
    public void closeConnection(Connection conn) {
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                logger.error("failed to close Connection", e);
            }
        }
    }

    /**
     * Closes the result set and then the statement, logging instead of propagating
     * any {@link SQLException}.
     *
     * @param rs result set to close; {@code null} is ignored
     * @param ps statement to close; {@code null} is ignored
     */
    public void closeRsAndPs(ResultSet rs, PreparedStatement ps) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                logger.error("failed to close ResultSet", e);
            }
        }
        if (ps != null) {
            try {
                ps.close();
            } catch (SQLException e) {
                logger.error("failed to close PreparedStatement", e);
            }
        }
    }

    /**
     * Applies the main-dictionary rows returned by the configured SQL: rows with
     * {@code is_deleted} set are disabled, all others are added.
     *
     * @param conn open connection; when {@code null} the update is skipped
     */
    public synchronized void updateMainDic(Connection conn) {
        logger.info("start update main dic");
        if (conn == null) {
            logger.error("failed to update main_dic: no database connection");
            return;
        }
        int numberOfAddWords = 0;
        int numberOfDisableWords = 0;
        PreparedStatement ps = null;
        ResultSet rs = null;
        try {
            String sql = getUpdateMainDicSql();
            Timestamp param = lastUpdateTimeOfMainDic == null ? DEFAULT_LAST_UPDATE : lastUpdateTimeOfMainDic;
            logger.info("param: {}", param);
            ps = conn.prepareStatement(sql);
            ps.setTimestamp(1, param);
            rs = ps.executeQuery();
            while (rs.next()) {
                String word = rs.getString("word");
                if (word == null) {
                    continue;
                }
                word = word.trim();
                if (word.isEmpty()) {
                    continue;
                }
                // Remember how far we got so the next run only fetches newer rows.
                lastUpdateTimeOfMainDic = rs.getTimestamp("update_time");
                if (rs.getBoolean("is_deleted")) {
                    logger.info("[main dic] disable word: {}", word);
                    // Disable
                    Dictionary.disableWord(word);
                    numberOfDisableWords++;
                } else {
                    logger.info("[main dic] add word: {}", word);
                    // Add
                    Dictionary.addWord(word);
                    numberOfAddWords++;
                }
            }
            logger.info("end update main dic -> addWord: {}, disableWord: {}", numberOfAddWords, numberOfDisableWords);
        } catch (SQLException e) {
            logger.error("failed to update main_dic", e);
        } finally {
            // Close ResultSet and PreparedStatement on success as well as on failure
            closeRsAndPs(rs, ps);
        }
    }

    /**
     * Applies the stopword rows returned by the configured SQL: rows with
     * {@code is_deleted} set are disabled, all others are added.
     *
     * @param conn open connection; when {@code null} the update is skipped
     */
    public synchronized void updateStopword(Connection conn) {
        logger.info("start update stopword");
        if (conn == null) {
            logger.error("failed to update stopword: no database connection");
            return;
        }
        int numberOfAddWords = 0;
        int numberOfDisableWords = 0;
        PreparedStatement ps = null;
        ResultSet rs = null;
        try {
            String sql = getUpdateStopwordSql();
            Timestamp param = lastUpdateTimeOfStopword == null ? DEFAULT_LAST_UPDATE : lastUpdateTimeOfStopword;
            logger.info("param: {}", param);
            ps = conn.prepareStatement(sql);
            ps.setTimestamp(1, param);
            rs = ps.executeQuery();
            while (rs.next()) {
                String word = rs.getString("word");
                if (word == null) {
                    continue;
                }
                word = word.trim();
                if (word.isEmpty()) {
                    continue;
                }
                // Remember how far we got so the next run only fetches newer rows.
                lastUpdateTimeOfStopword = rs.getTimestamp("update_time");
                if (rs.getBoolean("is_deleted")) {
                    logger.info("[stopword] disable word: {}", word);
                    // Disable
                    Dictionary.disableStopword(word);
                    numberOfDisableWords++;
                } else {
                    logger.info("[stopword] add word: {}", word);
                    // Add
                    Dictionary.addStopword(word);
                    numberOfAddWords++;
                }
            }
            logger.info("end update stopword -> addWord: {}, disableWord: {}", numberOfAddWords, numberOfDisableWords);
        } catch (SQLException e) {
            logger.error("failed to update stopword", e);
        } finally {
            // Close ResultSet and PreparedStatement on success as well as on failure
            closeRsAndPs(rs, ps);
        }
    }
}
8. 修改Dictionary
8.1 在 Dictionary 的构造方法中调用加载 jdbc.properties 的方法
// 加载 jdbc.properties 文件
loadJdbcProperties();
8.2 将 getProperty()改为 public
8.3 添加了几个方法,用于增删词条
/**
 * Adds a word to the main dictionary.
 * The word is trimmed and lower-cased before it is handed to {@code fillSegment}.
 */
public static void addWord(String word) {
    final String normalized = word.trim().toLowerCase();
    final char[] segment = normalized.toCharArray();
    singleton._MainDict.fillSegment(segment);
}
/**
 * Removes (masks) a word from the main dictionary.
 * The word is trimmed and lower-cased before it is handed to {@code disableSegment}.
 */
public static void disableWord(String word) {
    final String normalized = word.trim().toLowerCase();
    final char[] segment = normalized.toCharArray();
    singleton._MainDict.disableSegment(segment);
}
/**
 * Adds a word to the stopword dictionary.
 * The word is trimmed and lower-cased before it is handed to {@code fillSegment}.
 */
public static void addStopword(String word) {
    final String normalized = word.trim().toLowerCase();
    final char[] segment = normalized.toCharArray();
    singleton._StopWords.fillSegment(segment);
}
/**
 * Removes (masks) a word from the stopword dictionary.
 * The word is trimmed and lower-cased before it is handed to {@code disableSegment}.
 */
public static void disableStopword(String word) {
    final String normalized = word.trim().toLowerCase();
    final char[] segment = normalized.toCharArray();
    singleton._StopWords.disableSegment(segment);
}
/**
 * Loads jdbc.properties from the dictionary root into {@code props} and logs every
 * loaded property. Values whose key contains "password" are masked in the log.
 * A read failure is logged, not rethrown.
 */
public void loadJdbcProperties() {
    Path file = PathUtils.get(getDictRoot(), DatabaseMonitor.PATH_JDBC_PROPERTIES);
    // try-with-resources so the stream is closed whether or not loading succeeds
    try (FileInputStream input = new FileInputStream(file.toFile())) {
        props.load(input);
        logger.info("====================================properties====================================");
        for (Map.Entry<Object, Object> entry : props.entrySet()) {
            String key = String.valueOf(entry.getKey());
            // Never write credentials to the log in clear text.
            Object value = key.toLowerCase().contains("password") ? "******" : entry.getValue();
            logger.info("{}: {}", key, value);
        }
        logger.info("====================================properties====================================");
    } catch (IOException e) {
        logger.error("failed to read file: " + DatabaseMonitor.PATH_JDBC_PROPERTIES, e);
    }
}
8.4 initial()启动自己实现的数据库监控线程
// 建立数据库监控线程
pool.scheduleAtFixedRate(new DatabaseMonitor(), 10, Long.parseLong(getSingleton().getProperty(DatabaseMonitor.JDBC_UPDATE_INTERVAL)), TimeUnit.SECONDS);
9 打包
10 .压缩包上传到elasticsearch插件目录下并解压
11 重启docker ,并测试效果
当数据库51英寸这个word的 is_deleted为1时,表示这个分词不起作用
当数据库51英寸这个word的 is_deleted为0时,表示这个分词起作用