1、下载 elasticsearch-analysis-ik 源码包
下载地址:
https://github.com/medcl/elasticsearch-analysis-ik/releases
2、修改源码
修改pom.xml中对应版本号
org.wltea.analyzer.dic.Dictionary 单例类的初始化方法 initial,所有词库都是在这里进行创建
/**
 * Initializes the shared {@code Dictionary} singleton.
 *
 * <p>IK Analyzer initializes its dictionaries through static methods of the Dictionary class,
 * so they are only loaded when the class is first used, which lengthens the first
 * tokenization. Calling this method during application start-up loads them eagerly instead.
 *
 * <p>The method is idempotent: once the singleton exists, later calls return immediately.
 *
 * @param cfg plugin configuration used to build the dictionary and to decide whether the
 *            remote dictionary monitors are scheduled
 */
public static synchronized void initial(Configuration cfg) {
    // This static synchronized method already holds the Dictionary.class monitor, so one
    // null check is sufficient; re-locking the same monitor inside added nothing.
    if (singleton != null) {
        return;
    }

    singleton = new Dictionary(cfg);
    // Main dictionary
    singleton.loadMainDict();
    singleton.loadSurnameDict();
    singleton.loadQuantifierDict();
    singleton.loadSuffixDict();
    singleton.loadPrepDict();
    // Stop-word dictionary
    singleton.loadStopWordDict();

    // Extra thread that keeps reloading the dictionaries. It is named so it can be found in
    // thread dumps, and marked daemon so its endless loop cannot keep the JVM alive.
    Thread hotDictThread = new Thread(new HotDict(), "ik-hot-dict-reload");
    hotDictThread.setDaemon(true);
    hotDictThread.start();

    if (cfg.isEnableRemoteDict()) {
        // Set up the monitor tasks: 10 s initial delay (adjustable), 60 s period.
        for (String location : singleton.getRemoteExtDictionarys()) {
            pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
        }
        for (String location : singleton.getRemoteExtStopWordDictionarys()) {
            pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
        }
    }
}
新增类HotDict,循环调用方法,对词库进行更新
package org.wltea.analyzer.dic;
/**
 * Background task that reloads the IK dictionaries in a loop by calling
 * {@code Dictionary.getSingleton().reLoadMainDict()}.
 *
 * <p>The loop ends when the running thread is interrupted. An unchecked exception from one
 * reload is reported and followed by a short pause, so a single failure neither terminates
 * the reload thread nor turns the loop into a busy spin.
 */
public class HotDict implements Runnable {

    /** Pause after a failed reload before trying again, in milliseconds. */
    private static final long FAILURE_BACKOFF_MILLIS = 1000L;

    @Override
    public void run() {
        Thread self = Thread.currentThread();
        while (!self.isInterrupted()) {
            try {
                Dictionary.getSingleton().reLoadMainDict();
            } catch (RuntimeException e) {
                // Report through the thread's uncaught-exception handler (the same channel
                // that would be used if the thread died) but keep the loop running.
                self.getUncaughtExceptionHandler().uncaughtException(self, e);
                try {
                    Thread.sleep(FAILURE_BACKOFF_MILLIS);
                } catch (InterruptedException interrupted) {
                    // Restore the flag so the loop condition sees the stop request.
                    self.interrupt();
                }
            }
        }
    }
}
在 loadMainDict 方法中添加自定义的 MySQL 词库
/**
 * Rebuilds the main dictionary: starts from a fresh root segment, reads the main dictionary
 * file, then adds the extension, remote and MySQL word sources in that order.
 */
private void loadMainDict() {
    // Fresh root segment for the main dictionary
    _MainDict = new DictSegment((char) 0);

    // Main dictionary file under the dictionary root
    Path mainDictPath = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN);
    loadDictFile(_MainDict, mainDictPath, false, "Main Dict");

    // Extension dictionaries
    loadExtDict();
    // Remote custom dictionaries
    loadRemoteExtDict();
    // Custom dictionary stored in MySQL
    loadMySQLExtDict();
}
/**
 * Loads the user-configured hot words from MySQL into the main dictionary.
 *
 * <p>Connection settings and the query ({@code jdbc.reload.main.sql}) are re-read from
 * {@code jdbc-reload.properties} under the dictionary root on every call. Each row's
 * {@code words} column is trimmed and added to {@code _MainDict}; NULL or blank values are
 * skipped. Failures are logged and not rethrown.
 *
 * <p>The method always pauses for {@code jdbc.reload.interval} milliseconds before returning
 * (10000 ms when the property is missing or invalid). The pause also happens after a failure,
 * so an unreachable database cannot make the reload loop spin.
 */
private void loadMySQLExtDict() {
    try {
        Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
        // Close the stream after loading: this method runs on every reload cycle, so an
        // unclosed stream would leak one file handle per cycle.
        try (FileInputStream in = new FileInputStream(file.toFile())) {
            props.load(in);
        }

        String mainSql = props.getProperty("jdbc.reload.main.sql");
        // Log the statement that is actually executed.
        logger.info("[==========]query hot dict from DB, " + mainSql + "......");

        Class.forName(props.getProperty("jdbc.driver"));
        // try-with-resources closes the result set, statement and connection in reverse
        // order, each one even if closing an earlier one fails.
        try (Connection conn = DriverManager.getConnection(
                     props.getProperty("jdbc.url"),
                     props.getProperty("jdbc.user"),
                     props.getProperty("jdbc.password"));
             Statement stmt = conn.createStatement();
             ResultSet mainRs = stmt.executeQuery(mainSql)) {
            while (mainRs.next()) {
                String theWord = mainRs.getString("words");
                // A NULL or blank column must not abort the remaining rows.
                if (theWord == null || theWord.trim().isEmpty()) {
                    continue;
                }
                logger.info("hot word: " + theWord);
                _MainDict.fillSegment(theWord.trim().toCharArray());
            }
        }
    } catch (Exception e) {
        logger.error("[Dict Loading] load MySQL hot words failed", e);
    } finally {
        // Pause in finally so that a failed load is throttled exactly like a successful one.
        long intervalMillis = 10000L;
        String configured = props.getProperty("jdbc.reload.interval");
        if (configured != null) {
            try {
                long parsed = Long.parseLong(configured.trim());
                if (parsed >= 0) {
                    intervalMillis = parsed;
                }
            } catch (NumberFormatException e) {
                logger.error("invalid jdbc.reload.interval: " + configured, e);
            }
        }
        try {
            Thread.sleep(intervalMillis);
        } catch (InterruptedException e) {
            // Preserve the interrupt for the calling thread instead of swallowing it.
            Thread.currentThread().interrupt();
        }
    }
}
修改加载停用词的方法 loadStopWordDict
/**
 * Rebuilds the stop-word dictionary from the stop-word file under the dictionary root,
 * the configured extension stop-word files, the remote stop-word locations and finally
 * the MySQL stop-word source.
 */
private void loadStopWordDict() {
    // Fresh root segment for the stop words
    _StopWords = new DictSegment((char) 0);

    // Stop-word file under the dictionary root
    Path bundledStopWords = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_STOP);
    loadDictFile(_StopWords, bundledStopWords, false, "Main Stopwords");

    // Extension stop-word files
    List<String> localExtFiles = getExtStopWordDictionarys();
    if (localExtFiles != null) {
        for (String extName : localExtFiles) {
            logger.info("[Dict Loading] " + extName);
            loadDictFile(_StopWords, PathUtils.get(extName), false, "Extra Stopwords");
        }
    }

    // Remote stop-word locations
    for (String location : getRemoteExtStopWordDictionarys()) {
        logger.info("[Dict Loading] " + location);
        List<String> remoteWords = getRemoteWords(location);
        if (remoteWords == null) {
            // A location that yields nothing is reported and skipped
            logger.error("[Dict Loading] " + location + " load failed");
            continue;
        }
        for (String candidate : remoteWords) {
            if (candidate == null) {
                continue;
            }
            String trimmed = candidate.trim();
            if (trimmed.isEmpty()) {
                continue;
            }
            // Add the remote word to the in-memory stop-word dictionary
            logger.info(candidate);
            _StopWords.fillSegment(trimmed.toLowerCase().toCharArray());
        }
    }

    // Stop words stored in MySQL
    loadMySQLExtStopDict();
}
/**
 * Loads the user-configured stop words from MySQL into the stop-word dictionary.
 *
 * <p>Connection settings and the query ({@code jdbc.reload.stopWord.sql}) are re-read from
 * {@code jdbc-reload.properties} under the dictionary root on every call. Each row's
 * {@code words} column is trimmed and added to {@code _StopWords}; NULL or blank values are
 * skipped. Failures are logged and not rethrown.
 *
 * <p>The method always pauses for {@code jdbc.reload.interval} milliseconds before returning
 * (10000 ms when the property is missing or invalid). The pause also happens after a failure,
 * so an unreachable database cannot make the reload loop spin.
 */
private void loadMySQLExtStopDict() {
    try {
        Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
        // Close the stream after loading: this method runs on every reload cycle, so an
        // unclosed stream would leak one file handle per cycle.
        try (FileInputStream in = new FileInputStream(file.toFile())) {
            props.load(in);
        }

        String stopWordSql = props.getProperty("jdbc.reload.stopWord.sql");
        // Log the statement that is actually executed.
        logger.info("[==========]query hot dict from DB, " + stopWordSql + "......");

        Class.forName(props.getProperty("jdbc.driver"));
        // try-with-resources closes the result set, statement and connection in reverse
        // order, each one even if closing an earlier one fails.
        try (Connection conn = DriverManager.getConnection(
                     props.getProperty("jdbc.url"),
                     props.getProperty("jdbc.user"),
                     props.getProperty("jdbc.password"));
             Statement stmt = conn.createStatement();
             ResultSet stopRs = stmt.executeQuery(stopWordSql)) {
            while (stopRs.next()) {
                String theWord = stopRs.getString("words");
                // A NULL or blank column must not abort the remaining rows.
                if (theWord == null || theWord.trim().isEmpty()) {
                    continue;
                }
                logger.info("stop word: " + theWord);
                _StopWords.fillSegment(theWord.trim().toCharArray());
            }
        }
    } catch (Exception e) {
        logger.error("[Dict Loading] load MySQL stop words failed", e);
    } finally {
        // Pause in finally so that a failed load is throttled exactly like a successful one.
        long intervalMillis = 10000L;
        String configured = props.getProperty("jdbc.reload.interval");
        if (configured != null) {
            try {
                long parsed = Long.parseLong(configured.trim());
                if (parsed >= 0) {
                    intervalMillis = parsed;
                }
            } catch (NumberFormatException e) {
                logger.error("invalid jdbc.reload.interval: " + configured, e);
            }
        }
        try {
            Thread.sleep(intervalMillis);
        } catch (InterruptedException e) {
            // Preserve the interrupt for the calling thread instead of swallowing it.
            Thread.currentThread().interrupt();
        }
    }
}
配置文件 config/jdbc-reload.properties
jdbc.url=jdbc:mysql://localhost:3306/elasticsearch
jdbc.user=root
jdbc.password=passwd
jdbc.reload.main.sql=SELECT words FROM msb_extword
jdbc.reload.stopWord.sql=SELECT words FROM msb_stopword
jdbc.reload.interval=10000
jdbc.driver=com.mysql.cj.jdbc.Driver
修改 src/main/assemblies/plugin.xml
<!-- 将 mysql 的 jar 包打包进 zip 包 -->
<dependencySet>
<outputDirectory/>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<includes>
<include>mysql:mysql-connector-java</include>
</includes>
</dependencySet>
修改 pom.xml
<elasticsearch.version>8.5.3</elasticsearch.version>
增加MySQL依赖
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.23</version>
</dependency>
查看分词效果
3、错误解决
【错误一】
在编译的时候出现以下错误
类文件具有错误的版本 61.0, 应为 52.0
原因是 JDK 版本不对:Elasticsearch 8.0+ 版本需要使用 JDK 17,因此需要在 IDEA 的 Project Structure 中把项目使用的 JDK 设置为 17
【错误二】
[2022-12-20T14:33:03,590][INFO ][c.a.c.i.j.JacksonVersion ] [node-1] Package versions: jackson-annotations=2.13.2, jackson-core=2.13.2, jackson-databind=2.13.2.2, jackson-dataformat-xml=2.13.2, jackson-datatype-jsr310=2.13.2, azure-core=1.27.0, Troubleshooting version conflicts: https://aka.ms/azsdk/java/dependency/troubleshoot
[2022-12-20T14:33:06,363][ERROR][o.e.b.Elasticsearch ] [node-1] fatal exception while booting Elasticsearch
java.lang.IllegalStateException: failed to load plugin analysis-ik due to jar hell
at org.elasticsearch.plugins.PluginsUtils.checkBundleJarHell(PluginsUtils.java:267) ~[elasticsearch-8.5.3.jar:?]
at org.elasticsearch.plugins.PluginsService.loadBundles(PluginsService.java:289) ~[elasticsearch-8.5.3.jar:?]
at org.elasticsearch.plugins.PluginsService.<init>(PluginsService.java:159) ~[elasticsearch-8.5.3.jar:?]
at org.elasticsearch.plugins.PluginsService.lambda$getPluginsServiceCtor$14(PluginsService.java:634) ~[elasticsearch-8.5.3.jar:?]
at org.elasticsearch.node.Node.<init>(Node.java:411) ~[elasticsearch-8.5.3.jar:?]
at org.elasticsearch.node.Node.<init>(Node.java:318) ~[elasticsearch-8.5.3.jar:?]
at org.elasticsearch.bootstrap.Elasticsearch$2.<init>(Elasticsearch.java:214) ~[elasticsearch-8.5.3.jar:?]
at org.elasticsearch.bootstrap.Elasticsearch.initPhase3(Elasticsearch.java:214) ~[elasticsearch-8.5.3.jar:?]
at org.elasticsearch.bootstrap.Elasticsearch.main(Elasticsearch.java:67) ~[elasticsearch-8.5.3.jar:?]
Caused by: java.lang.IllegalStateException: jar hell!
class: org.elasticsearch.plugin.api.Extensible
jar1: C:\Users\tetu\Desktop\myProject\tools\elasticsearch\elasticsearch-8.5.3\plugins\ik\elasticsearch-plugin-api-8.5.3.jar
jar2: C:\Users\tetu\Desktop\myProject\tools\elasticsearch\elasticsearch-8.5.3\lib\elasticsearch-plugin-api-8.5.3.jar
at org.elasticsearch.jdk.JarHell.checkClass(JarHell.java:315) ~[elasticsearch-core-8.5.3.jar:?]
at org.elasticsearch.jdk.JarHell.checkJarHell(JarHell.java:233) ~[elasticsearch-core-8.5.3.jar:?]
at org.elasticsearch.plugins.PluginsUtils.checkBundleJarHell(PluginsUtils.java:265) ~[elasticsearch-8.5.3.jar:?]
... 8 more
原因是插件目录中打包进了与 ES 自身 lib 目录重复的 jar(如上面日志中的 elasticsearch-plugin-api-8.5.3.jar)。有可能是我的打包方式有问题,解决办法是将 ik 分词器插件目录下其余的 jar 包删除,只保留以下的 jar 包
【错误三】
java.sql.SQLNonTransientConnectionException: Could not create connection to database server.
at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:526) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:513) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:505) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:479) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:1779) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:1596) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:633) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:347) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:219) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at java.sql.DriverManager.getConnection(DriverManager.java:664) ~[?:1.8.0_181]
at java.sql.DriverManager.getConnection(DriverManager.java:247) ~[?:1.8.0_181]
at org.wltea.analyzer.dic.Dictionary.loadMySQLExtDict(Dictionary.java:464) [elasticsearch-analysis-ik-7.2.0.jar:?]
at org.wltea.analyzer.dic.Dictionary.loadMainDict(Dictionary.java:402) [elasticsearch-analysis-ik-7.2.0.jar:?]
at org.wltea.analyzer.dic.Dictionary.reLoadMainDict(Dictionary.java:698) [elasticsearch-analysis-ik-7.2.0.jar:?]
at org.wltea.analyzer.dic.Dictionary$HotDictReloadThread.run(Dictionary.java:714) [elasticsearch-analysis-ik-7.2.0.jar:?]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_181]
Caused by: java.security.AccessControlException: access denied ("java.net.SocketPermission" "localhost:3306" "connect,resolve")
at java.security.AccessControlContext.checkPermission(AccessControlContext.java:472) ~[?:1.8.0_181]
at java.security.AccessController.checkPermission(AccessController.java:884) ~[?:1.8.0_181]
at java.lang.SecurityManager.checkPermission(SecurityManager.java:549) ~[?:1.8.0_181]
at java.lang.SecurityManager.checkConnect(SecurityManager.java:1051) ~[?:1.8.0_181]
at java.net.Socket.connect(Socket.java:584) ~[?:1.8.0_181]
at com.mysql.cj.core.io.StandardSocketFactory.connect(StandardSocketFactory.java:202) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.mysqla.io.MysqlaSocketConnection.connect(MysqlaSocketConnection.java:57) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.mysqla.MysqlaSession.connect(MysqlaSession.java:122) ~[mysql-connector-java-6.0.6.jar:6.0.6]
at com.mysql.cj.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:1726) ~[mysql-connector-java-6.0.6.jar:6.0.6]
修改 $JAVA_HOME/jre/lib/security/java.policy
grant {
// needed because of the hot reload functionality
permission java.net.SocketPermission "*", "connect,resolve";
};
【错误四】
Caused by: java.security.AccessControlException: access denied ("java.lang.RuntimePermission" "setContextClassLoader")
at java.security.AccessControlContext.checkPermission(AccessControlContext.java:485) ~[?:?]
at java.security.AccessController.checkPermission(AccessController.java:1068) ~[?:?]
at java.lang.SecurityManager.checkPermission(SecurityManager.java:411) ~[?:?]
at java.lang.Thread.setContextClassLoader(Thread.java:2457) ~[?:?]
at com.mysql.cj.jdbc.AbandonedConnectionCleanupThread.lambda$static$0(AbandonedConnectionCleanupThread.java:77) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.<init>(ThreadPoolExecutor.java:637) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor.addWorker(ThreadPoolExecutor.java:928) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor.execute(ThreadPoolExecutor.java:1364) ~[?:?]
at java.util.concurrent.Executors$DelegatedExecutorService.execute(Executors.java:759) ~[?:?]
at com.mysql.cj.jdbc.AbandonedConnectionCleanupThread.<clinit>(AbandonedConnectionCleanupThread.java:80) ~[?:?]
修改 $JAVA_HOME/jre/lib/security/java.policy
grant {
// needed because of the hot reload functionality
permission java.net.SocketPermission "*", "connect,resolve";
permission java.lang.RuntimePermission "createClassLoader";
permission java.lang.RuntimePermission "getClassLoader";
permission java.lang.RuntimePermission "accessDeclaredMembers";
permission java.lang.RuntimePermission "setContextClassLoader";
};
如果上述修改不成功,则需要在es的安装目录下的config文件夹下新建文件java.policy
grant {
// needed because of the hot reload functionality
permission java.net.SocketPermission "*", "connect,resolve";
permission java.lang.RuntimePermission "createClassLoader";
permission java.lang.RuntimePermission "getClassLoader";
permission java.lang.RuntimePermission "accessDeclaredMembers";
permission java.lang.RuntimePermission "setContextClassLoader";
};
再在 jvm.options 中新增以下配置
-Djava.security.policy=config/java.policy
【错误五】
The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server.
at com.mysql.cj.jdbc.exceptions.SQLError.createCommunicationsException(SQLError.java:174) ~[?:?]
at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:64) ~[?:?]
at com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:833) ~[?:?]
at com.mysql.cj.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:453) ~[?:?]
at com.mysql.cj.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:246) ~[?:?]
at com.mysql.cj.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:198) ~[?:?]
at java.sql.DriverManager.getConnection(DriverManager.java:683) ~[java.sql:?]
at java.sql.DriverManager.getConnection(DriverManager.java:230) ~[java.sql:?]
at org.wltea.analyzer.dic.Dictionary.loadMySQLExtDict(Dictionary.java:415) ~[?:?]
at org.wltea.analyzer.dic.Dictionary.loadMainDict(Dictionary.java:399) ~[?:?]
at org.wltea.analyzer.dic.Dictionary.reLoadMainDict(Dictionary.java:670) ~[?:?]
at org.wltea.analyzer.dic.HotDict.run(HotDict.java:7) ~[?:?]
at java.lang.Thread.run(Thread.java:1589) ~[?:?]
Caused by: com.mysql.cj.exceptions.CJCommunicationsException: Communications link failure
修改配置文件
jdbc.url=jdbc:mysql://localhost:3307/elasticsearch?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC&useSSL=false
【错误六】
Caused by: java.security.AccessControlException: access denied ("java.security.SecurityPermission" "putProviderProperty.MySQLScramShaSasl")
at java.security.AccessControlContext.checkPermission(AccessControlContext.java:485) ~[?:?]
at java.security.AccessController.checkPermission(AccessController.java:1068) ~[?:?]
at java.lang.SecurityManager.checkPermission(SecurityManager.java:411) ~[?:?]
at java.lang.SecurityManager.checkSecurityAccess(SecurityManager.java:1526) ~[?:?]
at java.security.Provider.check(Provider.java:816) ~[?:?]
at java.security.Provider.putService(Provider.java:1356) ~[?:?]
at com.mysql.cj.sasl.ScramShaSaslProvider.lambda$new$0(ScramShaSaslProvider.java:78) ~[?:?]
at java.security.AccessController.doPrivileged(AccessController.java:318) ~[?:?]
at com.mysql.cj.sasl.ScramShaSaslProvider.<init>(ScramShaSaslProvider.java:77) ~[?:?]
at com.mysql.cj.protocol.a.authentication.AuthenticationLdapSaslClientPlugin.<clinit>(AuthenticationLdapSaslClientPlugin.java:73) ~[?:?]
... 38 more
修改数据库连接权限,并确认该用户是否允许从外部主机连接
mysql> CREATE USER 'root'@'%' IDENTIFIED BY 'password';
Query OK, 0 rows affected (0.02 sec)
mysql> grant all privileges on *.* to 'root'@'%' ;
Query OK, 0 rows affected (0.01 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.00 sec)
【错误七】
java.sql.SQLNonTransientConnectionException: Public Key Retrieval is not allowed
at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:110) ~[?:?]
at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122) ~[?:?]
at com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:833) ~[?:?]
at com.mysql.cj.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:453) ~[?:?]
at com.mysql.cj.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:246) ~[?:?]
at com.mysql.cj.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:198) ~[?:?]
at java.sql.DriverManager.getConnection(DriverManager.java:683) ~[java.sql:?]
at java.sql.DriverManager.getConnection(DriverManager.java:230) ~[java.sql:?]
at org.wltea.analyzer.dic.Dictionary.loadMySQLExtStopDict(Dictionary.java:604) ~[?:?]
at org.wltea.analyzer.dic.Dictionary.loadStopWordDict(Dictionary.java:587) ~[?:?]
at org.wltea.analyzer.dic.Dictionary.reLoadMainDict(Dictionary.java:671) ~[?:?]
at org.wltea.analyzer.dic.HotDict.run(HotDict.java:7) ~[?:?]
at java.lang.Thread.run(Thread.java:1589) ~[?:?]
Caused by: com.mysql.cj.exceptions.UnableToConnectException: Public Key Retrieval is not allowed
at jdk.internal.reflect.DirectConstructorHandleAccessor.newInstance(DirectConstructorHandleAccessor.java:67) ~[?:?]
at java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500) ~[?:?]
at java.lang.reflect.Constructor.newInstance(Constructor.java:484) ~[?:?]
at com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:61) ~[?:?]
at com.mysql.cj.exceptions.ExceptionFactory.createException(ExceptionFactory.java:85) ~[?:?]
at com.mysql.cj.protocol.a.authentication.CachingSha2PasswordPlugin.nextAuthenticationStep(CachingSha2PasswordPlugin.java:128) ~[?:?]
at com.mysql.cj.protocol.a.authentication.CachingSha2PasswordPlugin.nextAuthenticationStep(CachingSha2PasswordPlugin.java:49) ~[?:?]
at com.mysql.cj.protocol.a.NativeAuthenticationProvider.proceedHandshakeWithPluggableAuthentication(NativeAuthenticationProvider.java:441) ~[?:?]
at com.mysql.cj.protocol.a.NativeAuthenticationProvider.connect(NativeAuthenticationProvider.java:174) ~[?:?]
at com.mysql.cj.protocol.a.NativeProtocol.connect(NativeProtocol.java:1350) ~[?:?]
at com.mysql.cj.NativeSession.connect(NativeSession.java:157) ~[?:?]
at com.mysql.cj.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:953) ~[?:?]
at com.mysql.cj.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:823) ~[?:?]
... 10 more
在 JDBC 连接串中增加 allowPublicKeyRetrieval=true,如下
jdbc.url=jdbc:mysql://localhost:3307/elasticsearch?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC&useSSL=false&allowPublicKeyRetrieval=true