solr 配置updatechain实现动态更新索引
需求:Solr 在添加文档时,对指定字段的内容调用 HanLP 进行分词,按词性提取关键字,并自动写入另一个字段。
话不多说直接看代码
package custom.solr;
/**
* @author:lulux
* @create: 2022-09-23 17:59
* @Description: solr在更新时自动加入标签
*/
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.NShort.NShortSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
import org.apache.solr.util.RefCounted;
import java.io.IOException;
import java.util.*;
/**
 * Factory that Solr instantiates by class name from the update chain configuration.
 * Each call to {@link #getInstance} produces a fresh {@code ConditionalUpdateProcessor}
 * linked to the next processor in the chain.
 */
public class ConditionUpdateProcessFactory extends UpdateRequestProcessorFactory {

    /**
     * Builds the processor that tags documents before they continue down the chain.
     *
     * @param req  the update request being processed
     * @param rsp  the response associated with the request
     * @param next the processor that should receive each command afterwards
     * @return a new {@code ConditionalUpdateProcessor} wrapping {@code next}
     */
    @Override
    public UpdateRequestProcessor getInstance(
            SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
        final UpdateRequestProcessor taggingProcessor =
                new ConditionalUpdateProcessor(req, rsp, next);
        return taggingProcessor;
    }
}
/**
 * Update processor that derives keyword tags from two source fields of every added
 * document and stores them in a target field before passing the document on.
 *
 * <p>The text of the two source fields is concatenated, segmented with HanLP, and every
 * term whose part-of-speech nature starts with {@link #WORD_NATURE} is collected into the
 * {@link #SUBMIT} field. Field names and the nature prefix are read once, at class-load
 * time, through {@code GetParameters}.
 */
class ConditionalUpdateProcessor extends UpdateRequestProcessor {
    /** Name of the first source field (property {@code field}). */
    public static String ORIGIN_TIMESTAMP1 = "";
    /** Name of the second source field (property {@code field2}). */
    public static String ORIGIN_TIMESTAMP2 = "";
    /** Name of the field that receives the extracted keywords (property {@code submit}). */
    public static String SUBMIT = "";
    /** Part-of-speech nature prefix a term must have to be kept (property {@code wordNature}). */
    public static String WORD_NATURE = "";

    static {
        try {
            Map<String, String> data = new GetParameters().getData();
            ORIGIN_TIMESTAMP1 = data.get("field");
            ORIGIN_TIMESTAMP2 = data.get("field2");
            SUBMIT = data.get("submit");
            WORD_NATURE = data.get("wordNature");
        } catch (IOException | RuntimeException e) {
            // Do not fail class initialization: without configuration the processor simply
            // forwards documents untouched (see isConfigured()). The failure is reported
            // instead of being silently swallowed.
            java.util.logging.Logger.getLogger(ConditionalUpdateProcessor.class.getName())
                    .log(java.util.logging.Level.SEVERE,
                            "Failed to load tagging configuration; "
                                    + "documents will be indexed without keyword tags", e);
        }
    }

    /**
     * @param req  the update request (not used by this processor)
     * @param rsp  the response (not used by this processor)
     * @param next the processor that receives each command after tagging
     */
    public ConditionalUpdateProcessor(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
        super(next);
    }

    /**
     * Adds the keyword field to the incoming document and forwards the command exactly once.
     *
     * <p>The previous implementation looked the id up in the realtime searcher and, for new
     * documents, forwarded the command a first time without tags and then a second time with
     * them; it also never released the searcher reference. The lookup had no other effect
     * and has been removed.
     *
     * @param cmd the add command whose document is tagged in place
     * @throws IOException if a later processor in the chain fails
     */
    @Override
    public void processAdd(AddUpdateCommand cmd) throws IOException {
        if (isConfigured()) {
            SolrInputDocument doc = cmd.getSolrInputDocument();
            String text = fieldText(doc, ORIGIN_TIMESTAMP1) + fieldText(doc, ORIGIN_TIMESTAMP2);
            doc.setField(SUBMIT, extractKeywords(text));
        }
        // pass it up the chain
        super.processAdd(cmd);
    }

    /** Tells whether a target field name and a nature prefix are available. */
    private static boolean isConfigured() {
        return SUBMIT != null && !SUBMIT.isEmpty() && WORD_NATURE != null;
    }

    /** Returns the string form of the field's value, or an empty string when it is absent. */
    private static String fieldText(SolrInputDocument doc, String fieldName) {
        if (fieldName == null || fieldName.isEmpty()) {
            return "";
        }
        Object value = doc.getFieldValue(fieldName);
        return value == null ? "" : value.toString();
    }

    /** Segments the text and collects the distinct words whose nature starts with WORD_NATURE. */
    private static Set<String> extractKeywords(String text) {
        // LinkedHashSet de-duplicates while keeping the order in which terms were found.
        Set<String> keywords = new LinkedHashSet<String>();
        Segment segment = HanLP.newSegment().enableCustomDictionaryForcing(true);
        for (Term term : segment.seg(text)) {
            if (term.nature != null && term.nature.startsWith(WORD_NATURE)) {
                // Use the raw word rather than stripping "/<prefix>" from Term.toString(),
                // which left the remainder of longer nature codes attached to the word.
                // The "}" removal is carried over from the original implementation.
                keywords.add(term.word.replace("}", ""));
            }
        }
        return keywords;
    }
}
GetParameters类用于获取外部配置文件里的配置项
package custom.solr;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
/**
 * Reads the tagging configuration from an external properties file: the names of the two
 * source fields, the name of the field that receives the keywords, and the part-of-speech
 * nature prefix.
 *
 * @author lulux
 * @create 2022-10-18 11:39
 */
public class GetParameters {
    /** File name of the configuration file inside the {@code classes} directory. */
    private static final String CONFIG_FILE_NAME = "labelData.properties";

    /** Property keys copied into the returned map. */
    private static final String[] KEYS = {"field", "field2", "submit", "wordNature"};

    /**
     * Loads the configuration from {@code <base>/classes/labelData.properties}, where
     * {@code <base>} is the directory two levels above the location this class was loaded
     * from (for a jar at {@code <base>/lib/x.jar} that is {@code <base>}).
     *
     * @return a map with the keys {@code field}, {@code field2}, {@code submit} and
     *         {@code wordNature}; a value is {@code null} when the property is not set
     * @throws IOException if the file cannot be located or read
     */
    public Map<String, String> getData() throws IOException {
        return getData(locateConfigFile());
    }

    /**
     * Loads the configuration from an explicitly given properties file.
     *
     * @param configFile the properties file to read
     * @return a map with the keys {@code field}, {@code field2}, {@code submit} and
     *         {@code wordNature}; a value is {@code null} when the property is not set
     * @throws IOException if the file does not exist or cannot be read
     */
    public Map<String, String> getData(File configFile) throws IOException {
        Properties properties = new Properties();
        // try-with-resources closes the stream even when load() fails.
        try (FileInputStream in = new FileInputStream(configFile)) {
            properties.load(in);
        }
        Map<String, String> map = new HashMap<String, String>();
        for (String key : KEYS) {
            map.put(key, properties.getProperty(key));
        }
        return map;
    }

    /** Resolves the default configuration file relative to this class's code source. */
    private File locateConfigFile() throws IOException {
        java.security.CodeSource codeSource = getClass().getProtectionDomain().getCodeSource();
        if (codeSource == null || codeSource.getLocation() == null) {
            throw new IOException("Cannot determine the code source location of " + getClass().getName());
        }
        File loadedFrom = toFile(codeSource.getLocation());
        File parent = loadedFrom.getParentFile();
        File base = parent == null ? null : parent.getParentFile();
        if (base == null) {
            throw new IOException("Cannot derive the configuration directory from " + loadedFrom);
        }
        return new File(new File(base, "classes"), CONFIG_FILE_NAME);
    }

    /** Converts a code-source URL to a file, decoding escapes such as {@code %20} when possible. */
    private static File toFile(java.net.URL location) {
        try {
            return new File(location.toURI());
        } catch (java.net.URISyntaxException | IllegalArgumentException e) {
            // Not a plain file: URI; fall back to the raw path as the original code did.
            return new File(location.getPath());
        }
    }
}
以上步骤完成后将代码打包上传到solr服务器
2.接下来配置solr
在 solrconfig.xml 中为 /update 请求处理器指定默认使用的 update chain 名称。
<requestHandler name="/update" class="solr.UpdateRequestHandler">
<!-- See below for information on defining
updateRequestProcessorChains that can be used by name
on each Update Request
-->
<lst name="defaults">
<!-- Default chain for requests sent to /update; the value "condition" must match
the name attribute of an updateRequestProcessorChain defined in this file. -->
<str name="update.chain">condition</str>
</lst>
</requestHandler>
#chain 的名字(需与上面 update.chain 的值一致)
<!-- Chain referenced by the update.chain default of the /update handler.
Processors are listed in the order: logging, distributed update, the custom
tagging processor, then RunUpdateProcessorFactory. -->
<updateRequestProcessorChain name="condition">
<processor class="solr.LogUpdateProcessorFactory" />
<processor class="solr.DistributedUpdateProcessorFactory" />
<!-- Custom factory from the uploaded jar; NOTE(review): it is placed before
RunUpdateProcessorFactory, presumably so the keyword field is set before the
document is actually indexed - confirm against the Solr reference guide. -->
<processor class="custom.solr.ConditionUpdateProcessFactory" />
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>