solr 配置updatechain实现动态更新索引

solr 配置updatechain实现动态更新索引

需求:Solr 在添加文档时,读取指定字段(下文配置中的"索引")的内容,调用 HanLP 分词并按词性筛选出关键字,再把这些关键字自动写入另一个字段。

话不多说直接看代码

package custom.solr;


/**
 * @author:lulux
 * @create: 2022-09-23 17:59
 * @Description: solr在更新时自动加入标签
 */

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.NShort.NShortSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
import org.apache.solr.util.RefCounted;

import java.io.IOException;
import java.util.*;


/**
 * Factory that produces {@link ConditionalUpdateProcessor} instances.
 *
 * <p>This is the class referenced by name from the update request processor
 * chain configuration.
 */
public class ConditionUpdateProcessFactory extends UpdateRequestProcessorFactory {

    /**
     * Creates a new processor for the given request/response pair.
     *
     * @param req  the update request being handled
     * @param rsp  the response associated with the request
     * @param next the processor that the new instance delegates to
     * @return a fresh {@link ConditionalUpdateProcessor} chained in front of {@code next}
     */
    @Override
    public UpdateRequestProcessor getInstance(
            SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
        final UpdateRequestProcessor processor = new ConditionalUpdateProcessor(req, rsp, next);
        return processor;
    }
}

/**
 * Update processor that tags every added document with keywords extracted by HanLP.
 *
 * <p>For each add command the text of two configured source fields is concatenated and
 * segmented with HanLP; every term whose part-of-speech tag starts with the configured
 * prefix is collected, and the resulting set is written to the configured target field
 * before the command is passed down the chain.
 *
 * <p>The field names and the part-of-speech prefix are read once, at class load time,
 * through {@link GetParameters}. If that fails, or the target field / prefix is not
 * configured, documents are forwarded unchanged.
 */
class ConditionalUpdateProcessor extends UpdateRequestProcessor {
    /** Name of the first source field whose text is segmented (property {@code field}). */
    public static String ORIGIN_TIMESTAMP1 = "";
    /** Name of the second source field whose text is segmented (property {@code field2}). */
    public static String ORIGIN_TIMESTAMP2 = "";
    /** Name of the field that receives the extracted keywords (property {@code submit}). */
    public static String SUBMIT = "";
    /** Part-of-speech prefix a term must match to be kept (property {@code wordNature}). */
    public static String WORD_NATURE = "";

    static {
        try {
            Map<String, String> data = new GetParameters().getData();
            ORIGIN_TIMESTAMP1 = orEmpty(data.get("field"));
            ORIGIN_TIMESTAMP2 = orEmpty(data.get("field2"));
            SUBMIT = orEmpty(data.get("submit"));
            WORD_NATURE = orEmpty(data.get("wordNature"));
        } catch (IOException | RuntimeException e) {
            // Do not fail class loading (that would break every update request);
            // report the problem and leave the settings empty so tagging is skipped.
            System.err.println("ConditionalUpdateProcessor: could not load keyword tagging "
                    + "configuration, documents will be indexed without tags: " + e);
        }
    }

    /**
     * Creates the processor.
     *
     * @param req  the update request being handled (not used by this processor)
     * @param rsp  the response associated with the request (not used by this processor)
     * @param next the next processor in the chain
     */
    public ConditionalUpdateProcessor(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
        super(next);
    }

    /**
     * Adds the extracted keywords to the document and forwards the command exactly once.
     *
     * <p>No index lookup is performed: the keywords depend only on the incoming document,
     * and forwarding the command a second time for not-yet-indexed ids would add the
     * document twice.
     *
     * @param cmd the add command carrying the document
     * @throws IOException if a downstream processor fails
     */
    @Override
    public void processAdd(AddUpdateCommand cmd) throws IOException {
        SolrInputDocument newDoc = cmd.getSolrInputDocument();
        // Read the mutable static settings once so one document sees a consistent view.
        final String targetField = SUBMIT;
        final String naturePrefix = WORD_NATURE;

        if (newDoc != null && !isBlank(targetField) && !isBlank(naturePrefix)) {
            String text = fieldText(newDoc, ORIGIN_TIMESTAMP1) + fieldText(newDoc, ORIGIN_TIMESTAMP2);
            // The target field is always overwritten, even with an empty set.
            newDoc.setField(targetField, extractKeywords(text, naturePrefix));
        }

        // pass it up the chain
        super.processAdd(cmd);
    }

    /** Returns the first value of {@code fieldName} as text, or "" when the field is unset or absent. */
    private static String fieldText(SolrInputDocument doc, String fieldName) {
        if (isBlank(fieldName)) {
            return "";
        }
        Object value = doc.getFieldValue(fieldName);
        return value == null ? "" : value.toString();
    }

    /** Segments {@code text} and returns the distinct words whose nature starts with {@code naturePrefix}. */
    private static Set<String> extractKeywords(String text, String naturePrefix) {
        Set<String> keywords = new LinkedHashSet<String>();
        if (text.isEmpty()) {
            return keywords;
        }
        Segment segment = HanLP.newSegment().enableCustomDictionaryForcing(true);
        List<Term> terms = segment.seg(text);
        for (Term term : terms) {
            // Use the term's own word instead of stripping "/nature" from toString(),
            // which leaves residue when the nature is longer than the prefix.
            if (term.nature != null && term.word != null && term.nature.startsWith(naturePrefix)) {
                keywords.add(term.word);
            }
        }
        return keywords;
    }

    /** Normalizes a possibly missing configuration value to a trimmed, non-null string. */
    private static String orEmpty(String value) {
        return value == null ? "" : value.trim();
    }

    /** Returns true when {@code value} is null or empty. */
    private static boolean isBlank(String value) {
        return value == null || value.isEmpty();
    }

}

GetParameters类用于获取外部配置文件里的配置项

package custom.solr;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * @author:lulux
 * @create: 2022-10-18 11:39
 * @Description:用于获取外部文件里需要监听的索引名称,和插入数据的索引名称以及分词词性
 */
/**
 * Reads the keyword tagging settings from the external file {@code labelData.properties}.
 *
 * <p>The file is looked up relative to the location this class was loaded from: two
 * directory levels above that location (for a jar in {@code X/lib/plugin.jar} that is
 * {@code X}), inside a {@code classes} sub-directory.
 */
public class GetParameters {

    /** Name of the external configuration file. */
    private static final String CONFIG_FILE_NAME = "labelData.properties";

    /** Property keys that are copied into the result map. */
    private static final String[] KEYS = {"field", "field2", "submit", "wordNature"};

    /**
     * Loads the configuration file and returns the known settings.
     *
     * @return a map with the keys {@code field}, {@code field2}, {@code submit} and
     *         {@code wordNature}; a key maps to {@code null} when the property is absent,
     *         otherwise to its value with surrounding whitespace removed
     * @throws IOException if the file location cannot be determined or the file cannot be read
     */
    public Map<String,String> getData() throws IOException {
        File configFile = locateConfigFile();

        Properties properties = new Properties();
        // try-with-resources closes the stream even when load() throws.
        try (FileInputStream in = new FileInputStream(configFile)) {
            properties.load(in);
        }

        Map<String, String> map = new HashMap<String, String>();
        for (String key : KEYS) {
            String value = properties.getProperty(key);
            // Properties keeps trailing whitespace, which would break name/prefix matching.
            map.put(key, value == null ? null : value.trim());
        }
        return map;
    }

    /** Resolves {@code <code location>/../../classes/labelData.properties}. */
    private File locateConfigFile() throws IOException {
        java.security.CodeSource source = getClass().getProtectionDomain().getCodeSource();
        if (source == null || source.getLocation() == null) {
            throw new IOException("Cannot determine the code location of " + getClass().getName());
        }

        File codeLocation;
        try {
            // Going through the URI decodes escaped characters such as %20.
            codeLocation = new File(source.getLocation().toURI());
        } catch (java.net.URISyntaxException | IllegalArgumentException e) {
            // Not a well-formed file: URI; fall back to the raw URL path.
            codeLocation = new File(source.getLocation().getPath());
        }

        File containingDir = codeLocation.getParentFile();
        File baseDir = containingDir == null ? null : containingDir.getParentFile();
        if (baseDir == null) {
            throw new IOException("Cannot derive the configuration directory from " + codeLocation);
        }
        return new File(new File(baseDir, "classes"), CONFIG_FILE_NAME);
    }


}

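以上步骤完成后,将代码打成 jar 包上传到 Solr 服务器(例如 Solr webapp 的 WEB-INF/lib 目录)。注意 GetParameters 是按 jar 包所在目录的上一级目录去找 classes/labelData.properties 的(jar 在 WEB-INF/lib 时即 WEB-INF/classes/labelData.properties),因此还需要在该位置创建配置文件,内容例如(每项一行):field=title、field2=content、submit=tags、wordNature=nt。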

2.接下来配置solr

配置solrconfig.xml的默认update用哪个chain名字。

<requestHandler name="/update" class="solr.UpdateRequestHandler">
    <!-- See below for information on defining
         updateRequestProcessorChains that can be used by name
         on each Update Request
      -->
      <!-- Defaults for this handler: update.chain is set to "condition",
           which is the name of the updateRequestProcessorChain to use. -->
      <lst name="defaults">
         <str name="update.chain">condition</str>
       </lst>
  </requestHandler>
        
<!-- chain 的定义:name 必须与上面 update.chain 的值(condition)一致 -->
 <updateRequestProcessorChain name="condition">
       <!-- Processors are declared in this order: logging, distributed update,
            the custom keyword tagger, then RunUpdateProcessorFactory. -->
       <!-- NOTE(review): the custom processor is declared after
            DistributedUpdateProcessorFactory; in a SolrCloud setup that presumably
            means it runs on every replica rather than once before distribution.
            Confirm this placement is intended. -->
       <processor class="solr.LogUpdateProcessorFactory" />
       <processor class="solr.DistributedUpdateProcessorFactory" />
       <processor class="custom.solr.ConditionUpdateProcessFactory" />
       <processor class="solr.RunUpdateProcessorFactory" />
 </updateRequestProcessorChain>
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

鹿鹿熊

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值