Solr FieldAnalysisRequestHandler: obtaining Solr's tokenization results

FieldAnalysisRequestHandler runs a piece of text through the analyzer chain of a given field or field type and returns the token data produced at each stage. First, look at the field type configuration used here:

  <!-- IKAnalyzer 中文分词 --> 
	     <fieldType name="IKAnalyzerText" class="solr.TextField" positionIncrementGap="100" >  
			   <analyzer type="index" >   
					 <tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="false" conf="ik.conf"/>
					 <filter class="solr.LowerCaseFilterFactory"/>  
					 	   <!--实现拼音搜索  暂时不用--> 
					<!-- <filter class="com.shentong.search.analyzers.PinyinTransformTokenFilterFactory" minTermLenght="2" />--> 
					<!-- <filter class="com.shentong.search.analyzers.PinyinNGramTokenFilterFactory" minGram="1" maxGram="20" />--> 
								   <!--过滤条件,大小写,听用词,同义词,分词数量--> 
					   <filter class="solr.LowerCaseFilterFactory"/>  
					   <filter class="solr.StopFilterFactory" ignoreCase="true"  words="stopwords.txt" enablePositionIncrements="true" /> 
					   <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
					   <filter class="solr.LengthFilterFactory" min="1" max="10" />
	     </analyzer>
			  <analyzer type="query">
                     			<tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="true" conf="ik.conf" isMaxWordLength="false" />
              </analyzer>
	   </fieldType>  

Requesting http://localhost:1234/taoye/analysis/field?q=麻辣香辣&analysis.fieldtype=text&indent=on&wt=json returns the tokenization of the query string, i.e. the output of the query-time analyzer in the configuration above (note that analysis.fieldtype must name a field type defined in the schema; for the schema above that is IKAnalyzerText):

  <analyzer type="query">
                     			<tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="true" conf="ik.conf" isMaxWordLength="false" />
  </analyzer>

If instead you request http://localhost:1234/taoye/analysis/field?analysis.fieldvalue=麻辣香辣&analysis.fieldtype=text&indent=on&wt=json, the handler analyzes the given field value and returns the tokens produced at every stage of the index-time chain. The result looks like this (shown here in XML form; the URL above requests wt=json, but the structure is the same):

<response>
<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">2</int>
</lst>
<lst name="analysis">
<lst name="field_types">
<lst name="IKAnalyzerText">
<lst name="index">
<arr name="org.wltea.analyzer.lucene.IKTokenizer">...</arr>
<arr name="org.apache.lucene.analysis.core.LowerCaseFilter">
<lst>
<str name="text">麻辣</str>
<str name="raw_bytes">[e9 ba bb e8 be a3]</str>
<int name="position">1</int>
<arr name="positionHistory">
<int>1</int>
<int>1</int>
</arr>
<int name="start">0</int>
<int name="end">2</int>
<str name="type">CN_WORD</str>
</lst>
<lst>
<str name="text">香辣</str>
<str name="raw_bytes">[e9 a6 99 e8 be a3]</str>
<int name="position">2</int>
<arr name="positionHistory">
<int>2</int>
<int>2</int>
</arr>
<int name="start">2</int>
<int name="end">4</int>
<str name="type">CN_WORD</str>
</lst>
</arr>
<arr name="org.apache.lucene.analysis.core.LowerCaseFilter">
<lst>
<str name="text">麻辣</str>
<str name="raw_bytes">[e9 ba bb e8 be a3]</str>
<int name="position">1</int>
<arr name="positionHistory">
<int>1</int>
<int>1</int>
<int>1</int>
</arr>
<int name="start">0</int>
<int name="end">2</int>
<str name="type">CN_WORD</str>
</lst>
<lst>
<str name="text">香辣</str>
<str name="raw_bytes">[e9 a6 99 e8 be a3]</str>
<int name="position">2</int>
<arr name="positionHistory">
<int>2</int>
<int>2</int>
<int>2</int>
</arr>
<int name="start">2</int>
<int name="end">4</int>
<str name="type">CN_WORD</str>
</lst>
</arr>
<arr name="org.apache.lucene.analysis.core.StopFilter">
<lst>
<str name="text">麻辣</str>
<str name="raw_bytes">[e9 ba bb e8 be a3]</str>
<int name="position">1</int>
<arr name="positionHistory">
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
</arr>
<int name="start">0</int>
<int name="end">2</int>
<str name="type">CN_WORD</str>
</lst>
<lst>
<str name="text">香辣</str>
<str name="raw_bytes">[e9 a6 99 e8 be a3]</str>
<int name="position">2</int>
<arr name="positionHistory">
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
</arr>
<int name="start">2</int>
<int name="end">4</int>
<str name="type">CN_WORD</str>
</lst>
</arr>
<arr name="org.apache.lucene.analysis.synonym.SynonymFilter">
<lst>
<str name="text">麻辣</str>
<str name="raw_bytes">[e9 ba bb e8 be a3]</str>
<int name="org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength">1</int>
<str name="type">CN_WORD</str>
<int name="start">0</int>
<int name="end">2</int>
<int name="position">1</int>
<arr name="positionHistory">
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
</arr>
</lst>
<lst>
<str name="text">香辣</str>
<str name="raw_bytes">[e9 a6 99 e8 be a3]</str>
<int name="org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength">1</int>
<str name="type">CN_WORD</str>
<int name="start">2</int>
<int name="end">4</int>
<int name="position">2</int>
<arr name="positionHistory">
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
</arr>
</lst>
</arr>
<arr name="org.apache.lucene.analysis.miscellaneous.LengthFilter">
<lst>
<str name="text">麻辣</str>
<str name="raw_bytes">[e9 ba bb e8 be a3]</str>
<int name="position">1</int>
<arr name="positionHistory">
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
</arr>
<int name="start">0</int>
<int name="end">2</int>
<str name="type">CN_WORD</str>
<int name="org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength">1</int>
</lst>
<lst>
<str name="text">香辣</str>
<str name="raw_bytes">[e9 a6 99 e8 be a3]</str>
<int name="position">2</int>
<arr name="positionHistory">
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
</arr>
<int name="start">2</int>
<int name="end">4</int>
<str name="type">CN_WORD</str>
<int name="org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength">1</int>
</lst>
</arr>
</lst>
</lst>
</lst>
<lst name="field_names"/>
</lst>
</response>
Note how each successive stage in the chain appends one more entry to every token's positionHistory, so a token can be traced through the whole analyzer. With SolrJ we can retrieve the same analysis programmatically:
public List<TokenInfo> getAnalysisWords(String words,String fieldType){
		  FieldAnalysisRequest request = new FieldAnalysisRequest("/analysis/field");
		    request.addFieldType(fieldType);
		    request.setFieldValue("text");
		    request.setFieldValue(words);
		    FieldAnalysisResponse response=null;
			try {
				response = request.process(solrContext);
			} catch (SolrServerException e) {
				throw new EbossBaseException("生成分词失败!");
			} catch (IOException e) {
				throw new EbossBaseException("生成分词失败!");
			}
		    Iterator it = response.getFieldTypeAnalysis(fieldType).getIndexPhases().iterator();//分词结果
	        //  Iterator it = response.getFieldTypeAnalysis(fieldType).getQueryPhases().iterator(); //检索中的切分效果
		    List<TokenInfo> list=null;
		    while(it.hasNext()) {
		      AnalysisPhase pharse = (AnalysisPhase)it.next();
		      list = pharse.getTokens();

		    }
		    return list;
	}
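
A minimal usage sketch (the field type name IKAnalyzerText matches the schema above; getText(), getStart() and getEnd() are accessors on SolrJ's AnalysisResponseBase.TokenInfo):

  List<TokenInfo> tokens = getAnalysisWords("麻辣香辣", "IKAnalyzerText");
  for (TokenInfo token : tokens) {
      // For the sample input this prints 麻辣 [0,2] and 香辣 [2,4],
      // matching the last filter stage (LengthFilter) in the response above
      System.out.println(token.getText() + " [" + token.getStart() + "," + token.getEnd() + "]");
  }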


