FieldAnalysisRequestHandler 可以使用某个字段或字段类型所配置的分词器对查询串进行分词,并返回各个分词阶段的分词数据。先看 solr 的默认配置,
<!-- IKAnalyzer 中文分词 -->
<fieldType name="IKAnalyzerText" class="solr.TextField" positionIncrementGap="100" >
<analyzer type="index" >
<tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="false" conf="ik.conf"/>
<filter class="solr.LowerCaseFilterFactory"/>
<!--实现拼音搜索 暂时不用-->
<!-- <filter class="com.shentong.search.analyzers.PinyinTransformTokenFilterFactory" minTermLenght="2" />-->
<!-- <filter class="com.shentong.search.analyzers.PinyinNGramTokenFilterFactory" minGram="1" maxGram="20" />-->
<!--过滤条件:大小写、停用词、同义词、分词长度-->
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.LengthFilterFactory" min="1" max="10" />
</analyzer>
<analyzer type="query">
<tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="true" conf="ik.conf" isMaxWordLength="false" />
</analyzer>
</fieldType>
输入连接地址为:http://localhost:1234/taoye/analysis/field?q=麻辣香辣&analysis.fieldtype=text&indent=on&wt=json 显示的是搜索后分词出的结果,对应的是配置文件中的
<analyzer type="query">
<tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="true" conf="ik.conf" isMaxWordLength="false" />
</analyzer>
若输入为:http://localhost:1234/taoye/analysis/field?analysis.fieldvalue=麻辣香辣&analysis.fieldtype=text&indent=on&wt=json 则显示出分词结果,结果如下
<response>
<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">2</int>
</lst>
<lst name="analysis">
<lst name="field_types">
<lst name="IKAnalyzerText">
<lst name="index">
<arr name="org.wltea.analyzer.lucene.IKTokenizer">...</arr>
<arr name="org.apache.lucene.analysis.core.LowerCaseFilter">
<lst>
<str name="text">麻辣</str>
<str name="raw_bytes">[e9 ba bb e8 be a3]</str>
<int name="position">1</int>
<arr name="positionHistory">
<int>1</int>
<int>1</int>
</arr>
<int name="start">0</int>
<int name="end">2</int>
<str name="type">CN_WORD</str>
</lst>
<lst>
<str name="text">香辣</str>
<str name="raw_bytes">[e9 a6 99 e8 be a3]</str>
<int name="position">2</int>
<arr name="positionHistory">
<int>2</int>
<int>2</int>
</arr>
<int name="start">2</int>
<int name="end">4</int>
<str name="type">CN_WORD</str>
</lst>
</arr>
<arr name="org.apache.lucene.analysis.core.LowerCaseFilter">
<lst>
<str name="text">麻辣</str>
<str name="raw_bytes">[e9 ba bb e8 be a3]</str>
<int name="position">1</int>
<arr name="positionHistory">
<int>1</int>
<int>1</int>
<int>1</int>
</arr>
<int name="start">0</int>
<int name="end">2</int>
<str name="type">CN_WORD</str>
</lst>
<lst>
<str name="text">香辣</str>
<str name="raw_bytes">[e9 a6 99 e8 be a3]</str>
<int name="position">2</int>
<arr name="positionHistory">
<int>2</int>
<int>2</int>
<int>2</int>
</arr>
<int name="start">2</int>
<int name="end">4</int>
<str name="type">CN_WORD</str>
</lst>
</arr>
<arr name="org.apache.lucene.analysis.core.StopFilter">
<lst>
<str name="text">麻辣</str>
<str name="raw_bytes">[e9 ba bb e8 be a3]</str>
<int name="position">1</int>
<arr name="positionHistory">
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
</arr>
<int name="start">0</int>
<int name="end">2</int>
<str name="type">CN_WORD</str>
</lst>
<lst>
<str name="text">香辣</str>
<str name="raw_bytes">[e9 a6 99 e8 be a3]</str>
<int name="position">2</int>
<arr name="positionHistory">
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
</arr>
<int name="start">2</int>
<int name="end">4</int>
<str name="type">CN_WORD</str>
</lst>
</arr>
<arr name="org.apache.lucene.analysis.synonym.SynonymFilter">
<lst>
<str name="text">麻辣</str>
<str name="raw_bytes">[e9 ba bb e8 be a3]</str>
<int name="org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength">1</int>
<str name="type">CN_WORD</str>
<int name="start">0</int>
<int name="end">2</int>
<int name="position">1</int>
<arr name="positionHistory">
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
</arr>
</lst>
<lst>
<str name="text">香辣</str>
<str name="raw_bytes">[e9 a6 99 e8 be a3]</str>
<int name="org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength">1</int>
<str name="type">CN_WORD</str>
<int name="start">2</int>
<int name="end">4</int>
<int name="position">2</int>
<arr name="positionHistory">
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
</arr>
</lst>
</arr>
<arr name="org.apache.lucene.analysis.miscellaneous.LengthFilter">
<lst>
<str name="text">麻辣</str>
<str name="raw_bytes">[e9 ba bb e8 be a3]</str>
<int name="position">1</int>
<arr name="positionHistory">
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
<int>1</int>
</arr>
<int name="start">0</int>
<int name="end">2</int>
<str name="type">CN_WORD</str>
<int name="org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength">1</int>
</lst>
<lst>
<str name="text">香辣</str>
<str name="raw_bytes">[e9 a6 99 e8 be a3]</str>
<int name="position">2</int>
<arr name="positionHistory">
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
<int>2</int>
</arr>
<int name="start">2</int>
<int name="end">4</int>
<str name="type">CN_WORD</str>
<int name="org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength">1</int>
</lst>
</arr>
</lst>
</lst>
</lst>
<lst name="field_names"/>
</lst>
</response>
使用 SolrJ 我们可以如下实现:
/**
 * 调用 Solr 的 /analysis/field 处理器,按指定字段类型对给定文本进行分词。
 *
 * @param words     待分词的文本
 * @param fieldType schema 中定义的字段类型名(如 IKAnalyzerText)
 * @return 索引分词链中最后一个阶段(即最末尾 filter 输出)的 token 列表;
 *         若响应中没有任何分词阶段则返回 null(与原实现行为一致)
 * @throws EbossBaseException 当 Solr 请求失败(网络或服务端错误)时抛出
 */
public List<TokenInfo> getAnalysisWords(String words, String fieldType) {
    FieldAnalysisRequest request = new FieldAnalysisRequest("/analysis/field");
    request.addFieldType(fieldType);
    // 原代码此处还有 request.setFieldValue("text"),该值会被下一行的
    // setFieldValue(words) 直接覆盖,属于无效调用,已删除。
    request.setFieldValue(words);

    FieldAnalysisResponse response;
    try {
        response = request.process(solrContext);
    } catch (SolrServerException e) {
        // NOTE(review): 异常原因 e 被丢弃;若 EbossBaseException 提供
        // (message, cause) 构造器,建议改为传入 e 以保留堆栈 —— 待确认。
        throw new EbossBaseException("生成分词失败!");
    } catch (IOException e) {
        throw new EbossBaseException("生成分词失败!");
    }

    // 遍历索引分词链的各个阶段,保留最后一个阶段的分词结果。
    // 如需查看检索(query)时的切分效果,改用 getQueryPhases()。
    List<TokenInfo> list = null;
    for (AnalysisPhase phase : response.getFieldTypeAnalysis(fieldType).getIndexPhases()) {
        list = phase.getTokens();
    }
    return list;
}