solr开发之拼写检查spellcheck

首先,修改schema.xml,增加需要检查的字段

<!-- Source field for the spellchecker: indexed only (not stored), multi-valued so several
     fields can be copied into it. -->
<field name="spell" type="text_spell" indexed="true" stored="false" multiValued="true"/>
<!-- Feed the spellcheck field from productName. -->
<copyField source="productName" dest="spell"/>
<!-- Field type used by the spellcheck field. Index time and query time use different chains. -->
<fieldType name="text_spell" class="solr.TextField" positionIncrementGap="100">
    <!-- Index side: Ansj tokenizer followed by the two pinyin filters.
         NOTE(review): "minTermLenght"/"maxTermLenght" are spelled this way on purpose; they
         appear to be the parameter names the third-party pinyin filter expects. Confirm
         against the filter's source before "fixing" the spelling. -->
    <analyzer type="index">
        <tokenizer class="org.ansj.solr5.AnsjTokenizerFactory"
                   query="false" pstemming="true" stopwordsDir="stopwords/stopwords.dic"/>
        <filter class="org.apache.lucene.analysis.pinyin.solr5.PinyinTokenFilterFactory"
                pinyinAll="true" minTermLenght="2" maxTermLenght="15"/>
        <filter class="org.apache.lucene.analysis.pinyin.solr5.PinyinEdgeNGramTokenFilterFactory"/>
    </analyzer>
    <!-- Query side: split on whitespace only; no word segmentation or pinyin expansion. -->
    <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    </analyzer>
    <!-- A third analyzer without a type attribute (KeywordTokenizer + LowerCaseFilter) used to
         follow here. With explicit index and query analyzers already declared it had no role
         left to fill, so it was removed to keep the definition unambiguous. -->
</fieldType>
注意:查询端(type="query")只按空白字符切分,不做中文分词,也不做拼音转换。


然后是配置solrconfig.xml文件,添加组件和处理器

<searchComponent name="spellerror" class="solr.SpellCheckComponent">
    <str name="queryAnalyzerFieldType">string</str>

    <!-- a spellchecker built from a field of the main index -->
    <lst name="spellchecker">
        <str name="name">default</str>
        <!-- Field whose indexed terms are the basis for spell checking; here the "spell" field. -->
        <str name="field">spell</str>
        <str name="classname">solr.DirectSolrSpellChecker</str>
        <str name="distanceMeasure">internal</str>
        <float name="accuracy">0.5</float>
        <int name="maxEdits">2</int>
        <int name="minPrefix">1</int>
        <int name="maxInspections">5</int>
        <int name="minQueryLength">2</int>
        <float name="maxQueryFrequency">0.01</float>
    </lst>

    <!-- a spellchecker that reads its dictionary from spellings.txt -->
    <lst name="spellchecker">
        <str name="name">file</str>
        <str name="classname">solr.FileBasedSpellChecker</str>
        <str name="sourceLocation">spellings.txt</str>
        <str name="characterEncoding">UTF-8</str>
        <str name="spellcheckIndexDir">spellcheckerFile</str>
    </lst>
</searchComponent>
  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
    <!-- Default request parameters -->
    <lst name="defaults">
      <str name="df">spell</str>
      <str name="spellcheck">on</str>
      <str name="spellcheck.dictionary">default</str>
      <!--
      <str name="spellcheck.extendedResults">true</str>
      <str name="spellcheck.count">10</str>
      <str name="spellcheck.alternativeTermCount">5</str>
      <str name="spellcheck.maxResultsForSuggest">5</str>
      -->
      <str name="spellcheck.collate">true</str>
      <str name="spellcheck.collateExtendedResults">true</str>
      <!--
      <str name="spellcheck.maxCollationTries">10</str>
      <str name="spellcheck.maxCollations">5</str>
      -->
    </lst>
    <!-- Run the spellcheck component after the standard search components. -->
    <arr name="last-components">
      <str>spellerror</str>
    </arr>
  </requestHandler>
配置完后,需要重建索引才能生效


最后是solrJ里的代码:

/**
	 * Spell-checks a keyword string word by word through the "/spell" request handler.
	 * <p>
	 * The keyword is split on whitespace and one query is sent per word. A word is replaced
	 * by the first suggestion in the response when the response reports it as misspelled and
	 * a suggestion is available; otherwise the word is kept unchanged, so the result always
	 * has one entry for every input word.
	 *
	 * @param keyword the user input, words separated by whitespace; may be null or blank
	 * @return the words joined by single spaces with misspelled words replaced, or an empty
	 *         string when {@code keyword} is null or blank
	 * @throws SolrWrapException if the Solr request fails
	 */
	public String spellCheck(String keyword)
	{
		if (keyword == null || keyword.trim().isEmpty())
		{
			return "";
		}
		StringBuilder corrected = new StringBuilder();
		// Split on runs of whitespace so repeated spaces do not produce empty words.
		for (String word : keyword.trim().split("\\s+"))
		{
			SolrQuery query = new SolrQuery();
			query.set("spellcheck", "true");
			query.set("spellcheck.q", word);
			query.set("qt", "/spell");
			// spellcheck.build is intentionally not sent: it asks Solr to (re)build the
			// dictionary and does not belong on every lookup request.
			query.set("spellcheck.count", 5);

			// Fall back to the original word unless Solr offers a replacement.
			String replacement = word;
			try
			{
				QueryResponse response = solrClient.query(coreName, query);
				System.out.println("耗时:" + response.getQTime());
				SpellCheckResponse spellres = response.getSpellCheckResponse();
				if (spellres != null && !spellres.isCorrectlySpelled())
				{
					String spellWord = spellres.getFirstSuggestion(word);
					System.out.println("推荐词:" + spellWord);
					if (spellWord != null)
					{
						replacement = spellWord;
					}
				}
			} catch (SolrServerException | IOException e)
			{
				throw new SolrWrapException(e);
			}

			if (corrected.length() > 0)
			{
				corrected.append(' ');
			}
			corrected.append(replacement);
		}
		return corrected.toString();
	}


参考文献:

http://www.cnblogs.com/HD/p/3993424.html



  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值