solr wiki: http://wiki.apache.org/solr/Suggester/
实现搜索时的关键词提示;并在此基础上做了扩展:将词库中尚未收录的关键词追加到词库中(目前不支持去重)。
一、solrconfig.xml 配置
在solrconfig.xml配置文件中添加 “关键词提示” 组件
<!-- Registers the custom component SpellCheckAndLoadComponent under the name "suggest". -->
<searchComponent name="suggest" class="com.netboy.demo.function.SpellCheckAndLoadComponent">
<lst name="spellchecker">
<!-- Dictionary name; the /suggest handler's spellcheck.dictionary default uses the same value. -->
<str name="name">suggest</str>
<!-- Suggester implementation and the lookup structure it uses. -->
<str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
<str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
<!-- Also read by SpellCheckAndLoadComponent: only queries of the form "title:word" are recorded. -->
<str name="field">title</str>
<!-- Dictionary file; SpellCheckAndLoadComponent appends recorded words to this file. -->
<str name="sourceLocation">stopwords.txt</str>
<!-- Read by SpellCheckAndLoadComponent: buffered words are written out once this many have accumulated. -->
<str name="size">4</str>
<!-- <float name="threshold">0.005</float> -->
<!-- NOTE(review): presumably rebuilds the suggester on every commit; confirm against the Solr Suggester docs. -->
<str name="buildOnCommit">true</str>
</lst>
</searchComponent>
<!-- Request handler at /suggest that runs only the "suggest" search component. -->
<requestHandler name="/suggest" class="org.apache.solr.handler.component.SearchHandler">
<!-- Default request parameters; they apply when the request does not supply its own values. -->
<lst name="defaults">
<str name="spellcheck">true</str>
<!-- Must match the <str name="name"> of the spellchecker defined in the "suggest" component. -->
<str name="spellcheck.dictionary">suggest</str>
<!-- NOTE(review): presumably the maximum number of suggestions returned per token; confirm in the Solr docs. -->
<str name="spellcheck.count">5</str>
<str name="spellcheck.onlyMorePopular">true</str>
<str name="spellcheck.collate">true</str>
</lst>
<!-- Components executed by this handler, referenced by their registered name. -->
<arr name="components">
<str>suggest</str>
</arr>
</requestHandler>
stopwords.txt内容如下:
jetty
netty
淘宝
taobao
水杯
章法
天龙八部
虚竹
木婉清
段誉
章
人
人 民
嫁了人
可以在关键词后面加上权重,格式为:关键词 + Tab 键 + 数字(例如 "淘宝<Tab>10"),数值越大权重越大。
二、 扩展SpellCheckComponent
自定义 SpellCheckAndLoadComponent 类,实现两个功能:Solr 搜索提示,以及将查询的关键词添加到词库中。
代码如下:
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.Set;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SpellCheckComponent;
import org.apache.solr.request.SolrQueryRequest;
import org.eclipse.jetty.util.ConcurrentHashSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Spell-check component that delegates suggestion handling to
 * {@link SpellCheckComponent} and additionally records the queried word so that
 * it can be appended to the dictionary file.
 *
 * <p>Configuration is read from the {@code spellchecker} list of the component:
 * {@code sourceLocation} (dictionary file to append to), {@code field} (only
 * queries of the form {@code field:word} are recorded) and {@code size} (number
 * of buffered words that triggers a write, default 99).
 *
 * <p>Words are buffered in a set shared by all instances and written to the
 * dictionary file in batches. The file is opened in append mode for each batch
 * and closed again immediately, so no file handle is held between requests.
 * Words already present in the file are not detected; the same word may be
 * appended more than once across batches.
 *
 * <p>NOTE(review): {@code sourceLocation} is opened here as a plain file path.
 * Solr presumably resolves the same setting through its resource loader, so
 * confirm both refer to the same file in your deployment.
 *
 * @author unknown (created 2013-7-13 22:09:36)
 */
public class SpellCheckAndLoadComponent extends SpellCheckComponent {
    private static final Logger logger = LoggerFactory.getLogger(SpellCheckAndLoadComponent.class);

    /** Charset used when appending to the dictionary, so non-ASCII words do not depend on the platform default. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Guards the shared buffer and the dictionary file against concurrent flushes. */
    private static final Object LOCK = new Object();

    /** Words recorded since the last flush; shared by every instance of this component. */
    private static final ConcurrentHashSet<String> spellSet = new ConcurrentHashSet<String>();

    /** Path of the dictionary file, or {@code null} when no {@code sourceLocation} is configured. */
    private String dictionaryPath;

    /** Number of buffered words that triggers a write to the dictionary file. */
    private int size = 99;

    /** Field name a query must be prefixed with ({@code field:word}) for its word to be recorded. */
    private String field;

    /**
     * Reads {@code sourceLocation}, {@code size} and {@code field} from the
     * {@code spellchecker} list of the component configuration.
     *
     * @param args component configuration as passed by Solr
     */
    @Override
    public void init(NamedList args) {
        super.init(args);
        Object spellcheckerConfig = (args == null) ? null : args.get("spellchecker");
        if (!(spellcheckerConfig instanceof NamedList)) {
            return;
        }
        NamedList spellchecker = (NamedList) spellcheckerConfig;
        dictionaryPath = asString(spellchecker.get("sourceLocation"));
        field = asString(spellchecker.get("field"));
        String sizeStr = asString(spellchecker.get("size"));
        if (sizeStr != null) {
            try {
                size = Integer.parseInt(sizeStr.trim());
            } catch (NumberFormatException e) {
                // A malformed size must not prevent the component from loading; keep the default.
                logger.warn("invalid size '" + sizeStr + "', keeping " + size, e);
            }
        }
        logger.info("the dictionary is {}", dictionaryPath);
    }

    /**
     * Delegates to the parent implementation; this component needs no extra
     * core-dependent setup.
     *
     * @param solrCore the core this component belongs to
     */
    @Override
    public void inform(SolrCore solrCore) {
        super.inform(solrCore);
    }

    /**
     * Runs the regular spell-check processing and then records the queried word
     * when the {@code q} parameter has the form {@code field:word}. Once the
     * buffer holds at least {@code size} words it is written to the dictionary
     * file and emptied.
     *
     * @param rb the response builder of the current request
     */
    @Override
    public void process(ResponseBuilder rb) {
        try {
            super.process(rb);
        } catch (IOException e) {
            logger.warn("spellCheck had some error", e);
            return;
        }
        SolrQueryRequest request = rb.req;
        SolrParams params = request.getParams();
        String query = (params == null) ? null : params.get("q");
        if (query == null) {
            return;
        }
        String[] parts = query.trim().split(":");
        // "title:" splits into a single element, so the length must be checked before reading parts[1].
        if (parts.length < 2 || !parts[0].equals(field)) {
            return;
        }
        String word = parts[1].trim();
        if (word.length() == 0) {
            return;
        }
        synchronized (LOCK) {
            spellSet.add(word);
            if (spellSet.size() >= size) {
                pushFile(spellSet);
                spellSet.clear();
            }
        }
    }

    /**
     * Appends every non-null word of {@code set} to the dictionary file, one
     * word per line, encoded as UTF-8. Words containing a line break are skipped
     * because they would produce extra dictionary entries. I/O failures are
     * logged and not propagated.
     *
     * @param set words to append; {@code null} or empty sets are ignored
     */
    public void pushFile(Set<String> set) {
        if (set == null || set.isEmpty()) {
            return;
        }
        if (dictionaryPath == null) {
            logger.warn("no sourceLocation configured, {} word(s) not written", set.size());
            return;
        }
        synchronized (LOCK) {
            BufferedWriter writer = null;
            try {
                writer = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(dictionaryPath, true), UTF_8));
                for (String str : set) {
                    if (str == null || str.indexOf('\n') >= 0 || str.indexOf('\r') >= 0) {
                        continue;
                    }
                    writer.write(str);
                    writer.newLine();
                }
                writer.flush();
            } catch (IOException e) {
                logger.warn("error in writing dictionary " + dictionaryPath, e);
            } finally {
                if (writer != null) {
                    try {
                        writer.close();
                    } catch (IOException e) {
                        logger.warn("error in closing dictionary " + dictionaryPath, e);
                    }
                }
            }
        }
    }

    /**
     * Runs the parent stage handling, then writes any buffered words to the
     * dictionary file and empties the buffer.
     *
     * @param rb the response builder of the current request
     */
    @Override
    public void finishStage(ResponseBuilder rb) {
        super.finishStage(rb);
        synchronized (LOCK) {
            pushFile(spellSet);
            spellSet.clear();
        }
    }

    /** Returns the string form of a configuration value, or {@code null} when it is absent. */
    private static String asString(Object value) {
        return (value == null) ? null : value.toString();
    }
}
三、运行
第一次运行:http://127.0.0.1:8989/auction/suggest?q=title:%E4%BA%BA&spellcheck=on&spellcheck.build=true
返回结果:
如果词库中没有关键词如: http://127.0.0.1:8989/auction/suggest?q=title:电子科大&spellcheck=on&spellcheck.build=true
返回结果:
第二次运行: http://127.0.0.1:8989/auction/suggest?q=title:电&spellcheck=on&spellcheck.build=true
返回结果:
四、客户端连接
/**
 * Minimal SolrJ client that sends one query to the {@code /suggest} handler
 * and prints the returned suggestions to standard output.
 */
public class SpellCheckTest {
    /**
     * Queries the suggest handler for a fixed token and prints the result.
     * Failures are reported on standard error; the server connection is shut
     * down exactly once in all cases.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SolrServer solrServer = new HttpSolrServer("http://127.0.0.1:8989/auction");
        // http://127.0.0.1:8989/auction/suggest?q=人&spellcheck=on&spellcheck.build=true
        SolrQuery params = new SolrQuery();
        String token = "人";
        params.set("qt", "/suggest");
        params.set("q", token);
        params.set("spellcheck", "true");
        params.set("spellcheck.build", "true");
        try {
            // Printing happens inside the try block so a failed query never reaches it with a null response.
            QueryResponse response = solrServer.query(params);
            printSuggestions(response);
        } catch (Exception e) {
            System.err.println(e.getMessage());
            e.printStackTrace();
        } finally {
            solrServer.shutdown();
        }
    }

    /** Prints every suggestion of the response, followed by the query time. */
    private static void printSuggestions(QueryResponse response) {
        SpellCheckResponse spellCheckResponse = response.getSpellCheckResponse();
        if (spellCheckResponse == null) {
            return;
        }
        List<Suggestion> suggestionList = spellCheckResponse.getSuggestions();
        for (Suggestion suggestion : suggestionList) {
            System.out.println("Suggestions NumFound: " + suggestion.getNumFound());
            System.out.println("Token: " + suggestion.getToken());
            System.out.print("Suggested: ");
            List<String> suggestedWordList = suggestion.getAlternatives();
            for (String word : suggestedWordList) {
                System.out.println(word);
            }
            System.out.println();
        }
        System.out.println("查询耗时:" + response.getQTime());
    }
}
运行结果:
Suggestions NumFound: 1
Token: 人
Suggested: 人 民
查询耗时:6