引入 Maven 依赖
<!-- lucene -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>7.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>7.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>7.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-backward-codecs</artifactId>
<version>7.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-suggest</artifactId>
<version>7.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>7.1.0</version>
</dependency>
<!-- ikanalyzer -->
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
</dependency>
IK分词组件
/**
 * Lucene {@code Tokenizer} that delegates segmentation to an {@code IKSegmenter}
 * and exposes each {@code Lexeme} through the term, offset and type attributes.
 *
 * <p>The segmenter is re-bound to the tokenizer's current {@code input} on every
 * {@link #reset()}, so the reader given to the constructor is only an initial value.
 */
public class IKTokenizer extends Tokenizer {
    /**
     * IK segmenter implementation that produces the lexemes.
     */
    private final IKSegmenter ikimplement;
    /**
     * Term text attribute.
     */
    private final CharTermAttribute termAtt;
    /**
     * Term offset attribute.
     */
    private final OffsetAttribute offsetAtt;
    /**
     * Term type attribute (see the type constants in org.wltea.analyzer.core.Lexeme).
     */
    private final TypeAttribute typeAtt;
    /**
     * End position of the last lexeme emitted; used to compute the final offset in {@link #end()}.
     */
    private int endPosition;

    /**
     * Creates a tokenizer with {@code useSmart} set to {@code false}.
     *
     * @param in initial reader handed to the IK segmenter
     */
    IKTokenizer(Reader in) {
        this(in, false);
    }

    /**
     * Registers the attributes and creates the underlying segmenter.
     *
     * @param in       initial reader handed to the IK segmenter
     * @param useSmart flag forwarded to the {@code IKSegmenter} constructor
     *                 (presumably selects IK's "smart" segmentation mode - confirm against IK docs)
     */
    private IKTokenizer(Reader in, boolean useSmart) {
        offsetAtt = addAttribute(OffsetAttribute.class);
        termAtt = addAttribute(CharTermAttribute.class);
        typeAtt = addAttribute(TypeAttribute.class);
        ikimplement = new IKSegmenter(in, useSmart);
    }

    /**
     * Advances to the next lexeme and copies it into the attributes.
     *
     * @return {@code true} if a token was produced, {@code false} once the segmenter is exhausted
     * @throws IOException if the segmenter fails to read its input
     */
    @Override
    public boolean incrementToken() throws IOException {
        // Clear all attributes left over from the previous token.
        clearAttributes();
        Lexeme nextLexeme = ikimplement.next();
        if (nextLexeme == null) {
            // No more lexemes: tell the consumer the stream is finished.
            return false;
        }
        // Convert the Lexeme into attributes: term text and length.
        termAtt.append(nextLexeme.getLexemeText());
        termAtt.setLength(nextLexeme.getLength());
        // Map offsets through correctOffset so they stay valid when the input
        // was wrapped by a CharFilter (consistent with end()).
        offsetAtt.setOffset(correctOffset(nextLexeme.getBeginPosition()),
                correctOffset(nextLexeme.getEndPosition()));
        // Remember the raw end position; end() corrects it when reporting the final offset.
        endPosition = nextLexeme.getEndPosition();
        // Record the lexeme type.
        typeAtt.setType(nextLexeme.getLexemeTypeString());
        return true;
    }

    /**
     * Resets the tokenizer and re-binds the segmenter to the current {@code input}.
     *
     * @throws IOException if the superclass reset fails
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        ikimplement.reset(input);
        // Forget the previous stream's end position so an empty input
        // does not report a stale final offset.
        endPosition = 0;
    }

    /**
     * Finishes the stream and publishes the final offset.
     *
     * @throws IOException if the superclass end-of-stream handling fails
     */
    @Override
    public final void end() throws IOException {
        // Required by the TokenStream contract: lets the base class set
        // end-of-stream attribute state before we overwrite the offset.
        super.end();
        // set final offset
        int finalOffset = correctOffset(this.endPosition);
        offsetAtt.setOffset(finalOffset, finalOffset);
    }
}
IK分词器(Analyzer)
/**
 * Analyzer that tokenizes with {@code IKTokenizer} and then applies
 * {@code StandardFilter}, {@code LowerCaseFilter} and a {@code StopFilter}
 * driven by {@code getStopwordSet()}.
 */
public class IKAnalyzer extends StopwordAnalyzerBase {
    /**
     * Builds the tokenizer and filter chain for a field.
     *
     * <p>Failures while constructing the chain are propagated to the caller instead of
     * being printed and turned into a {@code null} return, which would only fail later
     * with an unrelated {@code NullPointerException}.
     *
     * @param arg0 name of the field being analyzed; it is not text to tokenize
     * @return the tokenizer together with the end of its filter chain
     */
    @Override
    protected TokenStreamComponents createComponents(String arg0) {
        // The reader passed here is only a placeholder: IKTokenizer.reset() re-binds the
        // segmenter to the tokenizer's real input. An empty reader avoids feeding the
        // field name in as text and avoids an NPE when the field name is null.
        IKTokenizer tokenizer = new IKTokenizer(new StringReader(""));
        TokenStream stream = new StandardFilter(tokenizer);
        stream = new LowerCaseFilter(stream);
        stream = new StopFilter(stream, getStopwordSet());
        return new TokenStreamComponents(tokenizer, stream);
    }
}
搜索纠错
public class CnSpellChecker implements Closeable {
// Directory holding the spell index. NOTE(review): not assigned in the visible
// part of the class - presumably set by swapSearcher; confirm.
private Directory spellIndex;
// Initialised to 2.0F by the constructor. Presumably the boost for start n-grams,
// as in Lucene's SpellChecker - TODO confirm against the query-building code.
private float bStart;
// Initialised to 1.0F by the constructor. Presumably the boost for end n-grams - TODO confirm.
private float bEnd;
// Searcher over the spell index.
private IndexSearcher searcher;
// Lock object created in the constructor; its users are outside the visible code.
private final Object searcherLock;
// Monitor held by setSpellIndex while the index directory is being switched.
private final Object modifyCurrentIndexLock;
// Closed flag; initialised to false by the constructor.
private volatile boolean closed;
// Accuracy value, initialised to 0.5F; exposed through setAccuracy/getAccuracy.
private float accuracy;
// String distance implementation; exposed through setStringDistance/getStringDistance.
private StringDistance sd;
// Comparator for SuggestWord instances; exposed through setComparator/getComparator.
private Comparator<SuggestWord> comparator;
/**
 * Creates a checker that uses {@code SuggestWordQueue.DEFAULT_COMPARATOR}.
 *
 * @param spellIndex directory of the spell index
 * @param sd         string distance implementation to use
 * @throws IOException if the delegated constructor fails
 */
private CnSpellChecker(Directory spellIndex, StringDistance sd) throws IOException {
    this(
            spellIndex,
            sd,
            SuggestWordQueue.DEFAULT_COMPARATOR
    );
}
/**
 * Creates a checker that measures similarity with a new {@code LevensteinDistance}.
 *
 * @param spellIndex directory of the spell index
 * @throws IOException if the delegated constructor fails
 */
CnSpellChecker(Directory spellIndex) throws IOException {
    this(
            spellIndex,
            new LevensteinDistance()
    );
}
/**
 * Initialises the defaults, binds the spell index, then stores the string
 * distance and the comparator.
 *
 * @param spellIndex directory of the spell index
 * @param sd         string distance implementation to use
 * @param comparator comparator for {@code SuggestWord} instances
 * @throws IOException if binding the spell index fails
 */
private CnSpellChecker(Directory spellIndex, StringDistance sd, Comparator<SuggestWord> comparator) throws IOException {
    // The lock objects and the closed flag are set up first: setSpellIndex
    // synchronizes on modifyCurrentIndexLock and calls ensureOpen().
    searcherLock = new Object();
    modifyCurrentIndexLock = new Object();
    closed = false;

    // Default tuning values.
    bStart = 2.0F;
    bEnd = 1.0F;
    accuracy = 0.5F;

    setSpellIndex(spellIndex);
    setStringDistance(sd);
    this.comparator = comparator;
}
/**
 * Switches this checker to the given spell index directory while holding
 * {@code modifyCurrentIndexLock}.
 *
 * @param spellIndexDir directory to use as the spell index
 * @throws IOException if checking, initialising or opening the index fails
 */
private void setSpellIndex(Directory spellIndexDir) throws IOException {
    synchronized (modifyCurrentIndexLock) {
        ensureOpen();
        final boolean indexMissing = !DirectoryReader.indexExists(spellIndexDir);
        if (indexMissing) {
            // Open and immediately close a writer (null analyzer) so the directory
            // is initialised before swapSearcher runs - presumably leaving an empty index.
            new IndexWriter(spellIndexDir, new IndexWriterConfig(null)).close();
        }
        swapSearcher(spellIndexDir);
    }
}
/**
 * Replaces the comparator stored in this checker.
 *
 * @param comparator the new comparator for {@code SuggestWord} instances
 */
public void setComparator(final Comparator<SuggestWord> comparator) {
    this.comparator = comparator;
}
/**
 * Returns the comparator currently stored in this checker.
 *
 * @return the comparator for {@code SuggestWord} instances
 */
public Comparator<SuggestWord> getComparator() {
    return comparator;
}
/**
 * Replaces the string distance implementation stored in this checker.
 *
 * @param sd the new string distance implementation
 */
public void setStringDistance(final StringDistance sd) {
    this.sd = sd;
}
/**
 * Returns the string distance implementation currently stored in this checker.
 *
 * @return the string distance implementation
 */
public StringDistance getStringDistance() {
    return sd;
}
/**
 * Stores a new accuracy value (the constructor's default is 0.5F).
 *
 * @param acc the accuracy value to store; no range check is applied here
 */
public void setAccuracy(float acc) {
    accuracy = acc;
}
/**
 * Returns the accuracy value currently stored in this checker.
 *
 * @return the stored accuracy value
 */
public float getAccuracy() {
    return accuracy;
}
public String[] suggestSimilar(String word, int numSug) th