// 如果lucene在查询的时候要显示同音词的话,那么在创建索引的时候就要加入相关的算法,肯定会损耗性能,得不偿失。
// (If Lucene is to surface homophones at query time, the phonetic algorithm must be applied
// when the index is built, which would certainly cost performance — not worth it.)
/**
* 2013.06.06
* @author 赵洪志
* 同音词测试,只是随便看看
*/
package com.zhao.lucene.analysis.codec;
import java.io.IOException;
import org.apache.commons.codec.language.Metaphone;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
public class MetaphoneReplacementFilter extends TokenFilter {
public static final String METAPHONE = "metaphone";
private Metaphone metaphoner = new Metaphone();
private TermAttribute termAttr;
private TypeAttribute typeAttr;
public MetaphoneReplacementFilter(TokenStream input) {
super(input);
termAttr = addAttribute(TermAttribute.class);
typeAttr = addAttribute(TypeAttribute.class);
}
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) // #A
return false; // #A
String encoded;
encoded = metaphoner.encode(termAttr.term()); // #B
termAttr.setTermBuffer(encoded); // #C
typeAttr.setType(METAPHONE); // #D
return true;
}
}
/**
* 2013.06.06
* @author 赵洪志
* 同音词测试,只是随便看看
*/
package com.zhao.lucene.analysis.codec;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.TokenStream;
/**
 * Analyzer that tokenizes text into runs of letters and replaces each token
 * with its Metaphone phonetic encoding via {@link MetaphoneReplacementFilter}.
 */
public class MetaphoneReplacementAnalyzer extends Analyzer {
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
        // LetterTokenizer splits on non-letter characters; the filter then
        // rewrites each token to its phonetic key.
        return new MetaphoneReplacementFilter(new LetterTokenizer(reader));
    }
}
/**
* 2013.06.06
* @author 赵洪志
* 同音词测试,只是随便看看
*/
package com.zhao.lucene.analysis.codec;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import junit.framework.TestCase;
/**
 * Exercises {@link MetaphoneReplacementAnalyzer}: indexes a document with the
 * phonetic analyzer and verifies that a query analyzed the same way finds it.
 */
public class MetaphoneAnalyzerTest extends TestCase {

    public void testKoolKat() throws Exception {
        RAMDirectory directory = new RAMDirectory();
        Analyzer analyzer = new MetaphoneReplacementAnalyzer();

        IndexWriter writer = new IndexWriter(directory, analyzer, true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            Document doc = new Document();
            doc.add(new Field("contents",                              // #A
                    "我爱中华人民共和国", Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        } finally {
            writer.close(); // release the writer even if addDocument throws
        }

        IndexSearcher searcher = new IndexSearcher(directory);
        try {
            Query query = new QueryParser(Version.LUCENE_30,           // #B
                    "contents", analyzer)                              // #B
                    .parse("爱人");                                    // #B
            TopDocs hits = searcher.search(query, 1);

            // NOTE(review): the original only asserted inside "if (totalHits > 0)",
            // so a zero-hit search passed silently; the assertion is now
            // unconditional. Metaphone encodes only Latin letters, so this
            // Chinese fixture may produce no phonetic match — TODO confirm
            // the fixture text actually yields a hit.
            assertTrue("query produced no hits", hits.totalHits > 0);  // #C

            Document found = searcher.doc(hits.scoreDocs[0].doc);
            assertEquals("我爱中华人民共和国", found.get("contents"));  // #D
            System.out.println(found.get("contents"));
        } finally {
            searcher.close(); // release the searcher even on assertion failure
        }
    }

    /*
     * #A Index document #B Parse query text #C Verify match #D Retrieve
     * original value
     */

    /** Prints the phonetic tokens the analyzer produces for a sample sentence. */
    public static void main(String[] args) throws IOException {
        MetaphoneReplacementAnalyzer analyzer = new MetaphoneReplacementAnalyzer();
        TokenStream stream = analyzer.tokenStream("content", new StringReader(
                "The quick brown fox jumped over the lazy dog"));
        TermAttribute attribute = stream.addAttribute(TermAttribute.class);
        while (stream.incrementToken()) {
            System.out.print(attribute.term() + "===");
        }
        System.out.println();
    }
}