package com.yuan;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.FilesystemResourceLoader;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Console demo: tokenizes a sample string with {@link IKAnalyzer}, passes the
 * tokens through a Lucene synonym filter and prints each resulting token
 * together with its start/end offsets.
 *
 * <p>Usage: {@code TestSynonyms [synonymsFile]}. When no argument is given the
 * built-in default dictionary path is used.
 *
 * @author hankcs
 */
public class TestSynonyms
{
    /** Synonym dictionary used when no path is supplied on the command line. */
    private static final String DEFAULT_SYNONYMS_PATH = "c:/同义词/synonymword.dic";

    /**
     * Consumes the given token stream and prints every token to standard
     * output as {@code start-end[term] }, followed by a line break.
     *
     * <p>The stream is always closed, even if reading it fails part-way.
     *
     * @param ts the token stream to consume; it is closed by this method
     * @throws IOException if the stream reports an error while being read
     */
    private static void displayTokens(TokenStream ts) throws IOException
    {
        try
        {
            CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAttribute = ts.addAttribute(OffsetAttribute.class);
            // Consumer workflow: reset() -> incrementToken()* -> end() -> close().
            ts.reset();
            while (ts.incrementToken())
            {
                String token = termAttr.toString();
                System.out.print(offsetAttribute.startOffset() + "-" + offsetAttribute.endOffset() + "[" + token + "] ");
            }
            System.out.println();
            ts.end();
        }
        finally
        {
            // Release the stream even when reset()/incrementToken()/end() throws.
            ts.close();
        }
    }

    /**
     * Builds a synonym filter on top of the IK analyzer and prints the tokens
     * produced for a fixed sample sentence.
     *
     * @param args optional; {@code args[0]} overrides the synonym dictionary path
     * @throws Exception if the synonym file cannot be loaded or analysis fails
     */
    public static void main(String[] args) throws Exception
    {
        String testInput = "其实 i似 好人";
        String synonymsPath = (args != null && args.length > 0) ? args[0] : DEFAULT_SYNONYMS_PATH;

        Version ver = Version.LUCENE_47;
        Map<String, String> filterArgs = new HashMap<String, String>();
        filterArgs.put("luceneMatchVersion", ver.toString());
        filterArgs.put("synonyms", synonymsPath);
        // Optional factory argument, left at its default here:
        //filterArgs.put("expand", "true");

        SynonymFilterFactory factory = new SynonymFilterFactory(filterArgs);
        // NOTE(review): the dictionary file must be stored in the encoding the
        // loader reads it with (presumably UTF-8 - confirm for your Lucene
        // version); a mismatch makes Chinese synonyms silently fail to match.
        factory.inform(new FilesystemResourceLoader());

        IKAnalyzer ikAnalyzer = new IKAnalyzer();
        try
        {
            TokenStream ts = factory.create(ikAnalyzer.tokenStream("someField", testInput));
            displayTokens(ts); // displayTokens closes ts
        }
        finally
        {
            ikAnalyzer.close();
        }
    }
}
由于同义词库文件的编码与读取该文件时所设的编码不一致，导致一开始中文同义词不生效。
Lucene同义词(一)
最新推荐文章于 2021-03-20 00:18:32 发布