import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.Version;
/**
 * Console demo that runs text through a Lucene {@link Analyzer} and prints every token the
 * analyzer emits together with its start/end character offsets.
 */
public class AnalyzerTest {

    /**
     * Tokenizes {@code txt} with {@code analyzer} and prints each token to standard output.
     *
     * <p>For every token two lines are printed: {@code off:<start>----<end>} followed by
     * {@code attr:<term text>}. The analyzer's class is printed once before the tokens.
     *
     * @param analyzer the analyzer used to build the token stream (field name {@code "content"})
     * @param txt the text to tokenize
     * @throws IOException if the token stream fails while being reset, advanced, ended or closed
     */
    public static void analysis(Analyzer analyzer, String txt) throws IOException {
        System.out.println("analyzer:" + analyzer.getClass());
        // The TokenStream consumer workflow is reset() -> incrementToken()* -> end() -> close().
        // try-with-resources guarantees close() even if tokenization throws.
        try (TokenStream stream = analyzer.tokenStream("content", new StringReader(txt))) {
            // Attribute instances are reused across tokens, so look them up once up front.
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println("off:" + offset.startOffset() + "----" + offset.endOffset());
                System.out.println("attr:" + term.toString());
            }
            stream.end();
        }
    }

    /**
     * Prints a few facts about a short English sample string, then tokenizes a mixed
     * Chinese/English sample with {@link SimpleAnalyzer} and prints the resulting tokens.
     *
     * @param args unused
     * @throws IOException if tokenization fails
     */
    public static void main(String[] args) throws IOException {
        String txt = "this is a txt";
        System.out.println("textLength:" + txt.length());
        // NOTE(review): the label says "0-4" but the call prints characters 5..6 ("is").
        // Output is kept as-is; confirm which of the two was intended.
        System.out.println("0-4:" + txt.substring(5, 7));
        String zhTxt = "这是中文测试,hello 中文 The i am i am";
        // Swap in a different Analyzer (e.g. StandardAnalyzer) here to compare tokenization.
        // The analyzer is Closeable, so release it once the demo is done.
        try (Analyzer a = new SimpleAnalyzer(Version.LUCENE_48)) {
            analysis(a, zhTxt);
        }
    }
}
// Lucene analyzer analysis
// (Scraped page footer: latest recommended article published 2021-02-24 05:02:11)