Analyzer analyzer = new WhitespaceAnalyzer();
try {
// 第一个参数只是标识性没有实际作用
TokenStream stream = analyzer.tokenStream("", new StringReader("我爱 北京 天安门"));
// 获取词与词之间的位置增量
PositionIncrementAttribute postiona = stream.addAttribute(PositionIncrementAttribute.class);
// 获取各个单词之间的偏移量
OffsetAttribute offseta = stream.addAttribute(OffsetAttribute.class);
// 获取每个单词信息
CharTermAttribute chara = stream.addAttribute(CharTermAttribute.class);
// 获取当前分词的类型
TypeAttribute typea = stream.addAttribute(TypeAttribute.class);
stream.reset();
while (stream.incrementToken()) {
System.out.print("位置增量" + postiona.getPositionIncrement() + ":\t");
System.out.println(
chara + "\t[" + offseta.startOffset() + " - " + offseta.endOffset() + "]\t<" + typea + ">");
}
System.out.println();
} catch (Exception e) {
e.printStackTrace();
}