1.显示分词词汇
public static void displayToken(String str, Analyzer analyzer){ try { /** * TokenStream tokenStream(String fieldName, Reader reader): * 获取TokenStream(分词流) * 参数一:域名(这里没有什么实际意义) * 参数二:输入流 */ TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(str)); //创建一个属性(里面有分词后的词汇),这个属性添加到TokenStream中,随TokenStream移动而增加 CharTermAttribute cta = tokenStream.addAttribute(CharTermAttribute.class); //遍历TokenStream while (tokenStream.incrementToken()){ System.out.print("["+cta+"]"); } System.out.println(""); System.out.println("--------------------------------"); } catch (IOException e) { e.printStackTrace(); } }
2.显示分词所有信息
public static void displayAllTokenInfo(String str, Analyzer analyzer){ try { TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(str)); //位置增量的属性,存储语汇单元之间的距离 PositionIncrementAttribute pia = tokenStream.addAttribute(PositionIncrementAttribute.class); //每个语汇单元的位置偏移量 OffsetAttribute oa = tokenStream.addAttribute(OffsetAttribute.class); //使用的分词器的类型信息 TypeAttribute ta = tokenStream.addAttribute(TypeAttribute.class); //存储每一个语汇单元的信息(分词单元信息) CharTermAttribute ca = tokenStream.addAttribute(CharTermAttribute.class); while (tokenStream.incrementToken()){ System.out.println("位置增量:"+pia+" 词汇的偏移量:"+oa.startOffset()+"--"+oa.endOffset()+" 类型:"+ta+" 分词:"+ca); } } catch (IOException e) { e.printStackTrace(); } }