/**
 * Prints every term indexed under the "content" field of the Lucene index
 * stored in the "index" directory. For each term it reports the number of
 * documents containing it and its total number of occurrences.
 *
 * <p>If the index has no "content" field, or that field has no terms,
 * nothing is printed.
 *
 * @throws IOException if the index cannot be opened or read
 */
private static void countTerms() throws IOException {
    // try-with-resources closes the reader first and the directory second,
    // even when iteration throws part-way through.
    try (Directory directory = FSDirectory.open(Paths.get("index"));
         IndexReader ir = DirectoryReader.open(directory)) {
        // Since Lucene 8.x the terms of a field are obtained through MultiTerms.
        Terms terms = MultiTerms.getTerms(ir, "content");
        if (terms == null) {
            // Field is absent or has no terms: there is nothing to list.
            return;
        }
        TermsEnum iterator = terms.iterator();
        BytesRef bRef;
        int rank = 1;
        while ((bRef = iterator.next()) != null) {
            // Terms are stored as UTF-8 bytes; decode the current one for display.
            String oneTerm = bRef.utf8ToString();
            // docFreq: number of documents that contain the current term.
            // totalTermFreq: occurrences of the current term across all documents,
            // read from the positioned enum instead of re-seeking via the reader.
            System.out.println("第"+rank+"个term是:"+oneTerm+",共在:"+iterator.docFreq()
                    +"个文档出现,该term在所有文档中共出现:"
                    +iterator.totalTermFreq()+"次");
            rank++;
        }
    }
}
Lucene 8.x (二)统计terms分别出现的次数
最新推荐文章于 2024-10-06 16:55:50 发布