FSDirectory directory = FSDirectory.open(new File("xxx"));
IndexReader indexReader = DirectoryReader.open(directory);
String[] fields = {"xxx"};
for (int i = 0; i < fields.length; i++) {
Terms ter = MultiFields.getTerms(indexReader, fields[i]);
TermsEnum iterator = ter.iterator(TermsEnum.EMPTY);
BytesRef bytesRef;
while (ObjectUtil.isNotEmpty(bytesRef = iterator.next())) {
String oneTermStr = new String(bytesRef.bytes, bytesRef.offset,
bytesRef.length, Charset.forName("utf-8"));
// System.out.println("term值:"+oneTermStr+
// // document frequency:df,值越大,分值越小
// ",在"+iterator.docFreq()+"个文档中出现,"+
// // term frequency:tf,值越大,分值越大
// ",在所有文档的name域中共出现"+indexReader.totalTermFreq(new Term("goodName",bytesRef))+"次"
// );
Map map = new HashMap();
map.put("name", TermsValueEnum.convert(fields[i]));
map.put("freq", iterator.docFreq());
map.put("text", oneTermStr);
map.put("totalTermFreq", indexReader.totalTermFreq(new Term(fields[i], bytesRef)));
}
}
Lucene获取分词的内容
最新推荐文章于 2024-07-21 19:02:10 发布