lucene词频统计

冰块的旅行

于 2020-11-03 16:27:40 发布

阅读量437

点赞数

分类专栏： lucene 文章标签： lucene doc

本文链接：https://blog.csdn.net/qq_38967150/article/details/109472274

版权

lucene 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

下面的示例从 Lucene 索引中读取一篇文档的 content 字段，使用 IK 分词器分词后统计每个词出现的次数，并按词频从高到低输出。

package test;

import lucene.MYIKAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;

/**
 * Prints the term frequencies of one stored document field, most frequent first.
 *
 * <p>The program opens a Lucene index, loads a single document by its internal
 * document id, re-tokenizes the stored {@code content} field with
 * {@code MYIKAnalyzer}, counts how often each token occurs and writes one
 * {@code term: count} line per distinct token to standard output.
 */
public class CipinTest {

    /** Index directory used when no path is given on the command line. */
    private static final String DEFAULT_INDEX_DIR = "C:\\Users\\85335\\Desktop\\luceneTest\\indexDir";

    /** Internal Lucene document id used when none is given on the command line. */
    private static final int DEFAULT_DOC_ID = 3;

    /** Name of the stored field whose text is analyzed. */
    private static final String CONTENT_FIELD = "content";

    /**
     * Entry point.
     *
     * @param args optional: {@code args[0]} is the index directory and
     *             {@code args[1]} the document id; both fall back to the
     *             built-in defaults when absent
     * @throws IOException    if the index cannot be opened or read, or tokenizing fails
     * @throws ParseException never thrown by this implementation; kept so the
     *                        declared signature stays unchanged
     */
    public static void main(String[] args) throws IOException, ParseException {
        boolean hasPath = args != null && args.length > 0;
        boolean hasDocId = args != null && args.length > 1;
        Path indexPath = Paths.get(hasPath ? args[0] : DEFAULT_INDEX_DIR);
        int docId = hasDocId ? Integer.parseInt(args[1]) : DEFAULT_DOC_ID;

        // Read the stored text first so the index handles are released
        // before the (independent) analysis step starts.
        String content;
        try (Directory dir = FSDirectory.open(indexPath);
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Document doc = searcher.doc(docId);
            content = doc.get(CONTENT_FIELD);
        }

        // Document.get returns null when the field is missing or not stored;
        // report that instead of failing inside StringReader with a bare NPE.
        if (content == null) {
            System.err.println("Document " + docId + " has no stored '" + CONTENT_FIELD + "' field");
            return;
        }

        List<Map.Entry<String, Integer>> sorted = new ArrayList<>(countTerms(content).entrySet());
        // Highest count first; the sort is stable, so ties keep the map's iteration order.
        sorted.sort(Map.Entry.comparingByValue(Comparator.reverseOrder()));

        for (Map.Entry<String, Integer> entry : sorted) {
            System.out.println(entry.getKey()+": " + entry.getValue());
        }
    }

    /** Tokenizes {@code text} with MYIKAnalyzer and returns the number of occurrences per token. */
    private static Map<String, Integer> countTerms(String text) throws IOException {
        Map<String, Integer> counts = new HashMap<>();
        // NOTE(review): assumes MYIKAnalyzer extends Lucene's Analyzer (and is therefore Closeable) - confirm.
        try (MYIKAnalyzer analyzer = new MYIKAnalyzer(true);
             TokenStream tokenStream = analyzer.tokenStream("", new StringReader(text))) {
            // TokenStream consumer workflow: fetch attributes, reset(),
            // incrementToken() until false, end(), then close().
            CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                counts.merge(term.toString(), 1, Integer::sum);
            }
            tokenStream.end();
        }
        return counts;
    }
}