Lucene 词频统计

在这里插入图片描述

package test;

import lucene.MYIKAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;

/**
 * Prints the term frequencies of the {@code content} field of one document
 * stored in a Lucene index.
 *
 * <p>The stored text is re-tokenized with {@link MYIKAnalyzer}, every emitted
 * term is counted, and the result is written to standard output as
 * {@code term: count} lines, most frequent term first.
 */
public class CipinTest {

    /** Index directory used when no path is passed on the command line. */
    private static final String DEFAULT_INDEX_DIR = "C:\\Users\\85335\\Desktop\\luceneTest\\indexDir";

    /** Lucene document id analysed when none is passed on the command line. */
    private static final int DEFAULT_DOC_ID = 3;

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} overrides the index directory and
     *             {@code args[1]} overrides the document id. With no arguments the
     *             built-in defaults are used.
     * @throws IOException    if the index cannot be opened or read, or tokenizing fails
     * @throws ParseException declared for caller compatibility; not thrown by this code
     * @throws NumberFormatException if {@code args[1]} is present but not an integer
     */
    public static void main(String[] args) throws IOException, ParseException {
        Path indexPath = Paths.get(args.length > 0 ? args[0] : DEFAULT_INDEX_DIR);
        int docId = args.length > 1 ? Integer.parseInt(args[1]) : DEFAULT_DOC_ID;

        // Only the stored text is needed, so the index is released before analysis starts.
        String content;
        try (Directory dir = FSDirectory.open(indexPath);
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Document doc = searcher.doc(docId);
            content = doc.get("content");
        }

        // Document.get returns null when the field is not stored; StringReader would throw an NPE.
        if (content == null) {
            System.err.println("Document " + docId + " has no stored \"content\" field");
            return;
        }

        List<Map.Entry<String, Integer>> ranked = new ArrayList<>(countTerms(content).entrySet());
        // Highest count first; ties are ordered by term so the output does not depend on
        // HashMap iteration order.
        Comparator<Map.Entry<String, Integer>> byCountDesc =
                (left, right) -> right.getValue().compareTo(left.getValue());
        ranked.sort(byCountDesc.thenComparing(Map.Entry::getKey));

        for (Map.Entry<String, Integer> entry : ranked) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }

    /**
     * Tokenizes {@code text} with {@link MYIKAnalyzer} and counts each emitted term.
     *
     * @param text the text to analyse; must not be {@code null}
     * @return a map from term to the number of times the analyzer emitted it
     * @throws IOException if the token stream fails
     */
    private static Map<String, Integer> countTerms(String text) throws IOException {
        Map<String, Integer> counts = new HashMap<>();
        // Resources close in reverse order: the token stream first, then the analyzer.
        try (MYIKAnalyzer analyzer = new MYIKAnalyzer(true);
             TokenStream tokenStream = analyzer.tokenStream("", new StringReader(text))) {
            // Attributes are registered before reset(), as the TokenStream workflow prescribes.
            CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                counts.merge(term.toString(), 1, Integer::sum);
            }
            // end() completes the consumer workflow before the stream is closed.
            tokenStream.end();
        }
        return counts;
    }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值