package test;
import lucene.MYIKAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
/**
 * Prints a term-frequency table for a single document stored in a Lucene index.
 *
 * <p>The program opens the index at {@link #INDEX_DIR}, loads the stored
 * {@link #CONTENT_FIELD} field of document {@link #DOC_ID}, tokenizes that text with
 * {@link MYIKAnalyzer}, and prints every distinct term with its occurrence count,
 * highest count first.
 */
public class CipinTest {

    /** Location of the Lucene index directory on disk. */
    private static final String INDEX_DIR = "C:\\Users\\85335\\Desktop\\luceneTest\\indexDir";

    /** Internal Lucene document id of the document to analyze. */
    private static final int DOC_ID = 3;

    /** Name of the stored field whose text is tokenized. */
    private static final String CONTENT_FIELD = "content";

    /**
     * Entry point: reads one document from the index and prints its term frequencies.
     *
     * @param args ignored
     * @throws IOException if the index cannot be opened or read, or tokenizing fails
     * @throws ParseException never thrown by the code in this method; kept in the
     *     signature so the method's declared interface is unchanged
     * @throws IllegalStateException if the document has no stored {@code content} value
     */
    public static void main(String[] args) throws IOException, ParseException {
        Path indexPath = Paths.get(INDEX_DIR);

        String content;
        // try-with-resources closes the reader first, then the directory, even on failure.
        try (Directory dir = FSDirectory.open(indexPath);
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Document doc = searcher.doc(DOC_ID);
            content = doc.get(CONTENT_FIELD);
        }

        // Fail with a clear message instead of an NPE inside StringReader.
        if (content == null) {
            throw new IllegalStateException(
                    "Document " + DOC_ID + " has no stored field '" + CONTENT_FIELD + "'");
        }

        // NOTE(review): the meaning of the 'true' flag is defined by MYIKAnalyzer, which is
        // not visible here. If MYIKAnalyzer extends Lucene's Analyzer it is Closeable and
        // could also be managed with try-with-resources - confirm.
        MYIKAnalyzer analyzer = new MYIKAnalyzer(true);
        printByFrequency(countTerms(analyzer, content));
    }

    /**
     * Tokenizes {@code text} with {@code analyzer} and counts how often each term occurs.
     *
     * @param analyzer analyzer used to produce the token stream
     * @param text     text to tokenize; must not be {@code null}
     * @return map from term text to its number of occurrences
     * @throws IOException if the token stream fails while being consumed
     */
    private static Map<String, Integer> countTerms(MYIKAnalyzer analyzer, String text)
            throws IOException {
        // HashMap is kept deliberately: its iteration order decides the order of
        // equal-count terms after the stable sort in printByFrequency.
        Map<String, Integer> counts = new HashMap<>();
        try (TokenStream tokenStream = analyzer.tokenStream("", new StringReader(text))) {
            CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);
            // Consumer workflow: reset -> incrementToken* -> end -> close.
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                counts.merge(term.toString(), 1, Integer::sum);
            }
            tokenStream.end();
        }
        return counts;
    }

    /**
     * Prints one {@code term: count} line per entry to standard output, ordered by
     * descending count.
     *
     * @param counts map from term text to occurrence count
     */
    private static void printByFrequency(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        entries.sort(Map.Entry.comparingByValue(Comparator.reverseOrder()));
        for (Map.Entry<String, Integer> entry : entries) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }
}