import com.google.common.collect.Lists;
import com.google.common.io.CharSource;
import com.google.common.io.Files;
import com.google.common.io.LineProcessor;
import com.google.common.util.concurrent.AtomicLongMap;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import java.io.File;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingQueue;
import static com.google.common.base.Charsets.UTF_8;
/**
 * Command-line utility that prints the {@code k} most frequent words of a UTF-8 text file.
 *
 * <p>The work is done in three steps: the file is split into words
 * ({@link #getWordListBySplitPaper(File)}), the occurrences of each word are counted
 * ({@link #getWordAndCountMap(List)}), and the {@code k} words with the highest counts are
 * selected ({@link #topKWord(int, Map)}).
 */
public class TopKWord {

    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_SOURCE_PATH = "D:\\CodeBetter\\src\\main\\resources\\io\\sourceA";

    /** Number of words reported by {@link #main(String[])}. */
    private static final int DEFAULT_K = 10;

    /**
     * Prints the {@value #DEFAULT_K} most frequent words of a file to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse. When absent,
     *             {@link #DEFAULT_SOURCE_PATH} is used.
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE_PATH;
        File file = new File(path);
        List<String> wordList = getWordListBySplitPaper(file);
        Map<String, Long> wordAndCountMap = getWordAndCountMap(wordList);
        Queue<String> wordQ = topKWord(DEFAULT_K, wordAndCountMap);
        System.out.println(wordQ);
    }

    /**
     * Reads {@code file} as UTF-8 and splits every non-blank line into words.
     *
     * <p>Tokens are separated by whitespace; runs of separators never produce empty tokens.
     * If the last character of a token is not alphanumeric, that single character is removed
     * (so {@code "end."} becomes {@code "end"}). Tokens that are empty after this removal are
     * dropped.
     *
     * @param file the file to read; {@code null} yields an empty list
     * @return the words in file order, never {@code null}
     * @throws IOException if the file cannot be read
     */
    private static List<String> getWordListBySplitPaper(File file) throws IOException {
        if (file == null) {
            return Lists.newArrayList();
        }
        CharSource charSource = Files.asCharSource(file, UTF_8);
        return charSource.readLines(new LineProcessor<List<String>>() {
            private final List<String> result = Lists.newArrayList();

            @Override
            public boolean processLine(String line) {
                if (StringUtils.isBlank(line)) {
                    return true;
                }
                // StringUtils.split treats adjacent separators as one, so no empty tokens
                // reach the substring calls below (String.split(" ") produced them for
                // leading or repeated spaces, which made substring(-1) throw).
                for (String token : StringUtils.split(line)) {
                    String word = stripTrailingNonAlphanumeric(token);
                    if (!word.isEmpty()) {
                        result.add(word);
                    }
                }
                // Always continue with the next line.
                return true;
            }

            @Override
            public List<String> getResult() {
                return result;
            }
        });
    }

    /**
     * Removes the last character of {@code token} when that character is not alphanumeric.
     *
     * @param token a non-empty token
     * @return the token without its trailing punctuation character; may be empty
     */
    private static String stripTrailingNonAlphanumeric(String token) {
        int lastIndex = token.length() - 1;
        if (StringUtils.isAlphanumeric(token.substring(lastIndex))) {
            return token;
        }
        return token.substring(0, lastIndex);
    }

    /**
     * Counts how many times each word occurs in {@code wordList}.
     *
     * @param wordList the words to count; {@code null} is treated as empty
     * @return a map from word to its number of occurrences
     */
    private static Map<String, Long> getWordAndCountMap(List<String> wordList) {
        AtomicLongMap<String> counts = AtomicLongMap.create();
        if (wordList != null) {
            wordList.forEach(counts::incrementAndGet);
        }
        return counts.asMap();
    }

    /**
     * Selects the {@code k} words with the highest counts.
     *
     * <p>A min-heap of at most {@code k} (word, count) pairs is maintained while scanning the
     * map: the heap root is always the weakest candidate, so a new word replaces it only when
     * its count is strictly greater. Keeping word and count in the same heap element
     * guarantees that the evicted word is the one that owns the evicted count.
     *
     * @param k               maximum number of words to return
     * @param wordAndCountMap word to occurrence count
     * @return up to {@code k} words ordered from most to least frequent; empty when
     *         {@code k <= 0} or the map is {@code null} or empty
     */
    private static Queue<String> topKWord(int k, Map<String, Long> wordAndCountMap) {
        Queue<String> topWords = new LinkedBlockingQueue<>();
        if (k <= 0 || MapUtils.isEmpty(wordAndCountMap)) {
            return topWords;
        }

        // Bound the initial capacity by the map size so a large k does not over-allocate.
        int capacity = Math.min(k, wordAndCountMap.size());
        PriorityQueue<WordCount> weakestFirst =
                new PriorityQueue<>(capacity, Comparator.comparingLong((WordCount wc) -> wc.count));

        for (Map.Entry<String, Long> entry : wordAndCountMap.entrySet()) {
            Long count = entry.getValue();
            if (count == null) {
                // A word without a count cannot be ranked.
                continue;
            }
            if (weakestFirst.size() < k) {
                weakestFirst.add(new WordCount(entry.getKey(), count));
            } else if (weakestFirst.peek().count < count) {
                weakestFirst.poll();
                weakestFirst.add(new WordCount(entry.getKey(), count));
            }
        }

        // Polling the min-heap yields ascending counts; reverse to report the most frequent first.
        List<String> ascending = Lists.newArrayListWithCapacity(weakestFirst.size());
        while (!weakestFirst.isEmpty()) {
            ascending.add(weakestFirst.poll().word);
        }
        topWords.addAll(Lists.reverse(ascending));
        return topWords;
    }

    /** Immutable pairing of a word with its occurrence count, used as the heap element. */
    private static final class WordCount {
        private final String word;
        private final long count;

        private WordCount(String word, long count) {
            this.word = word;
            this.count = count;
        }
    }
}