Top-K 问题是一个经典问题,下面分别采用最小堆和 Guava 来解决这个问题:
采用最小堆:
import com.google.common.base.Splitter;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.primitives.Ints;
import org.apache.commons.io.FileUtils;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.*;
/**
* Created by fhqplzj on 17-2-16 at 下午9:57.
*/
public class My4 {
/**
 * Counts the words of a text file and prints the {@code k = 10} entries with the
 * highest counts, highest count first, selecting them with a size-bounded min-heap.
 *
 * @param args ignored
 * @throws IOException if the input file cannot be read
 */
public static void main(String[] args) throws IOException {
    String path = "/home/fhqplzj/IdeaProjects/DocumentClustering/src/resources/data/ap";
    String text = FileUtils.readFileToString(new File(path), Charset.defaultCharset());

    // Tokenize on runs of non-word characters and count every token.
    Iterable<String> words = Splitter.onPattern("\\W+").omitEmptyStrings().split(text);
    Multiset<String> counts = HashMultiset.create(words);
    Set<Multiset.Entry<String>> entries = counts.entrySet();

    int k = 10;
    Comparator<Multiset.Entry<String>> byCount = Comparator.comparingInt(Multiset.Entry::getCount);

    // Min-heap holding at most k entries: its head is the smallest count kept so far.
    PriorityQueue<Multiset.Entry<String>> heap = new PriorityQueue<>(k, byCount);
    for (Multiset.Entry<String> entry : entries) {
        if (heap.size() < k) {
            heap.offer(entry);
        } else if (byCount.compare(heap.peek(), entry) < 0) {
            // The heap is full and this entry beats the current minimum: swap it in.
            heap.poll();
            heap.offer(entry);
        }
    }

    // The heap yields entries in ascending count order; pushing each one onto a
    // stack reverses that, so iteration goes from highest count to lowest.
    Deque<Multiset.Entry<String>> descending = new ArrayDeque<>();
    while (!heap.isEmpty()) {
        descending.push(heap.poll());
    }
    descending.forEach(System.out::println);
}
}
采用 Guava 的 Ordering 类:
import com.google.common.base.Function;
import com.google.common.base.Splitter;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.collect.Ordering;
import org.apache.commons.io.FileUtils;
import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Set;
/**
* Created by fhqplzj on 17-2-19 at 下午4:02.
*/
public class My2 {
public static void main(String[] args) throws IOException {
String path = "/home/fhqplzj/IdeaProjects/DocumentClustering/src/resources/data/ap";
String s = FileUtils.readFileToString(new File(path), Charset.defaultCharset());
Iterable words = Splitter.onPattern("\\W+").omitEmptyStrings().split(s);
HashMultiset counter = HashMultiset.create(words);
Set> entries = counter.entrySet();
Ordering> ordering = Ordering.natural().onResultOf(new Function, Integer>() {
@Nullable
@Override
public Integer apply(@Nullable Multiset.Entry input) {
assert input != null;
return input.getCount();
}
});
List> result = ordering.greatestOf(entries, 10);
result.forEach(System.out::println);
}
}
切忌重复造轮子,以后用 Guava 来解决这一问题。