Top-K 问题的 Java 实现

Top-K 问题(这里指统计一个文本文件中出现次数最多的前 K 个单词)是一个经典问题,下面分别采用最小堆和 Guava 来解决这个问题:

采用最小堆:

import com.google.common.base.Splitter;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.primitives.Ints;
import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.*;

/**
 * Created by fhqplzj on 17-2-16 at 9:57 PM.
 *
 * <p>Prints the {@code k} most frequent words of a text file, most frequent first,
 * selecting them with a min-heap that never holds more than {@code k} entries.
 *
 * <p>Usage: {@code My4 [path [k]]}. Both arguments are optional; when omitted the
 * original hard-coded file and {@code k = 10} are used.
 */
public class My4 {
    /** Input file read when no path is given on the command line. */
    private static final String DEFAULT_PATH = "/home/fhqplzj/IdeaProjects/DocumentClustering/src/resources/data/ap";

    /** Number of entries printed when no count is given on the command line. */
    private static final int DEFAULT_K = 10;

    /**
     * Counts the words of the input file and prints the {@code k} most frequent ones,
     * one multiset entry per line, in descending order of count.
     *
     * @param args optional arguments: {@code args[0]} is the input file path,
     *             {@code args[1]} is the number of entries to print
     * @throws IOException              propagated from reading the input file
     * @throws NumberFormatException    if {@code args[1]} is not a valid integer
     * @throws IllegalArgumentException if the requested number of entries is negative
     */
    public static void main(String[] args) throws IOException {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        int k = args.length > 1 ? Integer.parseInt(args[1]) : DEFAULT_K;

        String text = FileUtils.readFileToString(new File(path), Charset.defaultCharset());
        // Tokens are separated by runs of non-word characters; empty tokens are dropped.
        Iterable<String> words = Splitter.onPattern("\\W+").omitEmptyStrings().split(text);
        Multiset<String> counts = HashMultiset.create(words);

        for (Multiset.Entry<String> entry : topK(counts.entrySet(), k)) {
            System.out.println(entry);
        }
    }

    /**
     * Selects the {@code k} entries with the highest counts using a bounded min-heap.
     *
     * <p>An entry replaces the current heap root only when its count is strictly
     * greater, so among entries with equal counts the ones seen first are kept.
     *
     * @param entries the (element, count) entries to choose from
     * @param k       maximum number of entries to return
     * @return at most {@code k} entries, ordered from highest to lowest count
     * @throws IllegalArgumentException if {@code k} is negative
     */
    private static Deque<Multiset.Entry<String>> topK(Set<Multiset.Entry<String>> entries, int k) {
        if (k < 0) {
            throw new IllegalArgumentException("k must not be negative: " + k);
        }
        Deque<Multiset.Entry<String>> descending = new ArrayDeque<>();
        if (k == 0) {
            // With k == 0 the heap stays empty, so there is no root to compare against.
            return descending;
        }

        Comparator<Multiset.Entry<String>> byCount =
                (left, right) -> Integer.compare(left.getCount(), right.getCount());
        // Min-heap: the root is the least frequent of the candidates kept so far.
        PriorityQueue<Multiset.Entry<String>> heap = new PriorityQueue<>(byCount);
        for (Multiset.Entry<String> entry : entries) {
            if (heap.size() < k) {
                heap.offer(entry);
            } else if (byCount.compare(heap.peek(), entry) < 0) {
                // Strictly more frequent than the weakest candidate: evict it.
                heap.poll();
                heap.offer(entry);
            }
        }

        // The heap drains in ascending order; pushing onto the front reverses it.
        while (!heap.isEmpty()) {
            descending.push(heap.poll());
        }
        return descending;
    }
}
采用 Guava 的 Ordering 类:

import com.google.common.base.Function;
import com.google.common.base.Splitter;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.collect.Ordering;
import org.apache.commons.io.FileUtils;

import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Set;

/**
 * Created by fhqplzj on 17-2-19 at 4:02 PM.
 *
 * <p>Prints the {@code k} most frequent words of a text file using Guava's
 * {@code Ordering.greatestOf}.
 *
 * <p>Usage: {@code My2 [path [k]]}. Both arguments are optional; when omitted the
 * original hard-coded file and {@code k = 10} are used.
 */
public class My2 {
    /** Input file read when no path is given on the command line. */
    private static final String DEFAULT_PATH = "/home/fhqplzj/IdeaProjects/DocumentClustering/src/resources/data/ap";

    /** Number of entries printed when no count is given on the command line. */
    private static final int DEFAULT_K = 10;

    /**
     * Counts the words of the input file and prints the {@code k} entries with the
     * highest counts, one multiset entry per line, in the order returned by
     * {@code Ordering.greatestOf} (documented by Guava as greatest to least).
     *
     * @param args optional arguments: {@code args[0]} is the input file path,
     *             {@code args[1]} is the number of entries to print
     * @throws IOException              propagated from reading the input file
     * @throws NumberFormatException    if {@code args[1]} is not a valid integer
     * @throws IllegalArgumentException if the requested number of entries is negative
     *                                  (raised by {@code greatestOf} per the Guava documentation)
     */
    public static void main(String[] args) throws IOException {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        int k = args.length > 1 ? Integer.parseInt(args[1]) : DEFAULT_K;

        String text = FileUtils.readFileToString(new File(path), Charset.defaultCharset());
        // Tokens are separated by runs of non-word characters; empty tokens are dropped.
        Iterable<String> words = Splitter.onPattern("\\W+").omitEmptyStrings().split(text);
        Multiset<String> counter = HashMultiset.create(words);

        // Order entries by their occurrence count. The explicitly typed variable keeps
        // the call to onResultOf identical in shape to passing a Function instance.
        Function<Multiset.Entry<String>, Integer> countOf = Multiset.Entry::getCount;
        Ordering<Multiset.Entry<String>> byCount = Ordering.natural().onResultOf(countOf);

        List<Multiset.Entry<String>> result = byCount.greatestOf(counter.entrySet(), k);
        result.forEach(System.out::println);
    }
}
切忌重复造轮子,以后直接用 Guava 来解决这一问题。

展开阅读全文

没有更多推荐了,返回首页