692. Top K Frequent Words(python+cpp)(字典树统计)

题目:

Given a non-empty list of words, return the k most frequent elements.
Your answer should be sorted by frequency from highest to lowest. If two words have the same frequency, then the word with the lower alphabetical order comes first.
Example 1:

Input: ["i", "love", "leetcode", "i", "love", "coding"], k = 2 
Output: ["i", "love"]
 Explanation: "i" and "love" are the two most frequent words. Note that "i" comes
  before "love" due to a lower alphabetical order. 

Example 2:

Input: ["the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"], k = 4 
Output: ["the", "is", "sunny", "day"] 
Explanation: "the", "is", "sunny" and "day" are the four most frequent words, with 
the number of occurrence being 4, 3, 2 and 1 respectively. 

Note:
You may assume k is always valid, 1 ≤ k ≤ number of unique elements.
Input words contain only lowercase letters.
Follow up:
Try to solve it in O(n log k) time and O(n) extra space.

解释:
直接用Counter()后排序就能做,这里主要探讨用Trie做词频统计的方法。
统计+排序的解法:
python代码:

from collections import Counter
class Solution(object):
    def topKFrequent(self, words, k):
        """
        Return the k most frequent words, most frequent first.

        Words with equal frequency are ordered alphabetically (ascending).

        :type words: List[str]
        :type k: int
        :rtype: List[str]
        """
        c = Counter(words)
        # sorted() accepts any iterable of keys. Calling .sort() on c.keys()
        # only works on Python 2, where keys() returns a list; on Python 3 it
        # returns a view object that has no sort() method.
        # Sort key: negated count puts the highest frequency first, the word
        # itself breaks ties alphabetically.
        ranked = sorted(c, key=lambda w: (-c[w], w))
        return ranked[:k]

c++代码:

#include<map>
using namespace std;
class Solution {
public:
    // Returns the k most frequent words, most frequent first; words with the
    // same frequency are ordered by ascending string comparison.
    //
    // words: input words (not modified).
    // k:     number of words requested. Values below 0 or above the number of
    //        distinct words are clamped, so the result never reads past the
    //        end of the candidate list.
    vector<string> topKFrequent(vector<string>& words, int k) {
        map<string, int> _count;
        // Bind by const reference so each word is not copied just to be counted.
        for (const auto& word : words)
            ++_count[word];

        vector<pair<string, int>> list_count(_count.begin(), _count.end());

        // Clamp the request to what is actually available.
        const int available = static_cast<int>(list_count.size());
        const int take = max(0, min(k, available));

        // Only the first `take` positions have to be in final order.
        if (take > 0)
            partial_sort(list_count.begin(), list_count.begin() + take,
                         list_count.end(), cmp);

        vector<string> result;
        result.reserve(take);
        for (int i = 0; i < take; ++i)
            result.push_back(list_count[i].first);
        return result;
    }

    // Ordering used for ranking: higher count first, then smaller word first.
    static bool cmp(const pair<string, int>& item1, const pair<string, int>& item2)
    {
        if (item1.second != item2.second)
            return item1.second > item2.second;
        return item1.first < item2.first;
    }
};

字典树解法(用字典树统计词频):统计得到的词频仍然要先存入字典再排序。字典树通过共享公共前缀来压缩存储空间;但如果最后对全部不同单词整体排序,时间复杂度是 O(m log m)(m 为不同单词的个数),要严格达到 follow up 所要求的 O(n log k),需要用只保留 k 个元素的堆(或部分排序)来代替整体排序:
python代码(比用Counter()慢):

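# A trie still needs a final sort; its benefit for counting is that words share common prefixes.
# Note: collections.Counter is a dict subclass (hash table), not a trie.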
from collections import defaultdict
class TrieNode(object):
    """A trie node: a word-end flag, an occurrence counter and child nodes."""

    def __init__(self):
        # No word ends here yet, so the flag is off and the counter is zero.
        self.is_word, self.freq = False, 0
        # Looking up a missing letter creates a fresh child node on the fly.
        self.children = defaultdict(TrieNode)
class Solution(object):
    def topKFrequent(self, words, k):
        """
        Return the k most frequent words, counting occurrences with a trie.

        Words with equal frequency are ordered alphabetically (ascending).

        :type words: List[str]
        :type k: int
        :rtype: List[str]
        """
        def insert(root, word):
            """Record one occurrence of word in the trie and return its new count."""
            current = root
            for letter in word:
                current = current.children[letter]
            # The node reached after the last letter stands for the whole word.
            current.is_word = True
            current.freq += 1
            return current.freq

        root = TrieNode()
        _dict = {}
        for word in words:
            # insert() returns the running count, so the last write for a word
            # is its total frequency.
            _dict[word] = insert(root, word)
        # sorted() works on the dict directly; calling .sort() on _dict.keys()
        # fails on Python 3 because keys() returns a view, not a list.
        ranked = sorted(_dict, key=lambda w: (-_dict[w], w))
        return ranked[:k]

c++代码:

#include<map>
using namespace std;
// One node of a character trie used to count word occurrences.
// A node owns the child nodes stored in `children` and deletes them when it
// is destroyed, so destroying the root releases the whole trie.
struct TrieNode {
    bool isWord = false;            // set once an inserted word ends at this node
    int freq = 0;                   // how many inserted words end at this node
    char c = '\0';                  // letter this node was created for ('\0' when default-constructed)
    map<char,TrieNode*> children;   // owned child nodes, keyed by letter

    TrieNode(char x) : c(x) {}
    TrieNode() {}

    // Copying would make two nodes delete the same children.
    TrieNode(const TrieNode&) = delete;
    TrieNode& operator=(const TrieNode&) = delete;

    ~TrieNode() {
        for (auto& child : children)
            delete child.second;
    }
};

class Solution {
public:
    // Returns the k most frequent words, most frequent first; words with the
    // same frequency are ordered by ascending string comparison. Occurrences
    // are counted by inserting every word into a trie.
    //
    // words: input words (not modified).
    // k:     number of words requested. Values below 0 or above the number of
    //        distinct words are clamped.
    vector<string> topKFrequent(vector<string>& words, int k) {
        // The root lives on the stack; its destructor frees every node that
        // insert() allocates, so nothing leaks when this function returns.
        TrieNode root;
        map<string, int> _count;
        for (const auto& word : words)
        {
            // insert() returns the running count; the last write per word is its total.
            _count[word] = insert(&root, word);
        }

        vector<pair<string, int>> list_count(_count.begin(), _count.end());

        // Clamp the request so the loop below never indexes past the end.
        const int available = static_cast<int>(list_count.size());
        const int take = max(0, min(k, available));

        // Only the first `take` positions have to be in final order.
        if (take > 0)
            partial_sort(list_count.begin(), list_count.begin() + take,
                         list_count.end(), cmp);

        vector<string> result;
        result.reserve(take);
        for (int i = 0; i < take; ++i)
            result.push_back(list_count[i].first);
        return result;
    }

    // Adds one occurrence of `word` below `root`, creating missing nodes, and
    // returns the updated occurrence count of that word.
    int insert(TrieNode* root, const string& word) {
        TrieNode* current = root;
        for (char letter : word)
        {
            // operator[] yields a null pointer for a letter not seen before;
            // a single lookup covers both the existence check and the descent.
            TrieNode*& next = current->children[letter];
            if (next == nullptr)
                next = new TrieNode(letter);
            current = next;
        }
        current->isWord = true;
        ++current->freq;
        return current->freq;
    }

    // Ordering used for ranking: higher count first, then smaller word first.
    static bool cmp(const pair<string, int>& item1, const pair<string, int>& item2)
    {
        if (item1.second != item2.second)
            return item1.second > item2.second;
        return item1.first < item2.first;
    }
};

总结:
虽然字典树的解法速度慢,但是学会了用字典树统计词频,况且节省了存储空间。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值