692. Top K Frequent Words（python+cpp）（字典树统计）_692. top k frequent words python-CSDN博客

本文链接：https://blog.csdn.net/qq_21275321/article/details/84346746

题目：

Given a non-empty list of words, return the k most frequent elements.
Your answer should be sorted by frequency from highest to lowest. If two words have the same frequency, then the word with the lower alphabetical order comes first.
Example 1:
Input: ["i", "love", "leetcode", "i", "love", "coding"], k = 2 
Output: ["i", "love"]
Explanation: "i" and "love" are the two most frequent words. Note that "i" comes
before "love" due to a lower alphabetical order.
Example 2:
Input: ["the", "day", "is", "sunny", "the", "the", "the", "sunny", "is", "is"], k = 4 
Output: ["the", "is", "sunny", "day"] 
Explanation: "the", "is", "sunny" and "day" are the four most frequent words, with 
the number of occurrence being 4, 3, 2 and 1 respectively. 
Note:
You may assume k is always valid, 1 ≤ k ≤ number of unique elements.
Input words contain only lowercase letters.
Follow up:
Try to solve it in O(n log k) time and O(n) extra space.

解释：
直接用Counter()后排序就能做，这里主要探讨用Trie做词频统计的方法。
统计+排序的解法：
python代码：

from collections import Counter
class Solution(object):
    def topKFrequent(self, words, k):
        """
        Return the k most frequent words, most frequent first; words with
        equal frequency are ordered alphabetically.

        :type words: List[str]
        :type k: int
        :rtype: List[str]
        """
        c = Counter(words)
        # sorted() accepts any iterable, so this works on Python 2 and 3 alike;
        # calling .sort() on c.keys() fails on Python 3, where keys() is a view.
        # Key: higher count first (negated), then the word itself as tie-breaker.
        ranked = sorted(c, key=lambda w: (-c[w], w))
        return ranked[:k]

c++代码：

#include<algorithm>
#include<cstddef>
#include<map>
#include<string>
#include<utility>
#include<vector>
using namespace std;
/**
 * LeetCode 692 (Top K Frequent Words): count with an ordered map, then
 * rank the distinct words.
 */
class Solution {
public:
    /**
     * @brief Returns the k most frequent words, most frequent first.
     *
     * Words with the same frequency are ordered alphabetically (see cmp).
     *
     * @param words Input words; not modified.
     * @param k     Number of words requested. Values below 0 are treated as 0
     *              and values above the number of distinct words are clamped,
     *              so the result never indexes out of range.
     * @return The top min(k, distinct words) words in ranking order.
     */
    std::vector<std::string> topKFrequent(std::vector<std::string>& words, int k) {
        std::map<std::string, int> _count;
        // Bind by const reference: iterating by value would copy every string.
        for (const auto& word : words)
            ++_count[word];

        std::vector<std::pair<std::string, int>> list_count(_count.begin(), _count.end());

        // Clamp k so that neither the partial sort nor the copy loop can run
        // past the end of list_count.
        const std::size_t limit =
            std::min(list_count.size(), static_cast<std::size_t>(std::max(k, 0)));
        const auto middle = list_count.begin() + static_cast<std::ptrdiff_t>(limit);

        // Only the first `limit` positions have to be in final order; the map
        // keys are unique, so cmp is a strict total order and the prefix is
        // exactly what a full sort would have produced.
        std::partial_sort(list_count.begin(), middle, list_count.end(), cmp);

        std::vector<std::string> result;
        result.reserve(limit);
        for (std::size_t i = 0; i < limit; ++i)
            result.push_back(std::move(list_count[i].first));
        return result;
    }

    /**
     * @brief Ranking order: higher count first, then lexicographically smaller word.
     * @return true when item1 must appear before item2.
     */
    static bool cmp(const std::pair<std::string, int>& item1,
                    const std::pair<std::string, int>& item2)
    {
        if (item1.second != item2.second)
            return item1.second > item2.second;
        return item1.first < item2.first;
    }
};

字典树解法（用字典树统计词频）：字典树统计出的词频仍然需要存入字典之后再排序，但具有公共前缀的单词可以共享节点，有利于压缩存储空间。注意：只要最后仍对全部不同的单词做完整排序，时间复杂度就是 O(n log n)；要达到 follow up 所要求的 O(n log k)，需要改用大小为 k 的堆或部分排序。
python代码（比用Counter()慢）：

#用字典树统计词频之后仍然需要排序；字典树的好处是公共前缀共享节点，有利于压缩空间
#Counter() 是 dict 的子类，内部用哈希表计数，并不是字典树
from collections import defaultdict
class TrieNode(object):
    """One node of a character trie that also counts word occurrences."""

    def __init__(self):
        # How many times a word ending at this node has been recorded.
        self.freq = 0
        # Becomes True once a word is marked as ending at this node.
        self.is_word = False
        # Maps a character to its child node; looking up a missing character
        # creates a fresh TrieNode for it on the spot.
        self.children = defaultdict(TrieNode)
class Solution(object):
    def topKFrequent(self, words, k):
        """
        Return the k most frequent words, most frequent first; words with
        equal frequency are ordered alphabetically. Frequencies are counted
        by inserting every word into a trie.

        :type words: List[str]
        :type k: int
        :rtype: List[str]
        """
        def insert(root, word):
            """Insert word below root and return its updated occurrence count."""
            current = root
            for letter in word:
                current = current.children[letter]
            # At the node of the last letter: mark it as a word end and count it.
            current.is_word = True
            current.freq += 1
            return current.freq

        root = TrieNode()
        _dict = {}
        for word in words:
            # insert() returns the running count, so after the loop each word
            # maps to its total frequency.
            _dict[word] = insert(root, word)
        # sorted() accepts any iterable, so this works on Python 2 and 3 alike;
        # calling .sort() on _dict.keys() fails on Python 3, where keys() is a view.
        ranked = sorted(_dict, key=lambda w: (-_dict[w], w))
        return ranked[:k]

c++代码：

#include<map>
using namespace std;
/**
 * Node of a character trie that counts how often each word was inserted.
 *
 * A node owns its children: they are allocated with `new` in
 * Solution::insert and released by this node's destructor, so destroying
 * the root frees the whole trie.
 */
struct TrieNode {
    bool isWord = false;   // true once an inserted word ends at this node
    int freq = 0;          // number of inserted words that end at this node
    char c = '\0';         // character this node was created for ('\0' when default-constructed)
    std::map<char, TrieNode*> children;

    TrieNode(char x) : c(x) {}
    TrieNode() = default;

    // The node owns raw child pointers; a shallow copy would delete them twice.
    TrieNode(const TrieNode&) = delete;
    TrieNode& operator=(const TrieNode&) = delete;

    ~TrieNode() {
        for (auto& entry : children)
            delete entry.second;  // recursion depth is bounded by the longest word
    }
};

/**
 * LeetCode 692 (Top K Frequent Words): frequencies are counted through a
 * trie, collected per distinct word in a map, then ranked.
 */
class Solution {
public:
    /**
     * @brief Returns the k most frequent words, most frequent first.
     *
     * Words with the same frequency are ordered alphabetically (see cmp).
     *
     * @param words Input words; not modified.
     * @param k     Number of words requested. Values below 0 are treated as 0
     *              and values above the number of distinct words are clamped,
     *              so the result never indexes out of range.
     * @return The top min(k, distinct words) words in ranking order.
     */
    std::vector<std::string> topKFrequent(std::vector<std::string>& words, int k) {
        // Automatic storage: the trie is released when the function returns,
        // including on an exception, instead of being leaked.
        TrieNode root;
        std::map<std::string, int> _count;
        for (const auto& word : words)
            // insert() returns the running count, so the last write per word
            // is its total frequency.
            _count[word] = insert(&root, word);

        std::vector<std::pair<std::string, int>> list_count(_count.begin(), _count.end());

        // Clamp k so that neither the partial sort nor the copy loop can run
        // past the end of list_count.
        const std::size_t limit =
            std::min(list_count.size(), static_cast<std::size_t>(std::max(k, 0)));
        const auto middle = list_count.begin() + static_cast<std::ptrdiff_t>(limit);

        // Only the first `limit` positions have to be in final order; the map
        // keys are unique, so cmp is a strict total order and the prefix is
        // exactly what a full sort would have produced.
        std::partial_sort(list_count.begin(), middle, list_count.end(), cmp);

        std::vector<std::string> result;
        result.reserve(limit);
        for (std::size_t i = 0; i < limit; ++i)
            result.push_back(std::move(list_count[i].first));
        return result;
    }

    /**
     * @brief Inserts word into the trie rooted at root.
     *
     * Missing nodes along the path are created; the node for the last
     * character is flagged as a word end and its counter is incremented.
     *
     * @param root Root of the trie; must not be null.
     * @param word Word to insert.
     * @return How many times word has been inserted so far, including this call.
     */
    int insert(TrieNode* root, const std::string& word) {
        TrieNode* current = root;
        for (const char letter : word)
        {
            // operator[] value-initialises a missing entry to nullptr, so a
            // single lookup both finds and reserves the slot.
            TrieNode*& child = current->children[letter];
            if (child == nullptr)
                child = new TrieNode(letter);
            current = child;
        }
        current->isWord = true;
        return ++current->freq;
    }

    /**
     * @brief Ranking order: higher count first, then lexicographically smaller word.
     * @return true when item1 must appear before item2.
     */
    static bool cmp(const std::pair<std::string, int>& item1,
                    const std::pair<std::string, int>& item2)
    {
        if (item1.second != item2.second)
            return item1.second > item2.second;
        return item1.first < item2.first;
    }
};