Leetcode 269. Alien Dictionary(python)

努利！奋斗！

已于 2022-05-14 01:41:48 修改

阅读量2.5k

点赞数

分类专栏： Leetcode 字符串文章标签： leetcode python

于 2020-09-26 07:25:47 首次发布

本文链接：https://blog.csdn.net/qq_37821701/article/details/108807236

版权

Leetcode 字符串专栏收录该内容

37 篇文章

订阅专栏

博客介绍了LeetCode 269题——Alien Dictionary的拓扑排序解法，详细阐述了如何利用拓扑排序解决此问题，包括构建图的过程和注意点。在二刷过程中，作者用C++重新实现，强调了构建图和初始化入度的重要性，并提供了清晰的注释。三刷时，讨论了面试中可能的问题以及优化了查找无效情况的方法，降低了时间复杂度。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

题目

在这里插入图片描述

解法：拓扑排序

这道题的解法leetcode官方写的非常好，也非常有助于理解拓扑排序，建议仔仔细细地看他的官方解析

class Solution:
    def alienOrder(self, words: List[str]) -> str:
        # create adject matrx of the graph
        adj_list = collections.defaultdict(set)
        # create initial indegrees 0 for all distinct words
        indegrees = {}
        for word in words:
            for c in word:
                if c in indegrees:
                    continue
                indegrees[c] = 0
        
        # construct the graph and indegrees
        for first_word,second_word in zip(words,words[1:]):
            for c,d in zip(first_word,second_word):
                if c!=d:
                    # this line is needed, otherwise the indegrees of d will be repeatedly added
                    if d not in adj_list[c]:
                        adj_list[c].add(d)
                        indegrees[d]+=1
                    break
            # this 'else' will still match with the 'if' inside the for loop, it means if after any zip pairs c and d is not equal, codes in 'else' won't be runned. only when all pairs are equal, then codes in 'else' will be runned. In other words, the 'else' match to the final 'if' of the for loop
            else:
                # check if the second word is a prefix of the first word
                if len(second_word)<len(first_word):
                    return ''
        
        # pick all nodes with zero indegree and put it into queue
        q = collections.deque()
        for k,v in indegrees.items():
            if v==0:
                q.append(k)
        
        # pick off zero indegree nodes level by level,and add to the output
        ans = []
        while q:
            c = q.popleft()
            ans.append(c)
            for d in adj_list[c]:
                indegrees[d] -= 1
                if indegrees[d]==0:
                    q.append(d)
        
        # if there are letter that not appear in the output, means there is a cycle in the graph, because on the indegrees of nodes in a cycle will all be non-zero
        if len(ans)<len(indegrees):
            return ''
        
        return "".join(ans)

这边关于第二步的else需要非常仔细的推敲为啥这样可以判断第二个word是不是第一个word的prefix

拓扑排序的时间复杂度：如果DAG图有n个顶点，e条边，在拓扑排序的过程中，搜索入度为零的顶点所需的时间是O(n)。在正常情况下，每个顶点进一次栈，出一次栈，所需时间O(n)。每个顶点入度减1的运算共执行了e次。所以总的时间复杂为O(n+e)。

二刷

二刷的时候用C++写了一遍，思路更清晰，也写了更多注释。总体思路就是利用拓扑排序，关键在于如何构建图，如果图构建好了，那么拓扑序就是我们要的答案。在构建图是，比较相邻两个words，逐个比较两个words相同位置的字符，直到找到第一个字符不相同的位置，这个位置的两个字符我们可以获得有效信息，也就是这两个字符在alien order中的相对顺序。需要注意的是也就是这个位置有用，因为后面的不同位置我们无法获取任何信息。

有一个需要注意的点是，因为这边不是26个字符都会保证出现，所以图和indegrees都只能用hashmap来表示，而不是常见的vector形式。所以要注意需要把出现过的每个字符的入度都初始化为0，不然入度为0的节点不会在indegrees中出现，我们后面进行拓扑排序也就找不到入口

注释写得非常清楚

class Solution {
public:
    string alienOrder(vector<string>& words) {
        // key of the graph is the char appeared in the words, values are the chars comes after the key char in alien order(the relative relationship is derived from the words)
        unordered_map<char,vector<char>> graph;
        unordered_map<char,int> indegrees;
        // we must initialize the indegrees, because indegrees is a hashmap, if we does not initialize the indegrees of the chars to 0, then the chars with no indegree will not appear in the indegrees hashmap after we build the graph and calculate the indegrees 
        for(auto word : words){
            for(auto c : word){
                indegrees[c] = 0;
            }
        }
        
        // build the graph
        for(int i=0;i<words.size()-1;i++){
            // for every sequential pair of words, we can derive the relationship of the chars appeared in the two words
            string w1 = words[i], w2 = words[i+1];
            // first we need to eliminate the invalid case when w2 is a prefix of w1
            if(w1.size() > w2.size() && w1.find(w2) == 0){
                return "";
            }
            // compare the two words char by char, find the first position where the chars at the same position of two words are different, we can have relative order of these two chars. And this is the only information we can get from these two words
            for(int j=0;j<min(w1.size(),w2.size());j++){
                char c1 = w1[j], c2 = w2[j];
                if(c1 != c2){
                    graph[c1].push_back(c2);
                    indegrees[c2] += 1;
                    // we need to break here, because the information after the first two different position cannot tell us anything
                    break;
                }
            }
        }
        
        // then we come to standard topology sort
        queue<char> q;
        // find all the chars with 0 indegrees, these chars can be used as the beginning of the alien order
        for(auto it : indegrees){
            if(it.second == 0){
                q.push(it.first);
            }
        }
        // bfs for topology sort
        string ans = "";
        while(!q.empty()){
            char node = q.front();
            q.pop();
            ans += node;
            for(auto nei : graph[node]){
                indegrees[nei] -= 1;
                if(indegrees[nei] == 0){
                    q.push(nei);
                }
            }
            
        }
        
        // if indegrees of all the chars are 0, means no cycle, otherwise cycle existed
        for(auto it : indegrees){
            if(it.second != 0){
                return "";
            }
        }
        return ans;
    }
};

三刷

题目被锁了，很难受，只能自己开了个ide写，用了些local的test case
这里有些good question to ask during interview：

what if one word is a prefix of the other one?
whill the words always give valid answer?
这次解法主要差别是在如何发现invalid的case，避免了使用string的find方法，应该来说时间复杂度会更低

#include <iostream>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <stack>
#include <queue>

using namespace std;

string alienOrder(vector<string> words){
    unordered_map<char,unordered_set<char>> neighs;
    vector<int> indegrees(26,-1);
    
    for(auto& w : words){
        for(auto& c : w){
            indegrees[c-'a'] = 0;
        }
    }
    for(int i=0;i<words.size();i++){
        for(int j=i+1;j<words.size();j++){
            int pi = 0;
            int pj = 0;
            while(words[i][pi] == words[j][pj] && pi < words[i].size() && pj < words[j].size()){
                pi++;
                pj++;
            }
            if(pi < words[i].size() && pj < words[j].size()){
                if(!neighs[words[i][pi]].count(words[j][pj])) {
                    neighs[words[i][pi]].insert(words[j][pj]);
                    indegrees[words[j][pj]-'a']++;
                }
                if(neighs[words[j][pj]].count(words[i][pi])) return "";
            }
        }
    }
    queue<char> q;
    for(int i=0;i<26;i++){
        // cout << char(i+'a') << " " << indegrees[i] << endl;
        if(indegrees[i] == 0) {
            q.push(char(i+'a'));
            // cout << char(i+'a') << endl;
        }
    }
    string ans = "";
    while(!q.empty()){
        char top = q.front();
        q.pop();
        ans = ans + top;
        for(auto& neigh : neighs[top]){
            indegrees[neigh-'a']--;
            if(indegrees[neigh-'a'] == 0) q.push(neigh);
        }
    }
    return ans;
    
}

int main()
{
    // vector<string> words{"wrt","wrf","er","ett","rftt"};
    // vector<string> words{"z","x","w"};
    // vector<string> words{"zx","zxw","w"};
    vector<string> words{"z","x","z"};
    string res = alienOrder(words);
    cout << res << endl;

    return 0;
}