Use the shortest unique prefix to represent each word in the array. For example:
input: ["zebra", "dog", "duck"]
output: {zebra: z, dog: do, duck: du}
input: ["zebra", "dog", "duck", "dove"]
output: {zebra: z, dog: dog, duck: du, dove: dov}
首先想到的是排序,然后比较相邻字符串来计算最短唯一前缀。这种方法的时间复杂度比较高,应该是 O(nL·log n),其中 n 是字符串的个数,L 是字符串的长度。然后想到 Trie 树(字典树)可以完美地解决这类问题:以空间换时间,时间复杂度可以达到 O(nL)。
上代码:
// prefix.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// Returns the number of leading characters that `a` and `b` have in common.
static std::string::size_type commonPrefixLength(const std::string& a, const std::string& b) {
    const std::string::size_type limit = std::min(a.size(), b.size());
    std::string::size_type n = 0;
    while (n < limit && a[n] == b[n]) {
        ++n;
    }
    return n;
}

// Prints "<word>: <shortest unique prefix>" for every word, one per line,
// in lexicographically sorted order.
//
// words: array of `size` NUL-terminated strings (not modified).
// size:  number of entries in `words`; nothing is printed when it is <= 0
//        or when `words` is null.
//
// After sorting, the words that share the longest prefix with a given word
// are its immediate neighbours, so the shortest unique prefix is one
// character longer than the larger of the two neighbouring common-prefix
// lengths. A word that is itself a prefix of its neighbour is printed in full.
void shortestPrefix1(char** words, int size) {
    // Guard: the original indexed vWords[size-1] even for an empty input.
    if (!words || size <= 0) {
        return;
    }

    std::vector<std::string> sorted(words, words + size);
    std::sort(sorted.begin(), sorted.end());

    // Common-prefix length with the previous word. Starting at 0 (rather
    // than -1) makes a lone word report its first letter instead of an
    // empty prefix, matching what the trie-based version returns.
    std::string::size_type prevCommon = 0;
    for (std::vector<std::string>::size_type i = 0; i < sorted.size(); ++i) {
        const std::string& word = sorted[i];
        std::string::size_type nextCommon = 0;
        if (i + 1 < sorted.size()) {
            nextCommon = commonPrefixLength(word, sorted[i + 1]);
        }
        // substr clamps the length to the word size, which covers the case
        // where the whole word is a prefix of a neighbour.
        const std::string::size_type prefixLength = std::max(prevCommon, nextCommon) + 1;
        std::cout << word << ": " << word.substr(0, prefixLength) << '\n';
        prevCommon = nextCommon;
    }
}
// One node of a 26-way trie over the lowercase letters 'a'..'z'.
struct TrieNode {
    enum { ALPHABET_SIZE = 26 };

    // Number of added words whose path passes through (or ends at) this node.
    int count;
    // children[c - 'a'] is the subtree for letter c, or NULL when absent.
    TrieNode* children[ALPHABET_SIZE];

    // Value-initialising the array nulls every child pointer, so no memset
    // (and no <cstring>) is needed.
    TrieNode() : count(0), children() {}
};

// Trie of lowercase words that answers "shortest unique prefix" queries.
// The tree owns all of its nodes and frees them on destruction.
class TrieTree {
public:
    TrieTree() : root(NULL) {}

    // Releases every node; the original never freed them.
    ~TrieTree() {
        destroy(root);
    }

    // Returns how many words have been added (duplicates are counted).
    int getTotalCount() const {
        return root == NULL ? 0 : root->count;
    }

    // Adds `str` to the trie, incrementing the pass-through count of every
    // node on its path (the root included).
    // Throws std::invalid_argument if `str` contains a character outside
    // 'a'..'z'; in that case the trie is left unchanged.
    void add(const std::string& str) {
        // Validate up front so a rejected word cannot leave partial counts
        // behind or index children[] out of bounds.
        for (std::string::size_type i = 0; i < str.size(); ++i) {
            if (!isLowercaseLetter(str[i])) {
                throw std::invalid_argument("TrieTree::add: only characters 'a'..'z' are supported");
            }
        }

        if (root == NULL) {
            root = new TrieNode();
        }
        TrieNode* node = root;
        node->count++;
        for (std::string::size_type i = 0; i < str.size(); ++i) {
            const int index = str[i] - 'a';
            if (node->children[index] == NULL) {
                node->children[index] = new TrieNode();
            }
            node = node->children[index];
            node->count++;
        }
    }

    // Returns the shortest prefix of `str` that no other added word shares.
    // Returns `str` itself when no shorter prefix is unique (e.g. `str` is a
    // prefix of another word), and also when the walk cannot proceed: the
    // trie is empty, `str` was never added, or `str` contains a character
    // outside 'a'..'z'.
    std::string shortestPrefix(const std::string& str) const {
        const TrieNode* node = root;
        for (std::string::size_type i = 0; node != NULL && i < str.size(); ++i) {
            if (!isLowercaseLetter(str[i])) {
                break;
            }
            node = node->children[str[i] - 'a'];
            // A count of 1 means only one added word reaches this node.
            if (node != NULL && node->count == 1) {
                return str.substr(0, i + 1);
            }
        }
        return str;
    }

private:
    // Copying would make two trees delete the same nodes; not supported.
    TrieTree(const TrieTree&);
    TrieTree& operator=(const TrieTree&);

    // True when `c` maps to a valid index into TrieNode::children.
    static bool isLowercaseLetter(char c) {
        return c >= 'a' && c <= 'z';
    }

    // Recursively frees `node` and its whole subtree; depth is bounded by
    // the longest added word.
    static void destroy(TrieNode* node) {
        if (node == NULL) {
            return;
        }
        for (int i = 0; i < TrieNode::ALPHABET_SIZE; ++i) {
            destroy(node->children[i]);
        }
        delete node;
    }

    TrieNode* root;
};
// Demo driver: prints the shortest unique prefix of each sample word, first
// with the sort-based routine and then with the trie, followed by the number
// of words stored in the trie.
int _tmain(int argc, _TCHAR* argv[])
{
    char *words[] = {"zebra", "dog", "duck", "dove", "frog", "franch", "fly", "zoo", "great"};
    // Element count of the fixed sample array, computed once.
    const int wordCount = sizeof(words) / sizeof(words[0]);

    cout << "###Shortest prefix by sorting###" << endl;
    shortestPrefix1(words, wordCount);

    TrieTree trie;
    for (int k = 0; k != wordCount; ++k) {
        trie.add(words[k]);
    }

    cout << "\n###Shortest prefix by Trie tree###" << endl;
    for (int k = 0; k != wordCount; ++k) {
        char* current = words[k];
        cout << current << ": " << trie.shortestPrefix(current) << endl;
    }

    cout << "Total number of strings = " << trie.getTotalCount() << endl;
    return 0;
}
输出:
相比之下,Trie树的方案简单,不容易出错;排序的方法要注意之前之后的字符串都需要考虑进来。
面试的时候呢,Trie树可以几分钟写出来。Suffix树就不行了,虽然效率更好,构造太复杂了,写不出来啊。。。