1.实现strStr()
字符串查找(又称查找子字符串),对于一个给定的 source 字符串和一个 target 字符串,你应该在 source 字符串中找出 target 字符串出现的第一个位置(从0开始)。如果不存在,则返回 -1。例子:
如果 source = "source" 和 target = "target",返回 -1。如果 source = "abcdabcdefg" 和 target = "bcd",返回 1。
1.1 普通实现
// Brute-force substring search.
// Returns the 0-based index of the first occurrence of `target` inside
// `source`, or -1 when there is none or when either pointer is NULL.
// An empty `target` matches at index 0.
int strStr(const char *source, const char *target) {
    if (source == NULL || target == NULL) {
        return -1;
    }
    const int sourceLen = strlen(source);
    const int targetLen = strlen(target);
    // Last start position at which `target` still fits inside `source`;
    // negative when `target` is longer, so the loop below never runs.
    const int lastStart = sourceLen - targetLen;
    int start = 0;
    while (start <= lastStart) {
        // Compare the window [start, start + targetLen) against `target`.
        if (strncmp(source + start, target, targetLen) == 0) {
            return start;
        }
        ++start;
    }
    return -1;
}
1.2 KMP算法
讲解:http://www.ituring.com.cn/article/59881
// Substring search using the Knuth-Morris-Pratt algorithm.
// Returns the 0-based index of the first occurrence of `target` inside
// `source`, or -1 when there is none or when either pointer is NULL.
// An empty `target` matches at index 0.
int strStr(const char *source, const char *target) {
    if (source == NULL || target == NULL) return -1;
    int lenS = strlen(source);
    int lenT = strlen(target);
    // An empty pattern must be handled before the table is built: the table
    // would have zero elements and writing N[0] would be out of bounds.
    if (lenT == 0) return 0;
    // A pattern longer than the text can never match.
    if (lenT > lenS) return -1;

    // N[i] is the index of the last character of the longest proper prefix
    // of target[0..i] that is also a suffix of target[0..i], or -1 if no
    // such prefix exists.
    int *N = new int[lenT];
    N[0] = -1;
    for (int i = 1; i < lenT; ++i) {
        int idx = N[i - 1];
        // Fall back through shorter borders until one can be extended by
        // target[i] or no border is left.
        while (idx >= 0 && target[idx + 1] != target[i]) {
            idx = N[idx];
        }
        if (target[idx + 1] == target[i]) {
            N[i] = idx + 1;
        } else {
            N[i] = -1;
        }
    }

    int idxS = 0;
    int idxT = 0;
    while (idxS < lenS && idxT < lenT) {
        if (source[idxS] == target[idxT]) {
            ++idxS;
            ++idxT;
        } else if (idxT == 0) {
            // Nothing matched yet: simply advance in the text.
            ++idxS;
        } else {
            // Reuse the longest border of the part matched so far instead of
            // rescanning the text.
            idxT = N[idxT - 1] + 1;
        }
    }
    delete[] N;

    if (idxT == lenT) {
        return idxS - idxT;
    }
    return -1;
}
2. 纠正单词拼写错误
比如想输入hello,却错误地输入了hellu,找出出错的字母。
trie树解法:
from string import ascii_lowercase
END = '$'
def make_trie(words):
    """Build a nested-dict trie from an iterable of words.

    Each character maps to a child dict; the end of a word is marked by the
    key END mapping to an empty dict.

    :param words: iterable of strings to insert.
    :return: the root dict of the trie.
    """
    trie = {}
    for word in words:
        node = trie
        for c in word:
            # Create the child on first use, then descend into it.
            node = node.setdefault(c, {})
        # Mark that a complete word ends at this node.
        node[END] = {}
    return trie
def check_fuzzy(trie, word, path='', tol=1):
    """Collect the words stored in ``trie`` that match ``word`` within a budget.

    The search walks the trie one child at a time. Taking a child whose key
    differs from the next letter of ``word`` costs one unit of ``tol``; the
    extra branches below (add / remove / swap) spend further units. A branch
    is abandoned as soon as its budget drops below zero.

    :param trie: current trie node (nested dicts as built by ``make_trie``).
    :param word: the part of the query that has not been consumed yet.
    :param path: the letters taken from the trie so far.
    :param tol: remaining tolerance budget.
    :return: set of stored words reachable within the budget.
    """
    if tol < 0:
        return set()
    if word == '':
        # The query is used up: it is a hit only if a stored word ends here.
        return {path} if END in trie else set()

    ps = set()
    for k in trie:
        if k == END:
            # The end marker has no children, so every recursion through it
            # would return an empty set; skip it.
            continue
        child = trie[k]
        # Taking child k is free when it equals the next query letter.
        tol1 = tol - 1 if k != word[0] else tol
        ps |= check_fuzzy(child, word[1:], path + k, tol1)
        # Add a letter: retry with each possible letter placed in front of
        # the remaining query.
        for c in ascii_lowercase:
            ps |= check_fuzzy(child, c + word[1:], path + k, tol1 - 1)
        # Remove a letter: drop the following query letter as well.
        if len(word) > 1:
            ps |= check_fuzzy(child, word[2:], path + k, tol - 1)
        # Swap letter positions: exchange the next two remaining letters.
        if len(word) > 2:
            ps |= check_fuzzy(child, word[2] + word[1] + word[3:], path + k, tol1 - 1)
    return ps
# Demo: build a trie from three words, print it, then run three fuzzy
# lookups with different tolerance budgets and print each result set.
words = ['hello', 'hela', 'dome']
t = make_trie(words)
print(t)
for query, budget in (('hellu', 1), ('healu', 1), ('healu', 2)):
    print(check_fuzzy(t, query, tol=budget))
结果:
{'h': {'e': {'l': {'l': {'o': {'$': {}}}, 'a': {'$': {}}}}}, 'd': {'o': {'m': {'e': {'$': {}}}}}}
{'hello', 'hela'}
set()
{'hello', 'hela'}
{'hello', 'hela'}
set()
{'hello', 'hela'}
3.乱序字符串(Anagram)
给出一个字符串数组S,找到其中所有的乱序字符串(Anagram)。如果一个字符串是乱序字符串,那么他存在一个字母集合相同,但顺序不同的字符串也在S中。
样例:对于字符串数组 ["lint","intl","inlt","code"],返回 ["lint","inlt","intl"]。
注意:所有的字符串都只包含小写字母。
注意:所有的字符串都只包含小写字母。
时间复杂度O(n^2)
/**
 * Returns every string in {@code strs} that has at least one anagram
 * elsewhere in the array, using pairwise comparison (O(n^2) pairs).
 *
 * @param strs the candidate strings
 * @return the strings that belong to an anagram pair, each listed once
 */
public List<String> anagrams(String[] strs) {
    final int n = strs.length;
    boolean[] emitted = new boolean[n];
    List<String> result = new ArrayList<String>();
    for (int first = 0; first < n; first++) {
        for (int second = first + 1; second < n; second++) {
            // Skip partners already reported and pairs that do not match.
            if (emitted[second] || !areAnagrams(strs[first], strs[second])) {
                continue;
            }
            // Report the left-hand string only the first time it matches.
            if (!emitted[first]) {
                emitted[first] = true;
                result.add(strs[first]);
            }
            emitted[second] = true;
            result.add(strs[second]);
        }
    }
    return result;
}
/**
 * Tells whether two strings consist of exactly the same letters with the
 * same multiplicities.
 *
 * <p>Both strings are expected to contain only lowercase letters
 * {@code 'a'}-{@code 'z'}; characters that fall outside the counting table
 * cause an {@link ArrayIndexOutOfBoundsException}.
 *
 * @param str1 first string
 * @param str2 second string
 * @return {@code true} if the strings are anagrams of each other
 */
public boolean areAnagrams(String str1, String str2) {
    // Strings of different length can never be anagrams.
    if (str1.length() != str2.length()) {
        return false;
    }
    // Same table size as before (30 slots); unlike before, every slot is
    // checked below, so an imbalance in the slots past 'z' is no longer
    // silently ignored.
    int[] balance = new int[30];
    for (int i = 0; i < str1.length(); ++i) {
        balance[str1.charAt(i) - 'a'] += 1;
        balance[str2.charAt(i) - 'a'] -= 1;
    }
    for (int slot : balance) {
        if (slot != 0) {
            return false;
        }
    }
    return true;
}
时间复杂度O(n)的hash算法:
/**
 * Returns every string in {@code strs} that has at least one anagram
 * elsewhere in the array, by grouping the strings on their letter counts.
 *
 * <p>Strings are grouped by an exact textual signature of their 26 letter
 * counts rather than by a 32-bit hash of those counts, so two strings share
 * a group only when their counts are identical. Strings are expected to
 * contain only lowercase letters {@code 'a'}-{@code 'z'}; any other
 * character causes an {@link ArrayIndexOutOfBoundsException}.
 *
 * @param strs the candidate strings; {@code null} is treated as empty
 * @return the strings that belong to a group of two or more anagrams, in
 *         unspecified group order
 */
public List<String> anagrams(String[] strs) {
    List<String> angs = new ArrayList<String>();
    if (strs == null) {
        return angs;
    }
    Map<String, List<String>> groups = new HashMap<String, List<String>>();
    for (String str : strs) {
        int[] count = new int[26];
        for (int i = 0; i < str.length(); ++i) {
            count[str.charAt(i) - 'a']++;
        }
        // Build a collision-free key: the counts joined with a separator so
        // that e.g. (1, 11) and (11, 1) cannot produce the same text.
        StringBuilder signature = new StringBuilder();
        for (int num : count) {
            signature.append(num).append(',');
        }
        String key = signature.toString();
        List<String> group = groups.get(key);
        if (group == null) {
            group = new ArrayList<String>();
            groups.put(key, group);
        }
        group.add(str);
    }
    for (List<String> group : groups.values()) {
        // A group of one has no anagram partner.
        if (group.size() > 1) {
            angs.addAll(group);
        }
    }
    return angs;
}
/**
 * Folds an array of counts into a single {@code int} hash.
 *
 * <p>Each step multiplies the running hash by a multiplier and adds the next
 * count; the multiplier itself is multiplied by a fixed factor after every
 * element. All arithmetic is plain {@code int} arithmetic and may wrap.
 *
 * @param count the values to hash
 * @return the resulting hash; {@code 0} for an empty array
 */
public int getHash(int[] count) {
    final int factor = 63689;
    int multiplier = 378551;
    int result = 0;
    for (int i = 0; i < count.length; i++) {
        result = result * multiplier + count[i];
        multiplier = multiplier * factor;
    }
    return result;
}