字符串匹配-KMP,BM,Rabin-Karp-等

11 篇文章 0 订阅

32.1介绍了brute force的匹配

32.2 Rabin-Karp

d，p的选择也是重要的。d相当于一个基数，算法导论介绍时候设d为10方便读者理解。算法导论这书果然还是比网上资料靠谱很多啊。

32.3 String matching with finite automata

32.4 The Knuth-Morris-Pratt algorithm

// Listing 4-1
// Corrected routine that computes every entry of the "next" array.
/**
 * Fill the optimized KMP failure table for a pattern.
 *
 * nextval[0] is always -1. For i > 0, nextval[i] is the pattern index to
 * resume comparing at after a mismatch at position i; when the character at
 * that index equals ptrn[i] (so the retry would fail the same way), the
 * entry inherits that index's own nextval instead.
 *
 * @param ptrn    pattern characters; only ptrn[0 .. plen-1] are read
 * @param plen    number of characters in the pattern
 * @param nextval output table with room for plen ints
 *
 * Does nothing when plen <= 0 or either pointer is null.
 */
void get_nextval(char const* ptrn, int plen, int* nextval)
{
    /* An empty pattern has no table slots: writing nextval[0] would be out of bounds. */
    if (plen <= 0 || !ptrn || !nextval)
        return;

    int i = 0;
    int j = -1;
    nextval[0] = -1;

    while (i < plen - 1) {
        if (j == -1 || ptrn[i] == ptrn[j]) {
            ++i;
            ++j;
            /* The correction: after advancing, compare ptrn[i] with ptrn[j] again. */
            if (ptrn[i] != ptrn[j]) {
                /* The earlier, incorrect version consisted of this assignment alone. */
                nextval[i] = j;
            } else {
                /* Falling back to j would mismatch the same way, so skip further back. */
                nextval[i] = nextval[j];
            }
        } else {
            /* Mismatch: fall back along the table already built. */
            j = nextval[j];
        }
    }
}

This is identical to the problem of finding the longest path in a directed acyclic graph. If the graph contains a cycle, return false.
First, build the graph. Then search the graph for the longest path.
#define MAX_NUM 201
int inDegree[MAX_NUM];

/**
 * Build the "can be followed by" graph over the input strings and return the
 * result of longestPath() on it.
 *
 * An edge i -> j is added when suffixHash[i] equals prefixHash[j] and the m
 * characters starting at strs[i]+1 equal the first m characters of strs[j].
 * Presumably every string is m+1 characters long, so this is "the last m
 * characters of i match the first m characters of j" - TODO confirm.
 *
 * Graph layout: graph[i][0] is the out-degree of vertex i, and
 * graph[i][1 .. graph[i][0]] are its successors. inDegree[j] counts the
 * edges entering j.
 *
 * @param strs array of n strings
 * @param m    number of characters compared between a suffix and a prefix
 * @param n    number of strings; must not exceed MAX_NUM
 * @return 0 if some string can follow itself (self loop); otherwise whatever
 *         longestPath(graph, n) returns.
 */
int longestConcat(char ** strs, int m, int n) {
    int graph[MAX_NUM][MAX_NUM];
    int prefixHash[MAX_NUM];
    int suffixHash[MAX_NUM];
    int i, j;

    for (i = 0; i < n; i++) {
        /* NOTE(review): calcHash is defined outside this section. The hashes are
         * passed by value, which only fills them if calcHash takes reference
         * parameters (C++) - confirm against its declaration. */
        calcHash(strs[i], prefixHash[i], suffixHash[i]);
        graph[i][0] = 0; /* every adjacency list starts empty */
    }
    memset(inDegree, 0, sizeof(int)*n);

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            /* The hash comparison is a cheap filter; strncmp confirms the real overlap. */
            if (suffixHash[i] == prefixHash[j] && strncmp(strs[i]+1, strs[j], m) == 0) {
                if (i == j) return 0; // there is a self loop, return false.
                graph[i][0]++;
                /* Store j in the slot just reserved. The original indexed with
                 * graph[i*n], which is a whole row rather than the out-degree. */
                graph[i][graph[i][0]] = j;
                inDegree[j]++;
            }
        }
    }
    return longestPath(graph, n);
}
/**
 * Longest path in a directed graph stored as adjacency lists.
 *
 * Graph layout: graph[v][0] is the out-degree of vertex v and
 * graph[v][1 .. graph[v][0]] are its successors.
 *
 * 1. Topologically sort the vertices with a DFS; a back edge means a cycle.
 * 2. Give every edge weight -1, start every vertex at distance 0, and relax
 *    all edges in topological order. The most negative distance found is the
 *    negated edge count of the longest path.
 */
#ifndef MAX_NUM
#define MAX_NUM 201 /* fallback so this section also compiles on its own */
#endif

int visit[MAX_NUM];    /* DFS: non-zero once a vertex has been entered */
int parent[MAX_NUM];   /* predecessor on the best path found so far, -1 if none */
// -1 path weight, so 0 is enough.
#define MAX_PATH 0
int d[MAX_NUM];        /* d[v] = -(edges on the longest path ending at v) */
int top[MAX_NUM];      /* vertices in DFS finishing order (reverse topological order) */
int finished[MAX_NUM]; /* DFS: non-zero once all descendants of a vertex are done */
int cnt = 0;           /* number of entries currently stored in top[] */

/**
 * Depth-first visit of s that appends vertices to top[] as they finish.
 *
 * @return 1 on success, 0 if a back edge (cycle) was found.
 */
int topdfs(int graph[][MAX_NUM], int s) {
    if (visit[s] != 0) return 1;
    /* Mark s as entered before descending so an edge back to it is seen as a back edge. */
    visit[s] = 1;
    for (int i = 1; i <= graph[s][0]; i++) {
        int next = graph[s][i];
        if (visit[next] != 0 && finished[next] == 0) {
            return 0; //gray node, a back edge;
        }
        /* Propagate a cycle found deeper in the recursion. */
        if (visit[next] == 0 && topdfs(graph, next) == 0) {
            return 0;
        }
    }
    finished[s] = 1;
    top[cnt++] = s;
    return 1;
}

/**
 * Topologically sort vertices 0 .. n-1 into top[] (stored in finishing
 * order, i.e. reversed) and set cnt to the number of vertices recorded.
 *
 * @return 1 on success, 0 if the graph contains a cycle.
 */
int topSort(int graph[][MAX_NUM], int n) {
    memset(visit, 0, n*sizeof(int));
    memset(finished, 0, n*sizeof(int));
    cnt = 0; /* start a fresh ordering on every call */
    for (int i = 0; i < n; i++) {
        if (topdfs(graph, i) == 0) return 0;
    }
    return 1;
}

/**
 * Number of edges on the longest path in the graph.
 *
 * @param graph adjacency lists in the layout described above; every stored
 *              successor must be in the range 0 .. n-1
 * @param n     number of vertices, at most MAX_NUM
 * @return the edge count of the longest path, 0 when n <= 0 or there are no
 *         edges, or -1 if the graph contains a cycle.
 */
int longestPath(int graph[][MAX_NUM], int n) {
    if (n <= 0) return 0;
    if (topSort(graph, n) == 0) return -1; //topological sort failed, there is cycle.

    memset(parent, -1, n*sizeof(int));
    /* memset fills bytes, so this is only valid because MAX_PATH is 0. */
    memset(d, MAX_PATH, n*sizeof(int));

    int min = 0;
    /* top[] holds finishing order, so walk it backwards to get topological order. */
    for (int j = cnt - 1; j >= 0; j--) {
        int u = top[j];
        for (int k = 1; k <= graph[u][0]; k++) {
            int v = graph[u][k];
            if (d[u] - 1 < d[v]) { // relax with path weight -1
                d[v] = d[u] - 1;
                parent[v] = u;
                if (d[v] < min) min = d[v];
            }
        }
    }
    return -min;
}
/*
 * Time complexity analysis:
 *   Hash calculation: O(n*m)
 *   Graph construction: O(n*n)
 *   Topological sort: as DFS, O(V+E)
 *   Longest path: a single relaxation pass in topological order, O(V+E)
 *   In total, it's an O(n*n + n*m) solution.
 * A very good problem. But I really doubt it as a solve-in-20-min interview question.
 */

• 0
点赞
• 1
收藏
觉得还不错? 一键收藏
• 打赏
• 0
评论
09-19
03-10 1912
08-26 540
09-21
11-19
01-16
03-05 595
11-22 197
02-05 7442
10-08 1万+
04-18 8381
09-08 9025
04-25 515

“相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

¥1 ¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。