字符串匹配-KMP,BM,Rabin-Karp-等

最新推荐文章于 2023-06-09 23:32:21 发布

穴工

最新推荐文章于 2023-06-09 23:32:21 发布

阅读量364

点赞数

分类专栏：编程文章标签：学习算法

本文链接：https://blog.csdn.net/jiongmumu/article/details/11794801

版权

编程专栏收录该内容

11 篇文章 0 订阅

订阅专栏

字符串匹配果然是一个很深奥的需要仔细学习的问题

好多各种字符串匹配算法啊！

搜了网上资料，总是漏洞百出啊，看来还是书本资料比较靠谱。

32.1介绍了brute force的匹配

32.2 Rabin-Karp

算法导论1014页

主要思想是把模式串和文本中每个长度为 m 的子串都看作一个数字，取模后 hash 到某个值，再比较 hash 值是否相等；不同的串可能 hash 到同一个值（伪命中），所以 hash 值相等时还需要逐字符比较来确认。

d，q的选择也是重要的。d相当于一个基数（字符集的大小），q是取模用的素数；算法导论介绍时候设d为10方便读者理解。算法导论这书果然还是比网上资料靠谱很多啊。

32.3 String matching with finite automata

构建自动机好麻烦啊。

留着以后学习==

32.4 The Knuth-Morris-Pratt algorithm

我觉得是对自动机算法的简化版本。自动机就是为了这个做铺垫。

自动机构建很麻烦。

看了下，懒得自己真的编码实现了；又是只理解了大概思路，懒得深究了。

又是中秋节，那么凄凉的感觉==

截取网上对其的分析

求next数组

// Listing 4-1
// Corrected routine that fills the optimized KMP failure table ("nextval").
//
//   ptrn    - pattern characters; only ptrn[0 .. plen-1] are read
//   plen    - number of characters in the pattern
//   nextval - output array with room for at least plen ints
//
// nextval[0] is always -1.  For i > 0, let j be the length of the longest
// proper border of ptrn[0 .. i-1]; the entry is j when ptrn[i] != ptrn[j],
// and otherwise it is nextval[j], because falling back to j would compare
// the very same character again and fail again.
//
// An empty pattern (plen <= 0) or a null pointer leaves nextval untouched,
// so a zero-sized output buffer is never written.
void get_nextval(char const* ptrn, int plen, int* nextval)
{
    if (plen <= 0 || !ptrn || !nextval)
        return;

    int i = 0;
    int j = -1;
    nextval[0] = -1;

    while (i < plen - 1)
    {
        if (j == -1 || ptrn[i] == ptrn[j])
        {
            // Either there is no border left to try (j == -1) or the
            // current border extends by one character.
            ++i;
            ++j;
            // The correction over the naive table: after advancing, look at
            // ptrn[i] versus ptrn[j] once more.  The earlier, incorrect
            // version unconditionally stored j here.
            if (ptrn[i] != ptrn[j])
                nextval[i] = j;
            else
                nextval[i] = nextval[j];
        }
        else
        {
            // Mismatch: retry with the next shorter candidate border.
            j = nextval[j];
        }
    }
}

有 n 个长为 m+1 的字符串，
如果某个字符串的最后 m 个字符与某个字符串的前 m 个字符匹配，则两个字符串可以联接，
问这 n 个字符串最多可以连成一个多长的字符串，如果出现循环，则返回错误。
ANSWER
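This is equivalent to finding the longest path in a directed acyclic graph: each string is a vertex, and there is an edge from s to t when the last m characters of s match the first m characters of t. If the graph contains a cycle, return an error.
First, build the graph. Then search the graph for the longest path.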

#include <string.h>

/* Maximum number of strings (graph vertices) supported. */
#define MAX_NUM 201

/*
 * Working state shared by the routines below.  Everything is rebuilt on each
 * call to longestConcat(), so the functions are not reentrant.
 */
int inDegree[MAX_NUM];  /* inDegree[v]: number of strings that can precede v  */
int visit[MAX_NUM];     /* DFS: non-zero once a vertex has been entered       */
int finished[MAX_NUM];  /* DFS: non-zero once a vertex has been fully explored */
int top[MAX_NUM];       /* vertices in DFS finish order (reverse topological) */
int cnt = 0;            /* number of entries currently stored in top[]        */
int parent[MAX_NUM];    /* parent[v]: predecessor of v on its longest chain, or -1 */
/* Every edge weighs -1, so 0 is a sufficient "no path yet" starting value. */
#define MAX_PATH 0
int d[MAX_NUM];         /* d[v]: minus the number of joins on the longest chain ending at v */

/* Polynomial hash of the first len characters of s. */
static unsigned int hashChars(const char *s, int len)
{
    unsigned int h = 0;
    for (int i = 0; i < len; i++) {
        h = h * 31u + (unsigned char)s[i];
    }
    return h;
}

/*
 * Depth-first visit of vertex s.  graph[v][0] holds the out-degree of v and
 * graph[v][1 .. graph[v][0]] hold its successors.  Vertices are appended to
 * top[] as they finish.
 * Returns 1 on success, 0 when a back edge (i.e. a cycle) is found.
 */
static int topdfs(int graph[][MAX_NUM], int s)
{
    visit[s] = 1; /* entered but not finished: "gray" */
    for (int k = 1; k <= graph[s][0]; k++) {
        int next = graph[s][k];
        if (visit[next] != 0) {
            if (finished[next] == 0) {
                return 0; /* edge into a gray vertex: back edge */
            }
            continue; /* already fully explored */
        }
        if (topdfs(graph, next) == 0) {
            return 0; /* propagate a cycle found deeper in the search */
        }
    }
    finished[s] = 1;
    top[cnt++] = s;
    return 1;
}

/*
 * Topologically sorts the n-vertex graph into top[].  Because vertices are
 * recorded in finish order, top[n-1], top[n-2], ..., top[0] is a valid
 * topological order.
 * Returns 1 on success, 0 if the graph contains a cycle.
 */
static int topSort(int graph[][MAX_NUM], int n)
{
    memset(visit, 0, (size_t)n * sizeof(int));
    memset(finished, 0, (size_t)n * sizeof(int));
    cnt = 0;
    for (int i = 0; i < n; i++) {
        if (visit[i] == 0 && topdfs(graph, i) == 0) {
            return 0;
        }
    }
    return 1;
}

/*
 * Longest path, counted in edges, of the n-vertex graph.
 *
 * Every edge gets weight -1 and every vertex starts at distance MAX_PATH (0);
 * relaxing all edges once in topological order then leaves in d[v] minus the
 * length of the longest path ending at v, whichever vertex it starts from.
 * parent[] records the predecessor chosen for each vertex.
 *
 * Returns the edge count of the longest path, or -1 if the graph is cyclic.
 */
static int longestPath(int graph[][MAX_NUM], int n)
{
    if (topSort(graph, n) == 0) {
        return -1; /* topological sort failed: there is a cycle */
    }

    for (int i = 0; i < n; i++) {
        parent[i] = -1;
        d[i] = MAX_PATH;
    }

    int min = 0;
    /* top[] is in finish order, so walk it backwards for topological order. */
    for (int j = n - 1; j >= 0; j--) {
        int u = top[j];
        for (int k = 1; k <= graph[u][0]; k++) {
            int v = graph[u][k];
            if (d[u] - 1 < d[v]) { /* relax with edge weight -1 */
                d[v] = d[u] - 1;
                parent[v] = u;
                if (d[v] < min) {
                    min = d[v];
                }
            }
        }
    }
    return -min;
}

/*
 * Given n strings of m+1 characters each, string a can be followed by string
 * b when the last m characters of a equal the first m characters of b.
 *
 *   strs - array of n NUL-terminated strings, each at least m+1 characters
 *   m    - overlap length required for a join
 *   n    - number of strings, at most MAX_NUM
 *
 * Returns the largest number of joins in any chain (a chain with k joins
 * uses k+1 strings and spells m+1+k characters); 0 when no two strings can
 * be joined.  Returns -1 when the strings can be joined in a cycle (a string
 * that can follow itself included) or when the arguments are invalid
 * (NULL/short strings, negative m or n, n > MAX_NUM).
 *
 * On success inDegree[], parent[] and d[] are left filled in.
 */
int longestConcat(char ** strs, int m, int n) {
    /* static: roughly 160 KB, too large to put comfortably on the stack */
    static int graph[MAX_NUM][MAX_NUM];
    unsigned int prefixHash[MAX_NUM];
    unsigned int suffixHash[MAX_NUM];
    int i, j;

    if (strs == NULL || m < 0 || n < 0 || n > MAX_NUM) {
        return -1;
    }

    for (i = 0; i < n; i++) {
        if (strs[i] == NULL || strlen(strs[i]) < (size_t)m + 1) {
            return -1;
        }
        prefixHash[i] = hashChars(strs[i], m);     /* first m characters */
        suffixHash[i] = hashChars(strs[i] + 1, m); /* last m characters  */
        graph[i][0] = 0;
        inDegree[i] = 0;
    }

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            /* The hash comparison cheaply rejects most non-matching pairs;
             * strncmp confirms a real match when the hashes agree. */
            if (suffixHash[i] == prefixHash[j]
                    && strncmp(strs[i] + 1, strs[j], (size_t)m) == 0) {
                if (i == j) {
                    return -1; /* self loop is a cycle */
                }
                graph[i][0]++;
                graph[i][graph[i][0]] = j;
                inDegree[j]++;
            }
        }
    }
    return longestPath(graph, n);
}



Time complexity analysis:
Hash calculation: O(nm)
Graph construction: O(n*n)
Topological sort: as dfs, O(V+E)
All source longest path: O(kE), k is the number of 0-in-degree vertices, E is the number of edges.
In total, it's an O(n*n+n*m) solution.
A very good problem. But I really doubt it as a solve-in-20-min interview question.

参考：

http://blog.csdn.net/v_july_v/article/details/7041827

http://www.ruanyifeng.com/blog/2013/05/boyer-moore_string_search_algorithm.html BM算法介绍不错，我又懒得看了==