在使用 Rabin–Karp(RK)算法进行字符串匹配时,模数 q 必须是一个充分大的素数。为了减少出现假匹配的概率,先计算小于 $12mn^2$ 的素数集合(其中 n 为待匹配串的长度,m 为模式串的长度),再从集合中随机取出一个素数作为 q,可使假匹配的概率小于 $1/n$。此外,由于哈希值相等时还会逐字符比较加以确认,这个算法总能得到正确的答案。
-
#include <time.h>

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <set>
#include <vector>
-
using namespace std;
-
-
#define N 100 // size of the buffer that holds the text string to search in
-
#define M 20 // size of the buffer that holds the pattern string
-
-
// Multiplier of the recurrence seed = MULTIPLIER * seed + INCREMENT used by random()
#define MULTIPLIER 0x015A4E35L
-
// Additive constant of the same recurrence
#define INCREMENT 1
-
-
// Current generator state: set by random_seed(), advanced on every call to random()
static unsigned long seed;
-
-
/* 生成随机数种子 */
-
void random_seed ( unsigned long d ) {
-
if (d== 0 ) {
-
seed = time ( 0 );
-
} else {
-
seed = d;
-
}
-
}
-
-
/* 生成一个low~high范围内的随机数 */
-
unsigned int random ( unsigned long low, unsigned long high ) {
-
seed = MULTIPLIER * seed + INCREMENT;
-
return ( (seed>> 16 )% (high-low )+low );
-
}
-
-
long BASE; // radix of the hash; inSet() sets it to the number of distinct characters in v
-
set<char> v; // set of the characters occurring in the text string and the pattern string
-
map<char, int>data; // table filled by countMap(): maps each character of v to its position in v
-
-
/* 将串中出现的字符放入集合 */
-
void inSet ( char s [ ] ) {
-
int n = strlen (s );
-
-
for ( int i = 0; i<n; i++ ) {
-
v. insert (s [i ] );
-
}
-
-
BASE = v. size ( );
-
}
-
-
/* 计算出现的字符的哈希表 */
-
void countMap ( ) {
-
data. clear ( );
-
int i = 0;
-
for (set<char>:: iterator iter = v. begin ( ); iter!=v. end ( ); iter++ ) {
-
data [*iter ] = i++;
-
}
-
}
-
-
/* 出现的字符的哈希函数 */
-
int ch ( char s ) {
-
return data [s ];
-
}
-
-
/*
-
* 字符串匹配
-
* 输入:存放待匹配串的数组S[],待匹配串的长度n,
-
* 存放模式串的数组P[],模式串的长度m,素数p
-
* 输出:与P相匹配的子串在待匹配串中的起始位置loc
-
*/
-
void match ( char S [ ], long n, char P [ ], long m, long &loc, long q ) {
-
long b = BASE;
-
long i, k;
-
long w = 0, p = 0;
-
long x = 1;
-
-
// 计算b^(m-1) % q
-
for (i= 0; i<m -1; i++ ) {
-
x = (x*b )%q;
-
}
-
-
// 计算第一个窗口子串的哈希值
-
for (i= 0; i<m; i++ ) {
-
w = (w*b + ch (S [i ] ) )%q;
-
}
-
-
// 计算模式串的哈希值
-
for (i= 0; i<m; i++ ) {
-
p = (p*b + ch (P [i ] ) )%q;
-
}
-
-
i = 0;
-
while ( (i<=n-m ) && (loc == -1 ) ) {
-
if (w==p ) { // 如果与模式串相等 则仔细检查是否真的相等
-
for ( k= 0; k<m; k++ ) {
-
if (S [i+k ]!=P [k ] ) break;
-
}
-
if (k>=m ) loc = i;
-
}
-
// 计算下一个窗口子串的哈希值
-
w = w - ch (S [i ] )*x%q;
-
if (w< 0 ) w += q;
-
w = (w*b + ch (S [i+m ] ) )%q;
-
i++;
-
}
-
}
-
-
/*
-
* 字符串匹配的随机算法
-
* 输入:存放待匹配串的数组S[],待匹配串的长度n,
-
* 存放模式串的数组P[],模式串的长度m,
-
* 小于12*m*n*n的素数集合R[],素数集合的元素个数a
-
* 输出:与P相匹配的子串在待匹配串中的起始位置loc
-
*/
-
void match_random ( char S [ ], long n, char P [ ], long m, long &loc, long R [ ], int a ) {
-
long q;
-
random_seed ( 0 );
-
q = random (a/ 2,a ); // 从a/2~a范围中取一个素数
-
q = R [q ];
-
match (S, n, P, m, loc, q );
-
}
-
-
/*
 * Find all primes below a bound with the sieve of Eratosthenes.
 *
 * size - exclusive upper bound: every returned prime p satisfies p < size
 * a    - output: number of primes found (0 when size <= 2)
 *
 * Returns an array allocated with new[] that holds the a primes in increasing
 * order; the caller owns it and must release it with delete[]. The array is
 * sized for exactly a entries.
 */
long* findPrime(int size, int &a) {
    a = 0;

    // No primes below 2; this also covers zero and negative bounds, which
    // would otherwise index outside the sieve.
    if (size <= 2) {
        return new long[0];
    }

    // composite[k] != 0 means k is known to be composite. The vector frees
    // itself, which replaces the mismatched `delete` of a new[] buffer.
    std::vector<char> composite(size);
    for (long long i = 2; i * i < size; ++i) {
        if (composite[i]) {
            continue;
        }
        for (long long j = i * i; j < size; j += i) {
            composite[j] = 1;
        }
    }

    int count = 0;
    for (int k = 2; k < size; ++k) {
        if (!composite[k]) {
            ++count;
        }
    }

    long* r = new long[count];
    for (int k = 2; k < size; ++k) {
        if (!composite[k]) {
            r[a++] = k;
        }
    }

    return r;
}
-
-
int main ( ) {
-
char str [N ], substr [M ];
-
-
while (cin>>str ) {
-
if (! strcmp (str, "exit" ) ) break; // 待匹配串为exit时退出
-
cin>>substr;
-
-
long loc = -1;
-
int n = strlen (str );
-
int m = strlen (substr );
-
int a;
-
long* R = findPrime ( 12*m*n*n, a ); // 计算小于12*m*n*n的素数集合
-
-
/* 计算待匹配串和模式串中出现的元素集合 */
-
v. clear ( );
-
inSet (str );
-
inSet (substr );
-
countMap ( );
-
-
// 随机匹配
-
match_random (str, n, substr, m, loc, R, a );
-
-
if (loc== -1 ) { // 如果未匹配 则尝试用最接近12*m*n*n的素数进行匹配
-
cout<< "No Found!"<<endl;
-
match (str, n, substr, m, loc, R [a -1 ] );
-
cout<< "Use Prime: "<<R [a -1 ]<<endl;
-
if (loc== -1 ) {
-
cout<< "No Found!"<<endl;
-
} else {
-
cout<< "Location In "<<loc +1<<endl;
-
}
-
} else {
-
cout<< "Location In "<<loc +1<<endl;
-
}
-
-
delete R;
-
-
system ( "pause" );
-
cout<< "================================"<<endl;
-
}
-
-
return 0;
-
}
才知道在 C++ 中 % 对负数求余得到的结果可能是负数(符号与被除数相同)……所以还需要自己手工判断,结果为负时再加上 q……