How Rabin-Karp works: first compute the hash of the target (pattern) string, then slide a window of the pattern's length over the text being searched.
At each position where a full window of pattern length fits, we hash the windowed substring and compare it with the pattern hash; if the hashes are equal, we tentatively conclude the pattern has been found.
Where a full window no longer fits (the remaining text is shorter than the pattern), the window's contents cannot possibly equal the pattern, so those positions are skipped.
For scenarios with modest precision requirements, hash equality can be treated as string equality and the match reported directly.
For scenarios that demand exactness, enable the ENABLE_COLLISION_CHECK macro, which adds an extra character-by-character comparison to guard against hash collisions.
The classic single-target Rabin-Karp algorithm runs in O(n+m) expected time, where n is the text length and m is the pattern length; in the worst case (frequent hash collisions) it degrades to O(nm).
Because each sliding window can be evaluated independently, the algorithm is especially well suited to CUDA parallelization. The same independence also lets us extend from a single pattern to multiple targets at no loss: each thread simply checks its window against every pattern.
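Before the CUDA version, a minimal sketch of the classic sequential algorithm may help (the function name rabinKarpCPU and its exact form are illustrative, not part of the implementation below). It updates each window hash in O(1) from the previous one via a rolling update; the CUDA kernel instead recomputes every window hash from scratch, since threads share no state:

#include <cstring>
#include <vector>

#define PRIME 16777619

// Sequential Rabin-Karp: rolling hash, O(1) update per window position.
std::vector<int> rabinKarpCPU(const char *text, const char *pattern) {
    int n = (int)strlen(text), m = (int)strlen(pattern);
    std::vector<int> matches;
    if (m == 0 || n < m) return matches;
    long long patHash = 0, winHash = 0, power = 1; // power = 256^(m-1) mod PRIME
    for (int i = 0; i < m - 1; i++) power = (power * 256) % PRIME;
    for (int i = 0; i < m; i++) {
        patHash = (patHash * 256 + (unsigned char)pattern[i]) % PRIME;
        winHash = (winHash * 256 + (unsigned char)text[i]) % PRIME;
    }
    for (int i = 0; i + m <= n; i++) {
        // verify on a hash hit to rule out collisions
        if (winHash == patHash && memcmp(text + i, pattern, m) == 0)
            matches.push_back(i);
        if (i + m < n) { // roll: drop text[i], append text[i + m]
            winHash = (winHash + PRIME - ((unsigned char)text[i] * power) % PRIME) % PRIME;
            winHash = (winHash * 256 + (unsigned char)text[i + m]) % PRIME;
        }
    }
    return matches;
}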
Code implementation (CUDA):
#include <iostream>
#include <vector>
#include <string>
#include <cstring>
#include <cuda_runtime.h>
#define PRIME 16777619
#define ENABLE_COLLISION_CHECK 0 // set to 1 to verify hash hits character by character
using namespace std;
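// Kernel: one thread per text start position. Each thread hashes the window
// beginning at its position and compares it against every pattern hash.
// results is a numPatterns x textLen matrix; results[p * textLen + i] == 1
// means pattern p occurs at byte offset i of the text.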
__global__ void multiTargetRabinKarpKernel(
char *text,
char **patterns,
int *patternLengths,
int *patternHashes,
int numPatterns,
int *results,
int textLen
) {
int textIdx = blockIdx.x * blockDim.x + threadIdx.x;
if (textIdx < textLen) {
for (int patIdx = 0; patIdx < numPatterns; patIdx++) {
int patLen = patternLengths[patIdx];
if (textIdx <= textLen - patLen) {
                // compute this window's hash from scratch (threads share no state,
                // so there is no rolling update); the 64-bit intermediate avoids
                // signed-int overflow of hash * 256, and the unsigned char cast
                // keeps multi-byte UTF-8 text consistent with computeHash below
                long long hash = 0;
                for (int i = 0; i < patLen; i++) {
                    hash = (hash * 256 + (unsigned char)text[textIdx + i]) % PRIME;
                }
if (hash == patternHashes[patIdx]) {
                    // trust the hash unless collision checking is enabled
                    bool match = true;
                    #if ENABLE_COLLISION_CHECK
for (int i = 0; i < patLen; i++) {
if (text[textIdx + i] != patterns[patIdx][i]) {
match = false;
break;
}
}
#endif
if (match) {
results[patIdx * textLen + textIdx] = 1;
}
}
}
}
}
}
int computeHash(const char *str, int len) {
    // must match the kernel's hashing exactly; the 64-bit intermediate avoids
    // signed-int overflow (PRIME * 256 exceeds INT_MAX)
    long long hash = 0;
    for (int i = 0; i < len; i++) {
        hash = (hash * 256 + (unsigned char)str[i]) % PRIME;
    }
    return (int)hash; // hash < PRIME, so it fits in an int
}
int main() {
// prepare data
const char *text = "[example] this is a simple example text to demonstrate multi-target Rabin-Karp. 你好世界!Hello World!";
vector<string> patterns = {"example", "simple", "你好", "Hello"};
int textLen = strlen(text);
int numPatterns = patterns.size();
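    // note: all lengths and offsets are byte counts; matching is byte-wise,
    // so multi-byte UTF-8 patterns such as "你好" match at byte offsets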
vector<int> patternLengths(numPatterns);
vector<int> patternHashes(numPatterns);
vector<const char*> patternPtrs(numPatterns);
for (int i = 0; i < numPatterns; i++) {
patternLengths[i] = patterns[i].length();
patternHashes[i] = computeHash(patterns[i].c_str(), patternLengths[i]);
patternPtrs[i] = patterns[i].c_str();
}
// allocate gpu memory
char *d_text;
char **d_patterns;
int *d_patternLengths;
int *d_patternHashes;
int *d_results;
char **h_patterns = new char*[numPatterns];
int *results = new int[numPatterns * textLen]();
cudaMalloc(&d_text, textLen * sizeof(char));
cudaMalloc(&d_patterns, numPatterns * sizeof(char*));
cudaMalloc(&d_patternLengths, numPatterns * sizeof(int));
cudaMalloc(&d_patternHashes, numPatterns * sizeof(int));
cudaMalloc(&d_results, numPatterns * textLen * sizeof(int));
    // for each pattern, allocate a device buffer and copy the pattern bytes;
    // h_patterns gathers the device pointers, copied into d_patterns below
for (int i = 0; i < numPatterns; i++) {
cudaMalloc(&h_patterns[i], patternLengths[i] * sizeof(char));
cudaMemcpy(h_patterns[i], patterns[i].c_str(), patternLengths[i] * sizeof(char), cudaMemcpyHostToDevice);
}
cudaMemcpy(d_patterns, h_patterns, numPatterns * sizeof(char*), cudaMemcpyHostToDevice);
cudaMemcpy(d_text, text, textLen * sizeof(char), cudaMemcpyHostToDevice);
cudaMemcpy(d_patternLengths, patternLengths.data(), numPatterns * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_patternHashes, patternHashes.data(), numPatterns * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_results, results, numPatterns * textLen * sizeof(int), cudaMemcpyHostToDevice);
    // launch the kernel: one thread per text start position
int blockSize = 256;
int numBlocks = (textLen + blockSize - 1) / blockSize;
multiTargetRabinKarpKernel<<<numBlocks, blockSize>>>(
d_text, d_patterns, d_patternLengths, d_patternHashes,
numPatterns, d_results, textLen
);
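    // make kernel completion explicit before reading results (the blocking
    // cudaMemcpy below would also synchronize the default stream)
    cudaDeviceSynchronize();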
    // copy results back to the host
cudaMemcpy(results, d_results, numPatterns * textLen * sizeof(int), cudaMemcpyDeviceToHost);
for (int i = 0; i < numPatterns; i++) {
cout << "Matches for pattern \"" << patterns[i] << "\":" << endl;
for (int j = 0; j < textLen; j++) {
if (results[i * textLen + j] == 1) {
cout << " Found at index " << j << endl;
}
}
}
cudaFree(d_text);
cudaFree(d_patterns);
cudaFree(d_patternLengths);
cudaFree(d_patternHashes);
cudaFree(d_results);
for (int i = 0; i < numPatterns; i++) {
cudaFree(h_patterns[i]);
}
delete[] h_patterns;
delete[] results;
return 0;
}
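To try the example, compile it with the CUDA toolkit's nvcc and run the binary, e.g. nvcc rabin_karp.cu -o rabin_karp && ./rabin_karp (the file name is illustrative). Reported indices are byte offsets into the text, so multi-byte UTF-8 patterns such as "你好" are located by byte position. Also note that with ENABLE_COLLISION_CHECK set to 0, a hash collision can in principle report a false match; set it to 1 whenever exact results are required.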