// Forward declaration: the kernel below calls d_wsamMatch, so a prototype
// must be visible before the call site. Repeating an identical prototype is
// harmless if one is already declared earlier in the translation unit.
__device__ int d_wsamMatch(const char *patternString, const int patternLen,
                           int textLen, int *position, int *maxLen,
                           int *suffixLink, int *trans, size_t pitch,
                           int **rights, int *rightsSize, int *statePosition);

/**
 * Runs d_wsamMatch once per pattern and stores each return value.
 *
 * Launch layout: 1-D grid of 1-D blocks. The kernel uses a grid-stride loop,
 * so any grid/block size covers all NUMS patterns.
 *
 * @param NUMS            number of patterns; result[0..NUMS-1] is written
 * @param result          output array, one int per pattern
 * @param patternStrings  array of NUMS pattern pointers; every pointer must be
 *                        dereferenceable from device code
 * @param patternLen      length passed to d_wsamMatch for every pattern
 *                        (the same value is used for all patterns)
 * @param textLen, position, maxLen, suffixLink, trans, pitch, rights,
 *        rightsSize, statePosition
 *                        forwarded unchanged to d_wsamMatch
 */
__global__ void gpu_wsam(int NUMS, int* result, char** patternStrings,
                         int patternLen, int textLen, int* position,
                         int* maxLen, int* suffixLink, int* trans,
                         size_t pitch, int** rights, int* rightsSize,
                         int* statePosition)
{
    // Total number of threads in the grid; each thread handles every
    // stride-th pattern starting from its own global index.
    const int stride = blockDim.x * gridDim.x;
    for (int tid = threadIdx.x + blockIdx.x * blockDim.x; tid < NUMS;
         tid += stride) {
        result[tid] = d_wsamMatch(patternStrings[tid], patternLen, textLen,
                                  position, maxLen, suffixLink, trans, pitch,
                                  rights, rightsSize, statePosition);
    }
}
// Returns the target of the transition leaving `state` on character `c`,
// or 0 when there is none. Rows of `trans` are `pitch` bytes apart and the
// column is `c - 'a'`.
// Characters below 'a' (notably the wildcard '?') would give a negative
// column, so they are reported as "no transition" instead of being read.
// NOTE(review): the row width is not visible here, so characters above the
// last column are not range-checked -- confirm the alphabet with the caller.
static __device__ __forceinline__ int wsamNextState(const int *trans,
                                                    size_t pitch, int state,
                                                    char c)
{
    const int column = int(c) - 'a';
    if (column < 0) {
        return 0;
    }
    const int *row = (const int *)((const char *)trans + state * pitch);
    return row[column];
}

// Starting in `state`, consumes pattern characters from index `patIdx` for
// as long as a non-zero transition exists. Returns how many characters were
// consumed; `patIdx` is advanced to the first character NOT consumed.
static __device__ __forceinline__ int wsamExtend(const char *patternString,
                                                 int patternLen,
                                                 const int *trans,
                                                 size_t pitch, int state,
                                                 int &patIdx)
{
    int consumed = 0;
    while (patIdx < patternLen) {
        const int next =
            wsamNextState(trans, pitch, state, patternString[patIdx]);
        if (next == 0) {
            break;
        }
        state = next;
        ++consumed;
        ++patIdx;
    }
    return consumed;
}

// Handles a run of `wildNums` wildcards that is not tied to a previously
// matched prefix. `patIdx` is the pattern index just past the run.
// Phase 1 tries statePosition[k] for every k in [wildNums, textLen) with all
// wildcards counted. Phase 2 counts wildNums-1 ... 0 wildcards and, for each
// count w, tries only statePosition[w] (the original author's note: the
// following entry was already covered by the previous phase).
// Whenever a candidate ties or beats `maxCount`, `maxCount` is updated and
// `restNum` records the pattern index where that candidate stopped.
static __device__ void wsamScanFreeWildcards(const char *patternString,
                                             int patternLen, int textLen,
                                             const int *trans, size_t pitch,
                                             const int *statePosition,
                                             int wildNums, int patIdx,
                                             int &maxCount, int &restNum)
{
    for (int rit = wildNums; rit < textLen; rit++) {
        int stop = patIdx;
        const int candidate =
            wildNums + wsamExtend(patternString, patternLen, trans, pitch,
                                  statePosition[rit], stop);
        if (candidate >= maxCount) {
            maxCount = candidate;
            restNum = stop;
        }
    }
    while (wildNums > 0) {
        wildNums--;
        int stop = patIdx;
        const int candidate =
            wildNums + wsamExtend(patternString, patternLen, trans, pitch,
                                  statePosition[wildNums], stop);
        if (candidate >= maxCount) {
            maxCount = candidate;
            restNum = stop;
        }
    }
}

/**
 * Matches one pattern (which may contain '?' wildcards) against a text that
 * is represented by precomputed automaton tables, and returns the largest
 * match count found.
 *
 * @param patternString  pattern characters; '?' is the wildcard, every other
 *                       character is mapped to column `c - 'a'` of `trans`
 * @param patternLen     number of pattern characters to consider
 * @param textLen        length of the text; used as an upper bound for
 *                       text offsets and returned when a wildcard run is at
 *                       least this long
 * @param position       per-state value whose absolute value is used as a
 *                       text offset (presumably an end position -- confirm
 *                       against the table builder)
 * @param maxLen         per-state length; the running count is reset to
 *                       maxLen[p] after following a suffix link
 * @param suffixLink     per-state suffix link; state 0 ends the link walk
 * @param trans          pitched 2-D transition table (row = state,
 *                       column = `c - 'a'`); entry 0 means "no transition"
 * @param pitch          row stride of `trans` in bytes
 * @param rights         per-state list of text offsets (the end-position set
 *                       according to the original comments)
 * @param rightsSize     number of entries in rights[state]
 * @param statePosition  state to resume from for a given text offset;
 *                       only indices in [0, textLen) are read
 * @return the largest count of matched pattern characters (wildcards
 *         included) found by the search
 *
 * Fixes relative to the previous version:
 *  - a '?' met while extending a candidate no longer indexes `trans` with a
 *    negative column; the extension simply stops there;
 *  - `restNum` starts at -1 instead of being read uninitialized when no
 *    candidate ties or beats the current maximum;
 *  - the wildcard-counting loops no longer read past `patternLen`.
 */
__device__ int d_wsamMatch(const char *patternString, const int patternLen,
                           int textLen, int *position, int *maxLen,
                           int *suffixLink, int *trans, size_t pitch,
                           int **rights, int *rightsSize, int *statePosition)
{
    int i = 0;         // current pattern index
    int curCount = 0;  // length of the match currently being extended
    int maxCount = 0;  // best length seen so far
    int p = 0;         // current automaton state (0 = initial state)

    while (i < patternLen) {
        // A wildcard run at the current start position is handled
        // separately, because there is no matched prefix to anchor it.
        if (patternString[i] == '?') {
            int wildNums = 0;
            while (i < patternLen && patternString[i] == '?') {
                wildNums++;
                i++;
            }
            // The run covers the whole text: the answer is the text length.
            if (wildNums >= textLen) {
                return textLen;
            }
            // The pattern ends inside the run: nothing but wildcards left.
            if (i == patternLen) {
                return wildNums > maxCount ? wildNums : maxCount;
            }
            // -1 never equals patternLen, so "no candidate recorded" falls
            // through to ordinary matching below.
            int restNum = -1;
            wsamScanFreeWildcards(patternString, patternLen, textLen, trans,
                                  pitch, statePosition, wildNums, i, maxCount,
                                  restNum);
            // The best candidate consumed the whole pattern: done.
            if (restNum == patternLen) {
                return maxCount;
            }
            // Otherwise continue with ordinary automaton matching from the
            // first character after the run (i already points there).
            curCount = 0;
        }

        // Ordinary matching: follow transitions while they exist.
        while (i < patternLen) {
            const int next =
                wsamNextState(trans, pitch, p, patternString[i]);
            if (next == 0) {
                break;
            }
            curCount++;
            p = next;
            i++;

            // A wildcard run directly after a matched prefix.
            if (i < patternLen && patternString[i] == '?') {
                // Count the wildcards in the run.
                int wildNums = 0;
                while (i < patternLen && patternString[i] == '?') {
                    wildNums++;
                    i++;
                }
                const int index = i;  // first pattern index after the run
                int restNum = -1;     // stop index of the best candidate

                // Walk the suffix-link chain of the matched prefix.
                while (p != 0) {
                    const int endPos = abs(position[p]);
                    if (endPos + wildNums >= textLen) {
                        // The run reaches the end of the text: only the
                        // remaining text characters can be covered.
                        curCount = curCount + (textLen - endPos);
                        if (curCount >= maxCount) {
                            maxCount = curCount;
                            restNum = patternLen;
                        }
                    } else if (i == patternLen) {
                        // The pattern ends inside the run.
                        curCount = curCount + wildNums;
                        if (curCount >= maxCount) {
                            maxCount = curCount;
                            restNum = patternLen;
                        }
                    } else {
                        // Try every recorded end position of this state,
                        // skip the wildcards, and extend from there.
                        const int tmpCount = wildNums + curCount;
                        for (int rit = 0; rit < rightsSize[p]; rit++) {
                            const int tmp = rights[p][rit] + wildNums;
                            if (tmp >= textLen) {
                                continue;
                            }
                            int stop = i;
                            const int candidate =
                                tmpCount + wsamExtend(patternString,
                                                      patternLen, trans,
                                                      pitch,
                                                      statePosition[tmp],
                                                      stop);
                            if (candidate >= maxCount) {
                                maxCount = candidate;
                                restNum = stop;
                            }
                        }
                    }
                    p = suffixLink[p];
                    curCount = maxLen[p];
                }

                // p == 0 is equivalent to a wildcard run with no prefix.
                if (wildNums >= textLen) {
                    return textLen;
                }
                // The pattern ends inside the run: only wildcards remain.
                if (i == patternLen) {
                    return wildNums > maxCount ? wildNums : maxCount;
                }
                wsamScanFreeWildcards(patternString, patternLen, textLen,
                                      trans, pitch, statePosition, wildNums,
                                      i, maxCount, restNum);
                // The best candidate consumed the whole pattern: done.
                if (restNum == patternLen) {
                    return maxCount;
                }
                // Otherwise restart ordinary matching after the run. The
                // p == 0 branch below increments i, so step back by one.
                // Any candidate count is already folded into maxCount, so
                // the running count restarts from zero.
                i = index - 1;
                p = 0;
                curCount = 0;
                break;
            }
        }

        if (i == patternLen) {
            return maxCount < curCount ? curCount : maxCount;
        }
        if (p == 0) {
            // No transition from the initial state: skip this character.
            maxCount = maxCount < curCount ? curCount : maxCount;
            curCount = 0;
            i++;
        } else {
            // Fall back along the suffix link and retry the same character.
            p = suffixLink[p];
            maxCount = maxCount < curCount ? curCount : maxCount;
            curCount = maxLen[p];
        }
    }
    return maxCount;
}