WSAM_GPU

// Forward declaration: the kernel below calls d_wsamMatch, whose definition
// appears later in this file. Without it the call does not compile.
__device__ int d_wsamMatch(const char *patternString, const int patternLen,
                           int textLen, int *position, int *maxLen,
                           int *suffixLink, int *trans, size_t pitch,
                           int **rights, int *rightsSize, int *statePosition);

/**
 * Matches NUMS patterns against one text, one pattern per loop iteration.
 *
 * Each thread walks the pattern indices with a grid-stride loop, so any
 * 1D launch configuration covers all NUMS patterns. For every index `tid`
 * the thread stores the value returned by d_wsamMatch(patternStrings[tid], ...)
 * into result[tid]. Only the x dimension of the grid/block is used.
 *
 * @param NUMS            number of patterns (and of entries written to result)
 * @param result          output array, one int per pattern
 * @param patternStrings  array of NUMS pointers to pattern strings
 * @param patternLen      length passed unchanged for every pattern
 * @param textLen, position, maxLen, suffixLink, trans, pitch, rights,
 *        rightsSize, statePosition
 *                        forwarded unchanged to d_wsamMatch
 *
 * NOTE(review): all pointers are dereferenced in device code, so they are
 * presumably device-accessible allocations — confirm against the host side.
 */
__global__ void gpu_wsam(int NUMS, int* result, char** patternStrings,
                        int patternLen, int textLen, int* position, int*
                        maxLen,
                        int* suffixLink, int* trans, size_t pitch, int**
                        rights,
                        int* rightsSize, int* statePosition)
{
    // 64-bit index/stride so that neither the initial index nor the
    // `tid += stride` step can overflow when NUMS is close to INT_MAX.
    const long long stride = (long long)blockDim.x * gridDim.x;
    for (long long tid = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         tid < NUMS; tid += stride) {
        result[tid] = d_wsamMatch(patternStrings[tid], patternLen, textLen,
                                  position, maxLen, suffixLink, trans, pitch,
                                  rights, rightsSize, statePosition);
    }
}

// Returns the state reached from `state` on character `c`, or 0 when there is
// no such transition. Row `state` of the pitched table starts `state * pitch`
// bytes after `trans`; the column is `c - 'a'`.
// A character below 'a' (notably the wildcard '?') would give a negative
// column and read outside the row, so it is reported as "no transition".
// NOTE(review): the number of columns per row is not visible here, so no
// upper bound can be enforced — confirm the alphabet against the host code.
static __device__ __forceinline__ int wsamNextState(const int *trans,
                                                    size_t pitch, int state,
                                                    char c)
{
    const int column = int(c - 'a');
    if (column < 0) {
        return 0;
    }
    const int *row = (const int *)((const char *)trans + state * pitch);
    return row[column];
}

// Follows transitions from `state`, consuming pattern characters starting at
// index `start`, until the pattern ends or a transition is missing. Adds one
// to *count per consumed character and returns the index where it stopped.
static __device__ int wsamExtend(const char *patternString, int patternLen,
                                 const int *trans, size_t pitch, int state,
                                 int start, int *count)
{
    int at = start;
    while (at < patternLen) {
        const int next = wsamNextState(trans, pitch, state, patternString[at]);
        if (next == 0) {
            break;
        }
        ++*count;
        state = next;
        ++at;
    }
    return at;
}

// Evaluates every candidate for a run of `wildNums` wildcards that is not
// preceded by a matched prefix, then continues matching at pattern index
// `resume`. Whenever a candidate length is >= *maxCount, *maxCount is updated
// and *restNum receives the pattern index where that candidate stopped.
static __device__ void wsamScanWildcardPrefix(const char *patternString,
                                              int patternLen, int textLen,
                                              const int *trans, size_t pitch,
                                              const int *statePosition,
                                              int wildNums, int resume,
                                              int *maxCount, int *restNum)
{
    // All wildNums wildcards are used: try every text position from wildNums
    // up to textLen - 1 as the point where literal matching resumes.
    for (int rit = wildNums; rit < textLen; rit++) {
        int count = wildNums;
        const int stop = wsamExtend(patternString, patternLen, trans, pitch,
                                    statePosition[rit], resume, &count);
        if (count >= *maxCount) {
            *maxCount = count;
            *restNum = stop;
        }
    }

    // Fewer wildcards (wildNums-1 down to 0): only statePosition[used] has to
    // be checked for each count, the later positions were covered above.
    for (int used = wildNums - 1; used >= 0; --used) {
        int count = used;
        const int stop = wsamExtend(patternString, patternLen, trans, pitch,
                                    statePosition[used], resume, &count);
        if (count >= *maxCount) {
            *maxCount = count;
            *restNum = stop;
        }
    }
}

/**
 * Computes the longest match length found between one pattern, which may
 * contain '?' wildcards, and the text described by the automaton tables.
 *
 * State 0 is the start state and a transition value of 0 means "no
 * transition". Runs of '?' are handled by jumping to candidate states through
 * `rights` / `statePosition` and extending the match from there.
 *
 * @param patternString  pattern characters; only indices [0, patternLen) are read
 * @param patternLen     number of pattern characters
 * @param textLen        length of the text; also the largest possible result
 * @param position       per-state value; abs(position[p]) is used as a text
 *                       offset (presumably the state's end position — confirm)
 * @param maxLen         per-state length used to reset the running count after
 *                       following a suffix link
 * @param suffixLink     per-state suffix link; the wildcard loop follows it
 *                       until state 0 is reached (assumes every chain ends in
 *                       state 0 — TODO confirm with the automaton builder)
 * @param trans          pitched 2D transition table, indexed [state][c - 'a']
 * @param pitch          row size of `trans` in bytes
 * @param rights         per-state list of end positions
 * @param rightsSize     number of entries in rights[p]
 * @param statePosition  maps a text position to an automaton state
 * @return the longest match length found; textLen when a wildcard run alone
 *         already covers the whole text
 */
__device__ int d_wsamMatch(const char *patternString, const int patternLen,
                           int textLen, int *position, int *maxLen,
                           int *suffixLink, int *trans, size_t pitch,
                           int **rights, int *rightsSize, int *statePosition)
{
    int i = 0;
    int curCount = 0;
    int maxCount = 0;
    int p = 0;

    while (i < patternLen) {
        // The current segment starts with a wildcard: handle it separately.
        if (patternString[i] == '?') {
            int wildNums = 0;
            // Bounded by patternLen so the scan never reads past the pattern.
            while (i < patternLen && patternString[i] == '?') {
                wildNums++;
                i++;
            }
            // The wildcard run alone covers the text: result is the text length.
            if (wildNums >= textLen) {
                return textLen;
            }
            // The pattern ended inside the wildcard run.
            if (i == patternLen) {
                return wildNums > maxCount ? wildNums : maxCount;
            }

            // -1 never equals patternLen, so an untouched restNum falls
            // through to normal matching instead of being read uninitialized.
            int restNum = -1;
            wsamScanWildcardPrefix(patternString, patternLen, textLen, trans,
                                   pitch, statePosition, wildNums, i,
                                   &maxCount, &restNum);

            // The best candidate consumed the whole pattern: matching is done.
            if (restNum == patternLen) {
                return maxCount;
            }
            // Otherwise continue with normal automaton matching right after
            // the wildcard run (i already points there).
            curCount = 0;
        }

        while (i < patternLen) {
            const int next = wsamNextState(trans, pitch, p, patternString[i]);
            if (next == 0) {
                break;
            }
            curCount++;
            p = next;
            i++;

            // A wildcard run follows the matched prefix.
            if (i < patternLen && patternString[i] == '?') {
                // Count the wildcards (bounded by patternLen).
                int wildNums = 0;
                while (i < patternLen && patternString[i] == '?') {
                    wildNums++;
                    i++;
                }
                const int index = i;
                int restNum = -1; // pattern index where the best candidate stopped

                // Try the current state and every state on its suffix-link chain.
                while (p != 0) {
                    if (abs(position[p]) + wildNums >= textLen) {
                        // The wildcard run reaches past the end of the text.
                        const int candidate =
                            curCount + (textLen - abs(position[p]));
                        if (candidate >= maxCount) {
                            maxCount = candidate;
                            restNum = patternLen;
                        }
                    } else if (i == patternLen) {
                        // The pattern ended inside the wildcard run.
                        const int candidate = curCount + wildNums;
                        if (candidate >= maxCount) {
                            maxCount = candidate;
                            restNum = patternLen;
                        }
                    } else {
                        // Walk the end-position set of p and extend from each
                        // position shifted by the wildcard run.
                        const int base = wildNums + curCount;
                        for (int rit = 0; rit < rightsSize[p]; rit++) {
                            const int tmp = rights[p][rit] + wildNums;
                            if (tmp >= textLen) {
                                continue;
                            }
                            int candidate = base;
                            const int stop = wsamExtend(
                                patternString, patternLen, trans, pitch,
                                statePosition[tmp], i, &candidate);
                            if (candidate >= maxCount) {
                                maxCount = candidate;
                                restNum = stop;
                            }
                        }
                    }
                    p = suffixLink[p];
                    curCount = maxLen[p];
                }

                // p == 0 is equivalent to the pattern starting with the
                // wildcard run.
                if (wildNums >= textLen) {
                    return textLen;
                }
                // The pattern ended inside the wildcard run.
                if (i == patternLen) {
                    return wildNums > maxCount ? wildNums : maxCount;
                }
                wsamScanWildcardPrefix(patternString, patternLen, textLen,
                                       trans, pitch, statePosition, wildNums,
                                       i, &maxCount, &restNum);

                // The best candidate consumed the whole pattern: done.
                if (restNum == patternLen) {
                    return maxCount;
                }
                // Otherwise restart normal matching after the wildcard run:
                // i is left on the last '?', and the p == 0 branch below
                // advances it by one. Every candidate was already folded into
                // maxCount, so the running count restarts at 0.
                i = index - 1;
                p = 0;
                curCount = 0;
                break;
            }
        }

        if (i == patternLen) {
            return maxCount < curCount ? curCount : maxCount;
        }
        if (p == 0) {
            // No transition from the start state: skip this character.
            maxCount = maxCount < curCount ? curCount : maxCount;
            curCount = 0;
            i++;
        } else {
            // Fall back along the suffix link and retry the same character.
            p = suffixLink[p];
            maxCount = maxCount < curCount ? curCount : maxCount;
            curCount = maxLen[p];
        }
    }
    return maxCount;
}

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值