【数据结构】模式字符串查找（支持通配符‘*’）

最新推荐文章于 2023-06-02 23:00:58 发布

yywMia

最新推荐文章于 2023-06-02 23:00:58 发布

阅读量1.3k

点赞数

分类专栏：数据结构文章标签：数据结构字符串 c++

本文链接：https://blog.csdn.net/qq_45314905/article/details/122085185

版权

数据结构专栏收录该内容

15 篇文章 3 订阅

订阅专栏

问题描述

在当前目录下的文件string.in中查找给定的字符串，并将查找到的字符串和行号输出到当前目录下的文件string.out中。要求：

1）从键盘输入给定的字符串，该字符串中只包含大小写字母、数字字符、中括号字符'['和']'、字符'*'，以及字符'^'。字符串的长度不超过20。

2）字符'^'只能出现在中括号内，且只能作为中括号内的第一个字符出现。除字符'^'外，中括号内至少还包含一个字母或数字。

3）字符*不会出现在中括号内

4）在给定字符串中，中括号最多出现一次。若中括号内未出现字符’^’，表示该位置上的字符只要与中括号内的任一字符相同，则匹配成功；若中括号内出现字符’^’，表示该位置上的字符与中括号内的所有字符都不相同时，匹配成功。

5）字符*可以同零个字符或者多个任意字符相匹配

6）在给定的字符串中，*号最多仅出现一次

7）*号的作用范围局限于一行，不会跨越行进行匹配

8）有多个字符串和*号匹配时，仅仅输出一个，并且输出这些串中长度最短的那个

9）查找字符串时大小写无关。

10）先输出查到的行号（行号从1开始），行号后跟冒号':'，然后是查找到的字符串，多个字符串之间用逗号','隔开。各行之间用一个回车换行符隔开。

输入形式

首先从标准输入（键盘）读入待查找的字符串。待查找的文件string.in位于当前目录下。

输出形式

将查找到的结果输出到当前目录下的string.out中。

样例输入1

zh[ao]ng

假如string.in文件内容为：

Zhang ying ju zhu zai ZhongGuo.
Ta zheng zai du gao zhong.
Bie ren dou jia ta xiao zhang.

样例输出1

string.out文件内容为：

1:Zhang,Zhong
2:zhong
3:zhang

样例1说明

给定字符串中有中括号，表示第三个字符可以是a也可以是o，且大小写无关，因此文章中第一行的Zhang和Zhong与给定字符串匹配，故输出1:Zhang,Zhong。其它类推。

样例输入2

a[^ab]a

string.in文件内容为：

Do you like banana?
ABA is the abbreviation of American Bankers Association.

样例输出2

string.out文件内容为：

1:ana,ana

样例2说明

给定字符串中括号内有字符’^’，表示第一个和第三个字符都为a，第二个字符不能为a或b，因此文章中第一行的banana内有两个字符串ana与给定字符串匹配，故输出1:ana,ana。第二行中ABA的第二个字符为B，由于大小写无关，与给定字符串中括号内的b相同，故不能匹配。

样例输入3

w*d

string.in文件内容为：

wwwdd
world is a nice word

样例输出3

string.out文件内容为：

1:wwwd,wwd,wd
2:world,word

样例3说明

给定的字符串中有'*'，表示在一行内，可以和以'w'开头、以'd'结尾的任意字符串相匹配。在一行中，对于第一个字符'w'，同时有字符串"wwwd"以及"wwwdd"与之相匹配，根据上述第8条规则，应该匹配"wwwd"。以此类推得到"wwd"和"wd"。同样的规则用于第二行，得到"world"和"word"。

代码

// Search_3.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

// Reads the text file `file` and returns its lines in order, one element per
// line, without the line terminator.
//
// Lines may be of any length. A trailing '\r' (left over when a CRLF file is
// read on a platform that does not translate line endings) is stripped so it
// can never take part in a match.
//
// Terminates the process with exit code 100 if the file cannot be opened.
std::vector<std::string> ReadFile(const std::string& file) {
    std::ifstream fin(file);
    if (!fin.is_open()) {
        std::exit(100);
    }

    std::vector<std::string> lines;
    std::string line;
    // Loop on the read itself rather than on eof(): this stops cleanly on any
    // stream failure and does not append a phantom entry after the last line.
    while (std::getline(fin, line)) {
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        lines.push_back(line);
    }
    return lines;
}

// Writes the per-line search results to the text file `file`.
//
// word[i] holds the result text for input line i + 1. Empty entries (lines
// without a match) are skipped; every other entry is written as
// "<line number>:<entry>". No separator is added after the entry, so each
// non-empty entry is expected to carry its own trailing newline.
//
// Terminates the process with exit code 100 if the file cannot be opened.
void WriteFile(const std::string& file, const std::vector<std::string>& word) {
    std::ofstream fout(file);
    if (!fout.is_open()) {
        std::exit(100);
    }

    for (std::size_t i = 0; i < word.size(); ++i) {
        if (!word[i].empty()) {
            fout << i + 1 << ':' << word[i];
        }
    }
    // fout is flushed and closed by its destructor.
}

// Scans the pattern `str` and records its special characters in `tab`, which
// must have room for at least four ints:
//   tab[0] is set to 1 when a '^' is present,
//   tab[1] receives the index of '*',
//   tab[2] receives the index of '[',
//   tab[3] receives the index of ']'.
// Slots whose character does not occur are left untouched, so the caller
// chooses the "absent" value. If a character occurs more than once, the index
// of its last occurrence is the one that remains.
void SolveString(string str, int tab[]) {
    const int length = str.length();
    for (int pos = 0; pos < length; ++pos) {
        const char ch = str[pos];
        if (ch == '^') {
            tab[0] = 1;
        } else if (ch == '*') {
            tab[1] = pos;
        } else if (ch == '[') {
            tab[2] = pos;
        } else if (ch == ']') {
            tab[3] = pos;
        }
    }
}



// Returns the lower-case form of an upper-case letter 'A'..'Z'; every other
// character is returned unchanged.
char tolower(char ch) {
    const bool isUpper = (ch >= 'A') && (ch <= 'Z');
    return isUpper ? ch + ('a' - 'A') : ch;
}


namespace {

// One parsed element of the search pattern.
struct PatternToken {
    enum class Kind { Literal, CharClass, Star };
    Kind kind;
    std::string chars;  // lower-cased: the single literal, or the members of a [...] class
    bool negated;       // true for [^...]; only meaningful for CharClass
};

// Splits the raw pattern into literal, character-class ("[...]" / "[^...]")
// and '*' tokens. All pattern characters are lower-cased here so matching only
// has to fold the text side. A '[' without a closing ']' is kept as a literal.
std::vector<PatternToken> ParsePattern(const std::string& pattern) {
    std::vector<PatternToken> tokens;
    std::size_t i = 0;
    while (i < pattern.size()) {
        const char ch = pattern[i];
        if (ch == '*') {
            tokens.push_back({PatternToken::Kind::Star, std::string(), false});
            ++i;
            continue;
        }
        if (ch == '[') {
            const std::size_t close = pattern.find(']', i + 1);
            if (close != std::string::npos) {
                std::size_t first = i + 1;
                const bool negated = first < close && pattern[first] == '^';
                if (negated) {
                    ++first;
                }
                std::string members;
                for (std::size_t k = first; k < close; ++k) {
                    members += static_cast<char>(tolower(pattern[k]));
                }
                tokens.push_back({PatternToken::Kind::CharClass, members, negated});
                i = close + 1;
                continue;
            }
        }
        tokens.push_back({PatternToken::Kind::Literal,
                          std::string(1, static_cast<char>(tolower(ch))), false});
        ++i;
    }
    return tokens;
}

// Tells whether a single text character satisfies a Literal or CharClass
// token, ignoring case.
bool TokenAccepts(const PatternToken& token, char ch) {
    const char folded = static_cast<char>(tolower(ch));
    const bool listed = token.chars.find(folded) != std::string::npos;
    if (token.kind == PatternToken::Kind::CharClass && token.negated) {
        return !listed;
    }
    return listed;
}

// Tries to match tokens[ti..] against `line` starting exactly at `pos`.
// Returns the index one past the last matched character, or
// std::string::npos when there is no match. A '*' tries the smallest skip
// first, so with at most one '*' in the pattern the returned match is the
// shortest one for this start position.
std::size_t MatchEnd(const std::vector<PatternToken>& tokens, std::size_t ti,
                     const std::string& line, std::size_t pos) {
    for (; ti < tokens.size(); ++ti) {
        const PatternToken& token = tokens[ti];
        if (token.kind == PatternToken::Kind::Star) {
            for (std::size_t resume = pos; resume <= line.size(); ++resume) {
                const std::size_t end = MatchEnd(tokens, ti + 1, line, resume);
                if (end != std::string::npos) {
                    return end;
                }
            }
            return std::string::npos;
        }
        if (pos >= line.size() || !TokenAccepts(token, line[pos])) {
            return std::string::npos;
        }
        ++pos;
    }
    return pos;
}

}  // namespace

// Searches every line of `word` for the pattern `str` and returns one result
// string per input line (same order, same count).
//
// Pattern syntax: ordinary characters match themselves, "[abc]" matches any
// listed character, "[^abc]" matches any character not listed, and '*' matches
// zero or more arbitrary characters within the line. Matching ignores case.
//
// Every start position in a line is tried, so matches may overlap; for each
// start position only the shortest match is reported. Empty matches are never
// reported.
//
// Result format: the matched substrings of a line joined with ',' and followed
// by a single '\n'; a line without any match yields an empty string.
//
// Despite its name this is a plain try-every-start-position matcher, not the
// Knuth-Morris-Pratt algorithm.
std::vector<std::string> KMP(const std::vector<std::string>& word, const std::string& str) {
    const std::vector<PatternToken> tokens = ParsePattern(str);

    std::vector<std::string> result;
    result.reserve(word.size());
    for (const std::string& line : word) {
        std::string joined;
        for (std::size_t start = 0; start < line.size(); ++start) {
            const std::size_t end = MatchEnd(tokens, 0, line, start);
            if (end == std::string::npos || end == start) {
                continue;  // no match here, or only an empty one
            }
            if (!joined.empty()) {
                joined += ',';
            }
            joined.append(line, start, end - start);
        }
        if (!joined.empty()) {
            joined += '\n';
        }
        result.push_back(joined);
    }
    return result;
}

// Entry point: reads the search pattern from standard input, searches every
// line of "string.in" for it and writes the per-line results to "string.out".
int main()
{
    string pattern;
    cin >> pattern;

    vector<string> lines = ReadFile("string.in");
    lines = KMP(lines, pattern);
    WriteFile("string.out", lines);
}