《数据结构课程设计》——文本文件单词的检索与计数(代码实现)

《数据结构课程设计》——文本文件单词的检索与计数(代码实现)


前言

本文为文本文件单词的检索与计数的代码实现


一、属性声明

public class WordSearch {
    // Keyboard scanner shared by the menu code to read the user's choices
    private static Scanner sc = new Scanner(System.in);
    // Number of occurrences of pattern P on each line:
    // count1 is (re)allocated and filled by kmpSearch, count2 by violentMatch
    private static int[] count1;
    private static int[] count2;
    // Number of lines read from the file (incremented by fileRead for every line)
    private static int lines;
    // Maximum number of lines; used as the capacity of the two length arrays below
    private static int MAXLines = 100;
    // Line-length bookkeeping written by fileRead:
    // linesLength1[i] is the cumulative end offset of line i in the joined text
    // (each previous line contributes its length plus one for the '\n' separator);
    // linesLength2[i] is the length of line i on its own
    private static int[] linesLength1 = new int[MAXLines];
    private static int[] linesLength2 = new int[MAXLines];

二、主方法

 /**
     * Program entry point: writes the sample text to a file, reads it back as the
     * main string S and opens the interactive menu searching for the pattern P.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if reading the text file fails
     */
    public static void main(String[] args) throws Exception {
        // Location of the text file that is created, overwritten and then searched
        final String path = "D://123.txt";
        // Sample text; every "\n" ends one line of the file
        final String content =
                "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "A  baby A baby a baby \na Baby A baby a baby\na baby A baby a baby\na baby A baby a baby\n" +
                "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "A baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "A baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "A baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "A baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
                "A baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n";

        // Make sure the file exists, then replace its content with the sample text
        createFile(path);
        writerOverFile(path, content);

        // Main string S is the file content; P is the substring to look for
        String strS = fileRead(path);
        String strP = "baby";

        // Hand over to the interactive menu
        mainMenu(strS, strP);
    }

三、操作实现方法

/**
     * Shows the main menu, runs the chosen search algorithm and prints, for every
     * line of the text, the column indices at which the pattern was found.
     * <p>
     * The menu is shown again after an unrecognised choice, or when the user
     * enters "0" after a search; any other answer terminates the program.
     *
     * @param strS main string S (the text produced by fileRead)
     * @param strP pattern P to search for
     */
    private static void mainMenu(String strS, String strP) {
        // A loop instead of self-recursion: returning to the menu no longer grows the call stack
        while (true) {
            System.out.println("*********************************文本文件单词的检索与计数*********************************");
            System.out.println("1.朴素模式匹配算法;");
            System.out.println("2.KMP算法;");
            System.out.println("请选择:");
            String choice = sc.next();
            switch (choice) {
                // KMP algorithm
                case "2": {
                    long startTime = System.currentTimeMillis();
                    // Build the partial-match table for the pattern actually being
                    // searched (previously hard-coded to "baby")
                    int[] next = kmpNext(strP);
                    // Column indices of all matches; per-line counts end up in count1
                    int[] index = kmpSearch(strS, strP, next);
                    System.out.println(strP + "出现的总次数为:" + sumOf(count1));
                    printOccurrences(index, count1, "所在行数:");
                    long elapsed = System.currentTimeMillis() - startTime;
                    System.out.println("KMP运行时间:" + elapsed + "ms");
                    break;
                }
                // Naive (brute-force) pattern matching
                case "1": {
                    long startTime = System.currentTimeMillis();
                    // Column indices of all matches; per-line counts end up in count2
                    int[] index = violentMatch(strS, strP);
                    System.out.println("总次数为:" + sumOf(count2));
                    printOccurrences(index, count2, "所在行数为:");
                    long elapsed = System.currentTimeMillis() - startTime;
                    System.out.println("暴力匹配算法运行时间:" + elapsed + "ms");
                    break;
                }
                default:
                    // Unknown choice: show the menu again
                    continue;
            }
            System.out.println("输入0返回上一层菜单!");
            String answer = sc.next();
            if (!answer.equals("0")) {
                System.out.println("系统即将退出!");
                System.exit(0);
                return;
            }
        }
    }

    /**
     * Adds up the per-line occurrence counts.
     *
     * @param counts number of matches on each line
     * @return total number of matches
     */
    private static int sumOf(int[] counts) {
        int total = 0;
        for (int c : counts) {
            total += c;
        }
        return total;
    }

    /**
     * Prints every line number followed by the match positions belonging to that line.
     * Exactly perLineCounts[line] entries of index are consumed per line, so a line
     * without matches prints only its header and never borrows positions from later lines.
     *
     * @param index         match column indices, ordered line by line
     * @param perLineCounts number of matches on each line
     * @param lineLabel     text printed in front of the 1-based line number
     */
    private static void printOccurrences(int[] index, int[] perLineCounts, String lineLabel) {
        int cursor = 0;
        for (int line = 0; line < perLineCounts.length; line++) {
            System.out.println();
            System.out.println(lineLabel + (line + 1));
            System.out.println();
            for (int n = 0; n < perLineCounts[line]; n++) {
                System.out.println("所在位置索引号:" + index[cursor] + "号");
                cursor++;
            }
        }
    }

    /**
     * Creates an empty text file at the given path unless a file already exists there.
     * An IOException raised while creating the file is printed, not rethrown.
     *
     * @param filePath path of the file to create
     */
    private static void createFile(String filePath) {
        File target = new File(filePath);
        // Nothing to do when the file is already present
        if (target.exists()) {
            return;
        }
        try {
            target.createNewFile();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends content to the end of a text file.
     * Failures are reported by printing the stack trace; nothing is rethrown.
     *
     * @param filePath path of the file to append to
     * @param content  text to append
     */
    private static void contentAppend(String filePath, String content) {
        // try-with-resources closes the writer even when write() fails,
        // which the previous explicit close() inside the try block did not
        try (FileWriter fw = new FileWriter(filePath, true)) {
            fw.write(content);
            fw.flush();
        } catch (Exception e) {
            // Best effort, as before: report and carry on
            e.printStackTrace();
        }
    }


    /**
     * Replaces the whole content of a text file with the given content.
     * Failures are reported on the console; nothing is rethrown.
     *
     * @param filePath path of the file to overwrite
     * @param content  new content of the file
     */
    private static void writerOverFile(String filePath, String content) {
        // try-with-resources closes the writer on every path, including a failing write()
        try (PrintWriter pw = new PrintWriter(filePath)) {
            pw.write(content);
            // PrintWriter never throws on I/O errors; checkError() flushes the
            // stream and reports whether any write has failed
            if (pw.checkError()) {
                System.err.println("写入文件失败:" + filePath);
            }
        } catch (Exception e) {
            // Best effort, as before: report and carry on
            e.printStackTrace();
        }
    }

    /**
     * Reads a text file line by line, echoes every line to the console and returns
     * the whole text with each line terminated by '\n'.
     * <p>
     * Side effects on the static bookkeeping used by the search methods:
     * lines is reset and then counts the lines read, linesLength1[i] receives the
     * cumulative end offset of line i in the returned text, and linesLength2[i]
     * the length of line i alone.
     *
     * @param filePath path of the file to read
     * @return the file content, every line followed by '\n'
     * @throws Exception if the file cannot be opened or read
     */
    private static String fileRead(String filePath) throws Exception {
        // Start from a clean line count so that reading a second file does not
        // add to the count left behind by a previous call
        lines = 0;
        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws
        try (BufferedReader br = new BufferedReader(new FileReader(new File(filePath)))) {
            String s;
            // readLine() strips the line terminator only; spaces inside the line are kept
            while ((s = br.readLine()) != null) {
                int i = lines;
                // Grow the length arrays instead of overflowing them on long files
                if (i >= linesLength1.length) {
                    linesLength1 = java.util.Arrays.copyOf(linesLength1, Math.max(1, linesLength1.length * 2));
                    MAXLines = linesLength1.length;
                }
                if (i >= linesLength2.length) {
                    linesLength2 = java.util.Arrays.copyOf(linesLength2, Math.max(1, linesLength2.length * 2));
                }
                if (i == 0) {
                    // First line: its end offset is simply its length
                    linesLength1[i] = s.length();
                } else {
                    // Later lines: previous end offset + 1 for the '\n' + own length
                    linesLength1[i] = s.length() + 1 + linesLength1[i - 1];
                }
                linesLength2[i] = s.length();
                lines++;
                sb.append(s).append("\n");
                // Echo the line of the main string S
                System.out.println(s);
            }
        }
        return sb.toString();
    }

    /**
     * Builds the KMP partial-match table of a pattern: next[i] is the length of the
     * longest proper prefix of dest[0..i] that is also a suffix of it.
     *
     * @param dest the pattern
     * @return the partial-match table, one entry per pattern character
     *         (an empty array for an empty pattern)
     */
    private static int[] kmpNext(String dest) {
        int[] next = new int[dest.length()];
        // An empty pattern has no table entries; writing next[0] would be out of bounds.
        // For a non-empty pattern next[0] is 0, which the fresh array already holds.
        if (next.length == 0) {
            return next;
        }
        for (int i = 1, j = 0; i < dest.length(); i++) {
            // On a mismatch fall back to the next shorter border via next[j - 1]
            // until the characters agree or no border is left
            while (j > 0 && dest.charAt(i) != dest.charAt(j)) {
                j = next[j - 1];
            }
            // Characters agree: the border grows by one
            if (dest.charAt(i) == dest.charAt(j)) {
                j++;
            }
            next[i] = j;
        }
        return next;
    }

    /**
     * Finds the pattern on every line of the text with the KMP algorithm.
     * Matches never span two lines and do not overlap. The number of matches on
     * each line is stored in the static array count1.
     *
     * @param str1 main string S, laid out as recorded in linesLength1 by fileRead
     * @param str2 pattern P
     * @param next partial-match table of the pattern (see kmpNext)
     * @return an array of length str1.length(); its leading entries are the 0-based
     *         column indices (within their line) of all matches, line by line.
     *         When nothing was found and the text is not empty, entry 0 is -1.
     */
    private static int[] kmpSearch(String str1, String str2, int[] next) {
        // Column indices of all matches found so far
        int[] index = new int[str1.length()];
        count1 = new int[lines];
        // "Nothing found" marker; guarded because an empty text has no slot 0
        if (index.length > 0) {
            index[0] = -1;
        }
        // An empty pattern cannot be compared character by character
        if (str2.isEmpty()) {
            return index;
        }
        int pos = 0;    // position in str1
        int found = 0;  // number of matches recorded in index
        for (int h = 0; h < lines; h++) {
            int matched = 0;     // pattern characters matched so far
            int hitsInLine = 0;  // matches on the current line
            int column = 0;      // column of pos within the current line
            for (; pos < linesLength1[h]; pos++, column++) {
                // Mismatch: shrink the matched prefix using the partial-match table
                while (matched > 0 && str1.charAt(pos) != str2.charAt(matched)) {
                    matched = next[matched - 1];
                }
                // Characters agree: extend the match by one
                if (str1.charAt(pos) == str2.charAt(matched)) {
                    matched++;
                }
                // Whole pattern matched: record the column where it starts
                if (matched == str2.length()) {
                    index[found] = column - matched + 1;
                    found++;
                    hitsInLine++;
                    // Restart from scratch, so overlapping matches are not reported
                    matched = 0;
                }
            }
            // Skip the '\n' that terminates the line
            pos++;
            count1[h] = hitsInLine;
        }
        return index;
    }

    /**
     * Finds the pattern on every line of the text with the naive (brute-force)
     * matching algorithm. Matches never span two lines and do not overlap. The
     * number of matches on each line is stored in the static array count2.
     *
     * @param strS main string S, laid out as recorded in linesLength1 by fileRead
     * @param strP pattern P to search for
     * @return an array of length strS.length(); its leading entries are the 0-based
     *         column indices (within their line) of all matches, line by line
     */
    private static int[] violentMatch(String strS, String strP) {
        count2 = new int[lines];
        int[] index = new int[strS.length()];
        char[] s = strS.toCharArray();
        char[] p = strP.toCharArray();
        int pLen = p.length;
        // An empty pattern yields no matches
        if (pLen == 0) {
            return index;
        }
        int found = 0;      // number of matches recorded in index
        int lineStart = 0;  // offset in s of the first character of the current line
        for (int h = 0; h < lines; h++) {
            int lineEnd = linesLength1[h];  // offset just past the last character of the line
            int hitsInLine = 0;
            int pos = lineStart;
            // Try every start position at which the pattern still fits on the line
            while (pos + pLen <= lineEnd) {
                int j = 0;
                while (j < pLen && s[pos + j] == p[j]) {
                    j++;
                }
                if (j == pLen) {
                    // Full match: record its column and continue right behind it
                    index[found] = pos - lineStart;
                    found++;
                    hitsInLine++;
                    pos += pLen;
                } else {
                    // Mismatch: backtrack to the position after the attempted start
                    pos++;
                }
            }
            count2[h] = hitsInLine;
            // The next line starts behind this line's '\n'. Deriving the start from
            // lineEnd keeps the columns right even when the line is empty (the old
            // code skipped the '\n' only inside the loop body, which an empty line
            // never enters).
            lineStart = lineEnd + 1;
        }
        return index;
    }
}

四、运行结果(部分截图)

文件读取与写入:
在这里插入图片描述

①暴力匹配算法实现:
在这里插入图片描述
在这里插入图片描述
②KMP算法实现:
在这里插入图片描述
在这里插入图片描述

五、总结

本项目的实现主要采用了暴力匹配算法(朴素模式匹配算法)和KMP算法。其中暴力匹配算法的思想比KMP算法更简单易懂,但平均时间复杂度更高,因为朴素模式匹配算法在每次失配时都需要“傻瓜式”地回溯主串指针,比较麻烦;而对于KMP算法来说,next[]数组是解决回溯问题的关键,它使主串指针无需回溯,大大降低了算法的检索时间,是对朴素模式匹配算法的改进。当数据量大时,KMP算法的优势就会显而易见。

  • 4
    点赞
  • 41
    收藏
    觉得还不错? 一键收藏
  • 5
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值