《数据结构课程设计》——文本文件单词的检索与计数(代码实现)
前言
本文为文本文件单词的检索与计数的代码实现
一、属性声明
/**
 * Console program that looks up a word in a text file and reports, line by line,
 * how often it occurs and at which positions, using either the naive matcher
 * ({@code violentMatch}) or the KMP matcher ({@code kmpSearch}).
 */
public class WordSearch {
// Scanner that reads the user's menu choices from standard input
private static Scanner sc = new Scanner(System.in);
// Per-line occurrence counts of the pattern P:
// count1 is allocated and filled by kmpSearch, count2 by violentMatch
private static int[] count1;
private static int[] count2;
// Number of lines read from the file; fileRead increments it once per line
private static int lines;
// Maximum number of lines; used as the size of the two line-length arrays below
private static int MAXLines = 100;
// Line-length bookkeeping filled by fileRead:
// linesLength1[i] is the cumulative end offset of line i in the joined text
// (each earlier line contributes its length plus one for the '\n' separator),
// linesLength2[i] is the plain length of line i
private static int[] linesLength1 = new int[MAXLines];
private static int[] linesLength2 = new int[MAXLines];
二、主方法
/**
 * Program entry point: creates the sample file, overwrites it with the sample
 * text, reads it back as the source string S and opens the main menu with the
 * pattern P set to "baby".
 *
 * @param args command-line arguments (not used)
 * @throws Exception if reading the file fails
 */
public static void main(String[] args) throws Exception {
    final String path = "D://123.txt";
    // Make sure the text file exists
    createFile(path);
    // Sample content that replaces whatever the file held before
    final String sampleText =
            "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "A baby A baby a baby \na Baby A baby a baby\na baby A baby a baby\na baby A baby a baby\n" +
            "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "A baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "a baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "A baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "A baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "A baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n" +
            "A baby A baby a baby a girl is a girl a boy is a boy they are children a girl is a girl \n";
    writerOverFile(path, sampleText);
    // Source string S, as read back from the file
    String text = fileRead(path);
    // Pattern P to look for
    String pattern = "baby";
    // Hand over to the interactive menu
    mainMenu(text, pattern);
}
三、操作实现方法
/**
 * Shows the main menu and runs the chosen search until the user quits.
 *
 * <p>Choice "1" runs the naive matcher, choice "2" runs KMP; any other input
 * shows the menu again. After a search the user enters "0" to return to the
 * menu; any other input terminates the JVM.
 *
 * @param strS the source string S (the text read from the file)
 * @param strP the pattern P to search for
 */
private static void mainMenu(String strS, String strP) {
    // A loop instead of self-recursion, so returning to the menu does not grow the call stack.
    while (true) {
        System.out.println("*********************************文本文件单词的检索与计数*********************************");
        System.out.println("1.朴素模式匹配算法;");
        System.out.println("2.KMP算法;");
        System.out.println("请选择:");
        String choice = sc.next();
        switch (choice) {
            // KMP algorithm
            case "2": {
                long startTime = System.currentTimeMillis();
                // Build the partial-match table for the requested pattern, not a fixed word
                int[] next = kmpNext(strP);
                // Locate every occurrence; per-line counts end up in count1
                int[] index = kmpSearch(strS, strP, next);
                int times = sumCounts(count1);
                System.out.println(strP + "出现的总次数为:" + times);
                printOccurrences(index, count1, "所在行数:");
                long totalTime = System.currentTimeMillis() - startTime;
                System.out.println("KMP运行时间:" + totalTime + "ms");
                promptReturnOrExit();
                break;
            }
            // Naive pattern matching
            case "1": {
                long startTime = System.currentTimeMillis();
                // Locate every occurrence; per-line counts end up in count2
                int[] index = violentMatch(strS, strP);
                int times = sumCounts(count2);
                System.out.println("总次数为:" + times);
                printOccurrences(index, count2, "所在行数为:");
                long totalTime = System.currentTimeMillis() - startTime;
                System.out.println("暴力匹配算法运行时间:" + totalTime + "ms");
                promptReturnOrExit();
                break;
            }
            default:
                // Unknown choice: fall through to the next loop iteration and show the menu again
                break;
        }
    }
}

/** Returns the sum of all per-line occurrence counts. */
private static int sumCounts(int[] counts) {
    int total = 0;
    for (int c : counts) {
        total += c;
    }
    return total;
}

/**
 * Prints, for every line, its 1-based line number followed by the position of
 * each occurrence on that line. Exactly {@code counts[line]} entries of
 * {@code index} are consumed per line, so a line without matches prints none.
 */
private static void printOccurrences(int[] index, int[] counts, String lineLabel) {
    int pos = 0;
    for (int line = 0; line < counts.length; line++) {
        System.out.println();
        System.out.println(lineLabel + (line + 1));
        System.out.println();
        for (int n = 0; n < counts[line]; n++) {
            System.out.println("所在位置索引号:" + index[pos] + "号");
            pos++;
        }
    }
}

/** Waits for the user: "0" returns to the caller, anything else exits the program. */
private static void promptReturnOrExit() {
    System.out.println("输入0返回上一层菜单!");
    String result = sc.next();
    if (!result.equals("0")) {
        System.out.println("系统即将退出!");
        System.exit(0);
    }
}
/**
 * Creates an empty text file at the given path unless one already exists.
 * An I/O failure while creating the file is printed as a stack trace and
 * otherwise ignored.
 *
 * @param filePath path of the file to create
 */
private static void createFile(String filePath) {
    File target = new File(filePath);
    // Nothing to do when the file is already there
    if (target.exists()) {
        return;
    }
    try {
        target.createNewFile();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Appends content to the end of a text file.
 * Any failure is printed as a stack trace and otherwise ignored.
 *
 * @param filePath path of the file to append to
 * @param content  text to append
 */
private static void contentAppend(String filePath, String content) {
    // try-with-resources closes (and thereby flushes) the writer even when write() throws
    try (FileWriter fw = new FileWriter(filePath, true)) {
        fw.write(content);
    } catch (Exception e) {
        // Kept broad on purpose: the original treated every failure as best-effort
        e.printStackTrace();
    }
}
/**
 * Replaces the whole content of a text file with the given content.
 * Any failure is printed as a stack trace and otherwise ignored.
 *
 * @param filePath path of the file to overwrite
 * @param content  new content of the file
 */
private static void writerOverFile(String filePath, String content) {
    // try-with-resources closes (and thereby flushes) the writer even when write() throws
    try (PrintWriter pw = new PrintWriter(filePath)) {
        pw.write(content);
    } catch (Exception e) {
        // Kept broad on purpose: the original treated every failure as best-effort
        e.printStackTrace();
    }
}
/**
 * Reads a text file line by line, echoes every line to standard output and
 * returns the lines joined with a single '\n' after each one.
 *
 * <p>Side effects on the class state: {@code lines} is set to the number of
 * lines read, {@code linesLength2[i]} to the length of line i, and
 * {@code linesLength1[i]} to the cumulative end offset of line i in the
 * returned string (the '\n' that follows line i sits at exactly that offset).
 * Line terminators are dropped by {@code readLine}; all other characters,
 * trailing spaces included, are kept.
 *
 * @param filePath path of the file to read
 * @return the file content with every line terminated by '\n'
 * @throws Exception if the file cannot be opened or read
 */
private static String fileRead(String filePath) throws Exception {
    // Start the bookkeeping from scratch so a repeated call cannot leave
    // `lines` larger than the number of filled array slots.
    lines = 0;
    StringBuilder sb = new StringBuilder();
    // try-with-resources closes the reader even when readLine() throws
    try (BufferedReader br = new BufferedReader(new FileReader(new File(filePath)))) {
        String s;
        while ((s = br.readLine()) != null) {
            int i = lines;
            // Grow the bookkeeping arrays instead of failing on files longer than their initial size
            if (i >= linesLength1.length) {
                linesLength1 = java.util.Arrays.copyOf(linesLength1, Math.max(1, linesLength1.length * 2));
            }
            if (i >= linesLength2.length) {
                linesLength2 = java.util.Arrays.copyOf(linesLength2, Math.max(1, linesLength2.length * 2));
            }
            if (i == 0) {
                // First line: its end offset is simply its length
                linesLength1[i] = s.length();
            } else {
                // Later lines: previous end offset, plus one for the '\n', plus this line's length
                linesLength1[i] = s.length() + 1 + linesLength1[i - 1];
            }
            linesLength2[i] = s.length();
            lines++;
            sb.append(s).append('\n');
            // Echo the source string S
            System.out.println(s);
        }
    }
    return sb.toString();
}
/**
 * Builds the KMP partial-match table of a pattern.
 *
 * <p>{@code next[i]} is the length of the longest proper prefix of
 * {@code dest[0..i]} that is also a suffix of it.
 *
 * @param dest the pattern; may be empty
 * @return the partial-match table, one entry per pattern character
 *         (an empty array for an empty pattern)
 */
private static int[] kmpNext(String dest) {
    // A new int[] is zero-filled, so next[0] is already 0. Not writing it
    // explicitly lets an empty pattern yield an empty table instead of throwing.
    int[] next = new int[dest.length()];
    for (int i = 1, j = 0; i < dest.length(); i++) {
        // On a mismatch fall back to the next shorter border recorded in next[j - 1]
        // until the characters agree or no border is left.
        while (j > 0 && dest.charAt(i) != dest.charAt(j)) {
            j = next[j - 1];
        }
        // On a match the border grows by one character
        if (dest.charAt(i) == dest.charAt(j)) {
            j++;
        }
        next[i] = j;
    }
    return next;
}
/**
 * Finds the occurrences of a word line by line with the KMP algorithm.
 *
 * <p>Line boundaries come from {@code lines} and {@code linesLength1}; the
 * number of occurrences on each line is stored in {@code count1}. After a
 * match the pattern index restarts at 0, so occurrences are not allowed to
 * overlap.
 *
 * @param str1 the source string S
 * @param str2 the pattern P
 * @param next the partial-match table of the pattern (not used when the pattern is empty)
 * @return an array of length {@code str1.length()} whose leading entries are
 *         the 0-based positions, within their own line, of the occurrences in
 *         order of appearance; entry 0 is -1 when nothing was found and unused
 *         entries are 0
 */
private static int[] kmpSearch(String str1, String str2, int[] next) {
    // Positions of all occurrences found
    int[] index = new int[str1.length()];
    count1 = new int[lines];
    // -1 marks "no occurrence"; guarded so an empty text does not throw
    if (index.length > 0) {
        index[0] = -1;
    }
    // An empty pattern cannot be matched character by character; report no occurrences
    if (str2.isEmpty()) {
        return index;
    }
    // i walks the whole text, k is the next free slot in index
    int i = 0;
    int k = 0;
    for (int h = 0; h < lines; h++) {
        // Matching state never carries over from one line to the next
        int j = 0;
        int count = 0;
        // temp1 is the position of str1.charAt(i) within the current line
        int temp1 = 0;
        for (; i < linesLength1[h]; i++) {
            // On a mismatch shrink j via the partial-match table
            while (j > 0 && str1.charAt(i) != str2.charAt(j)) {
                j = next[j - 1];
            }
            // Characters agree: advance in the pattern
            if (str1.charAt(i) == str2.charAt(j)) {
                j++;
            }
            // Whole pattern matched: the occurrence ends at temp1
            if (j == str2.length()) {
                index[k] = temp1 - j + 1;
                j = 0;
                k++;
                count++;
            }
            temp1++;
        }
        // Step over the '\n' that separates this line from the next one
        i++;
        // Remember how often the pattern occurred on this line
        count1[h] = count;
    }
    return index;
}
/**
 * Finds the occurrences of a word line by line with naive (brute-force)
 * pattern matching.
 *
 * <p>Line boundaries come from {@code lines} and {@code linesLength1}; the
 * number of occurrences on each line is stored in {@code count2}. After a
 * match the search continues behind the match, so occurrences are not allowed
 * to overlap.
 *
 * @param strS the source string S
 * @param strP the pattern P
 * @return an array of length {@code strS.length()} whose leading entries are
 *         the 0-based positions, within their own line, of the occurrences in
 *         order of appearance; unused entries are 0
 */
private static int[] violentMatch(String strS, String strP) {
    count2 = new int[lines];
    int[] index = new int[strS.length()];
    char[] s = strS.toCharArray();
    char[] p = strP.toCharArray();
    int pLen = p.length;
    // An empty pattern never matches; all counts stay 0
    if (pLen == 0) {
        return index;
    }
    // i walks the whole text, k is the next free slot in index
    int i = 0;
    int k = 0;
    for (int h = 0; h < lines; h++) {
        int lineEnd = linesLength1[h];
        // temp is the position of s[i] within the current line, j the position in the pattern
        int temp = 0;
        int j = 0;
        int count = 0;
        while (i < lineEnd) {
            if (s[i] == p[j]) {
                // Characters agree: advance in both strings
                i++;
                j++;
                temp++;
                if (j == pLen) {
                    // Whole pattern matched: it started pLen characters back
                    index[k] = temp - pLen;
                    k++;
                    count++;
                    j = 0;
                }
            } else {
                // Mismatch: restart one character after the previous start
                i = i - (j - 1);
                temp = temp - (j - 1);
                j = 0;
            }
        }
        count2[h] = count;
        // Always step over the '\n', also after an empty line, so the
        // positions reported for the next line start at 0 again
        i++;
    }
    return index;
}
}
四、运行结果(部分截图)
文件读取与写入:
①暴力匹配算法实现:
②KMP算法实现:
五、总结
本项目的实现主要采用了暴力匹配算法(朴素模式匹配算法)和KMP算法。其中暴力匹配算法的思想比KMP算法更简单易懂,但是平均时间复杂度会更高,因为朴素模式匹配算法在失配时需要“傻瓜式”地回溯,比较麻烦;而对于KMP算法来说,next[]数组是解决回溯问题的关键,它大大降低了算法的检索时间,是对朴素模式匹配算法的改进。当数据量大时,KMP算法的好处就会显而易见了。