文字检索问题
类的定义
IO 文件读取
代码:
import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
 * Loads the text file that the search algorithms operate on.
 *
 * <p>The file is read line by line using the platform default charset
 * (no charset is passed to {@link InputStreamReader}).
 *
 * @author 莉莉安
 * @date 2021/3/10 19:58
 */
public class Read {
    /** Reader used by the most recent {@link #reader()} call; it is closed when that call returns. */
    public BufferedReader br;

    /** File to read. Defaults to the author's local sample file. */
    public File file = new File("C:\\Users\\莉莉安\\Desktop\\java\\数据结构实践\\文字搜索\\src\\imformation.txt");

    /** Creates a reader for the default sample file. */
    public Read() {
    }

    /**
     * Creates a reader for the given file instead of the hard-coded default.
     *
     * @param file the file whose lines {@link #reader()} should return
     */
    public Read(File file) {
        this.file = file;
    }

    /**
     * Reads every line of {@link #file}.
     *
     * <p>I/O failures (for example a missing file) are reported with a stack trace and
     * whatever was read before the failure is returned, so the result is never {@code null}.
     *
     * @return the lines of the file in order, without line terminators; empty if the file
     *         could not be opened
     */
    public List<String> reader() {
        List<String> list = new ArrayList<>();
        // try-with-resources closes the stream even when readLine() fails part-way through.
        try (BufferedReader input = new BufferedReader(
                new InputStreamReader(new FileInputStream(file)))) {
            br = input;
            String line;
            while ((line = input.readLine()) != null) {
                list.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return list;
    }
}
字串类:为了将匹配到的字符串结果(所在行与起始位置)用列表表示,故设此类
代码 :
/**
* @author 莉莉安
* @ClassName: IntelliJ IDEA
* @Description: ${TODO}
* @date 2021/3/10 20:44
* ${tags}
*/
/**
* 字符串类
*/
public class Str {
private int row;
private int start;
public Str() {
}
public Str(int row, int start) {
this.row = row;
this.start = start;
}
public int getRow() {
return row;
}
public void setRow(int row) {
this.row = row;
}
public int getStart() {
return start;
}
public void setStart(int start) {
this.start = start;
}
}
功能类
用于实现查找算法(KMP 与 BF)
代码:
/**
* @author 莉莉安
* @ClassName: IntelliJ IDEA
* @Description: ${TODO}
* @date 2021/3/10 19:56
* ${tags}
*/
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * Console front end for searching a word in the text file loaded by {@link Read}.
 *
 * <p>Each round reloads the file, asks for a word and a search method (KMP or brute
 * force), and prints every non-overlapping occurrence as "line, start~end" with
 * one-based line and column numbers.
 */
public class Word {
    /** Lines of the text file; reloaded at the beginning of every round. */
    List<String> list = null;
    /** Word entered by the user for the current round. */
    String word;
    Scanner scan = new Scanner(System.in);
    Read read = new Read();

    /**
     * Runs the interactive menu until the user chooses "exit" or the input ends.
     *
     * <p>The menu is an explicit loop: earlier versions re-entered this method
     * recursively after every query, so the call stack grew with each round.
     * Input that is not a valid menu number is reported as invalid and the round
     * restarts instead of aborting with an exception.
     */
    public void start() {
        while (true) {
            list = read.reader();
            System.out.println("请输入要查询的单词");
            if (!scan.hasNext()) {
                return; // input exhausted: nothing more to ask
            }
            word = scan.next();
            System.out.println("请输入您的查找方式:");
            System.out.println("1.KMP");
            System.out.println("2.BF");
            System.out.println("3.退出");
            if (!scan.hasNextInt()) {
                if (!scan.hasNext()) {
                    return; // input exhausted
                }
                scan.next(); // discard the token that is not a number
                System.out.println("输入无效重新输入");
                continue;
            }
            int choice = scan.nextInt();
            switch (choice) {
                case 1:
                    report(kmpSearch(list, word), word);
                    break;
                case 2:
                    report(bfSearch(list, word), word);
                    break;
                case 3:
                    return;
                default:
                    System.out.println("输入无效重新输入");
            }
        }
    }

    /**
     * Searches {@code list} for {@code goal} with the KMP algorithm, prints the
     * matches and then enters the menu via {@link #start()}.
     *
     * @param list lines to search
     * @param goal pattern to look for; must not be {@code null}
     */
    public void KMP(List<String> list, String goal) {
        report(kmpSearch(list, goal), goal);
        start();
    }

    /**
     * Builds the KMP fallback table for a pattern.
     *
     * <p>{@code next[j]} is the pattern index to resume from after a mismatch at
     * {@code j}; {@code -1} means "advance the text position and restart the pattern".
     * When the character at the fallback position equals the character that just
     * mismatched, falling back there would fail again, so the entry reuses the
     * fallback's own entry.
     *
     * @param ps the pattern
     * @return the fallback table, one entry per pattern character; empty for an empty pattern
     */
    public static int[] next(String ps) {
        char[] p = ps.toCharArray();
        int[] next = new int[p.length];
        if (p.length == 0) {
            return next; // nothing to fill; avoids writing next[0] on an empty array
        }
        next[0] = -1;
        int j = 0;
        int k = -1;
        while (j < p.length - 1) {
            if (k == -1 || p[j] == p[k]) {
                if (p[++j] == p[++k]) { // same character: skip straight to next[k]
                    next[j] = next[k];
                } else {
                    next[j] = k;
                }
            } else {
                k = next[k];
            }
        }
        return next;
    }

    /**
     * Searches {@code list} for {@code goal} by brute force, prints the matches and
     * then enters the menu via {@link #start()}.
     *
     * @param list lines to search
     * @param goal pattern to look for; must not be {@code null}
     */
    public void BF(List<String> list, String goal) {
        report(bfSearch(list, goal), goal);
        start();
    }

    /** Finds all non-overlapping occurrences of {@code goal} in each line using KMP. */
    private static List<Str> kmpSearch(List<String> lines, String goal) {
        List<Str> hits = new ArrayList<>();
        if (goal.isEmpty()) {
            return hits; // an empty pattern matches nothing here, same as the brute-force search
        }
        // The pattern and its table do not depend on the line, so build them once.
        char[] p = goal.toCharArray();
        int[] next = next(goal);
        for (int row = 0; row < lines.size(); row++) {
            char[] t = lines.get(row).toCharArray();
            int i = 0; // position in the line
            while (i < t.length) {
                int j = 0; // position in the pattern; restarts after every match
                while (i < t.length && j < p.length) {
                    if (j == -1 || t[i] == p[j]) { // j == -1: advance i and reset j to 0
                        i++;
                        j++;
                    } else {
                        j = next[j]; // fall back inside the pattern
                    }
                }
                if (j == p.length) {
                    hits.add(new Str(row, i - j + 1)); // one-based start column
                }
            }
        }
        return hits;
    }

    /** Finds all non-overlapping occurrences of {@code goal} in each line by brute force. */
    private static List<Str> bfSearch(List<String> lines, String goal) {
        List<Str> hits = new ArrayList<>();
        if (goal.isEmpty()) {
            return hits; // also keeps the loop below from stalling on a zero-length step
        }
        char[] pattern = goal.toCharArray();
        for (int row = 0; row < lines.size(); row++) {
            char[] text = lines.get(row).toCharArray();
            int begin = 0; // candidate start of a match
            while (begin + pattern.length <= text.length) {
                int matched = 0;
                while (matched < pattern.length && text[begin + matched] == pattern[matched]) {
                    matched++;
                }
                if (matched == pattern.length) {
                    hits.add(new Str(row, begin + 1)); // one-based start column
                    begin += pattern.length; // continue after the match (non-overlapping)
                } else {
                    begin++;
                }
            }
        }
        return hits;
    }

    /** Prints the number of matches and the line and column range of each one. */
    private static void report(List<Str> hits, String goal) {
        if (hits.isEmpty()) {
            System.out.println("没有与之匹配的串");
            return;
        }
        System.out.println("共有" + hits.size() + "个相同串");
        for (int i = 0; i < hits.size(); i++) {
            Str hit = hits.get(i);
            System.out.println((i + 1) + ". " + "第" + (hit.getRow() + 1) + "行,第"
                    + hit.getStart() + "~" + (hit.getStart() + goal.length() - 1));
        }
    }
}
主类(测试类)
代码:
/**
* @author 莉莉安
* @ClassName: IntelliJ IDEA
* @Description: ${TODO}
* @date 2021/3/10 19:56
* ${tags}
*/
public class Test {
public static void main(String[]args){
Word word=new Word();
word.start();
}
}
算法描述
BF算法即暴力匹配法
代码:
/**
 * Brute-force search: finds every non-overlapping occurrence of {@code goal} in each
 * line, prints the results, then returns to the menu via {@code start()}.
 *
 * @param list lines to search
 * @param goal pattern to look for
 */
public void BF(List<String> list, String goal) {
    List<Str> hits = new ArrayList<>();
    for (int row = 0; row < list.size(); row++) {
        char[] text = list.get(row).toCharArray();
        char[] pattern = goal.toCharArray();
        int begin = 0;   // candidate start of the current attempt
        int matched = 0; // pattern characters matched so far; text position is begin + matched
        while (begin + matched < text.length && matched < pattern.length) {
            if (text[begin + matched] != pattern[matched]) {
                // Mismatch: retry one position further to the right.
                begin++;
                matched = 0;
                continue;
            }
            matched++;
            if (matched == pattern.length) {
                hits.add(new Str(row, begin + 1)); // one-based start column
                begin += matched;                  // resume right after the match
                matched = 0;
            }
        }
    }
    if (hits.isEmpty()) {
        System.out.println("没有与之匹配的串");
    } else {
        System.out.println("共有" + hits.size() + "个相同串");
        int ordinal = 1;
        for (Str hit : hits) {
            int from = hit.getStart();
            System.out.println(ordinal + ". " + "第" + (hit.getRow() + 1) + "行,第"
                    + from + "~" + (from + goal.length() - 1));
            ordinal++;
        }
    }
    start();
}
KMP算法
这个算法到现在还是脑壳有点昏
代码:
/**
 * KMP search: finds every non-overlapping occurrence of {@code goal} in each line,
 * prints the results, then returns to the menu via {@code start()}.
 *
 * <p>The pattern array and its fallback table are built once rather than once per
 * match, and an empty pattern is reported as "no match" instead of failing while
 * the table is built.
 *
 * @param list lines to search
 * @param goal pattern to look for; must not be {@code null}
 */
public void KMP(List<String> list,String goal) {
    List<Str> s = new ArrayList<>();
    if (!goal.isEmpty()) {
        // Neither depends on the line being scanned, so compute them a single time.
        char[] p = goal.toCharArray();
        int[] next = next(goal);
        for (int q = 0; q < list.size(); q++) {
            char[] t = list.get(q).toCharArray();
            int i = 0; // position in the line
            while (i < t.length) {
                int j = 0; // position in the pattern; restarts after every match
                while (i < t.length && j < p.length) {
                    if (j == -1 || t[i] == p[j]) { // j == -1: advance i and reset j to 0
                        i++;
                        j++;
                    } else {
                        j = next[j]; // fall back inside the pattern
                    }
                }
                if (j == p.length) {
                    s.add(new Str(q, i - j + 1)); // one-based start column
                }
            }
        }
    }
    if (s.isEmpty()) {
        System.out.println("没有与之匹配的串");
    } else {
        System.out.println("共有" + s.size() + "个相同串");
        for (int i = 0; i < s.size(); i++) {
            System.out.println((i + 1) + ". " + "第" + (s.get(i).getRow() + 1) + "行,第"
                    + s.get(i).getStart() + "~" + (s.get(i).getStart() + goal.length() - 1));
        }
    }
    start();
}
/**
 * Builds the KMP fallback table for a pattern.
 *
 * <p>{@code next[j]} is the pattern index to resume from after a mismatch at
 * {@code j}; {@code -1} means "advance the text position and restart the pattern".
 * When the character at the fallback position equals the character that just
 * mismatched, falling back there would fail again, so the entry reuses the
 * fallback's own entry.
 *
 * @param ps the pattern
 * @return the fallback table, one entry per pattern character; empty for an empty pattern
 */
public static int[] next(String ps) {
    char[] p = ps.toCharArray();
    int[] next = new int[p.length];
    if (p.length == 0) {
        return next; // nothing to fill; avoids writing next[0] on an empty array
    }
    next[0] = -1;
    int j = 0;
    int k = -1;
    while (j < p.length - 1) {
        if (k == -1 || p[j] == p[k]) {
            if (p[++j] == p[++k]) { // same character: skip straight to next[k]
                next[j] = next[k];
            } else {
                next[j] = k;
            }
        } else {
            k = next[k];
        }
    }
    return next;
}