串的模式匹配
//3.3.1 Naive (Brute-Force) pattern matching algorithm
public class BF
{
private static int count=0; //records the number of comparisons
//Returns the position of the first match of pattern in target, searching from index begin; returns -1 when matching fails
//NOTE(review): this declaration breaks off after the signature below; no method body follows it before unrelated text begins. Confirm where the body belongs.
public static int indexOf(String target, String pattern, int begin)
java.lang.String字符串的查找、替换和删除子串操作。
/**
 * Demonstrates finding, replacing and deleting substrings of a {@link String}.
 */
public class DeleteString
{
    /**
     * Returns {@code target} with the first occurrence of {@code pattern} removed.
     *
     * @param target  the string to search
     * @param pattern the substring to delete
     * @return {@code target} itself when {@code pattern} does not occur, otherwise a new
     *         string without the first occurrence
     */
    public static String deleteFirst(String target, String pattern)
    {
        int i = target.indexOf(pattern);
        if (i == -1)
        {
            return target;
        }
        return target.substring(0, i) + target.substring(i + pattern.length());
    }

    /**
     * Returns {@code target} with every occurrence of {@code pattern} removed.
     * Occurrences are found left to right and do not overlap; text that only becomes a
     * match after a deletion joins two pieces is not removed
     * (e.g. {@code deleteAll("aabcbc", "abc")} yields {@code "abc"}).
     *
     * @param target  the string to search
     * @param pattern the substring to delete; an empty pattern deletes nothing
     * @return {@code target} itself when nothing is deleted, otherwise a new string
     */
    public static String deleteAll(String target, String pattern)
    {
        int n = pattern.length();
        int i = target.indexOf(pattern);
        // An empty pattern "matches" at every index without removing anything, so searching
        // again from the same index would never terminate.
        if (n == 0 || i == -1)
        {
            return target;
        }
        // Copy the text between matches once instead of rebuilding the whole string per match.
        StringBuilder kept = new StringBuilder(target.length());
        int from = 0;                       // start of the text not yet copied
        while (i != -1)
        {
            kept.append(target, from, i);
            from = i + n;                   // resume right after the deleted match
            i = target.indexOf(pattern, from);
        }
        kept.append(target, from, target.length());
        return kept.toString();
    }

    public static void main(String args[])
    {
        // Figure 3.11: replacing substrings.
        // Note: String.replaceFirst/replaceAll treat the pattern as a regular expression;
        // the sample data below contains no regex metacharacters.
//      String target="ababdabcdabcabc", pattern="abc", replacement="xy";   // Example 3.3 data
        String target="aaaa", pattern="a", replacement="ab";                // Example 3.4 data
        System.out.println("\""+target+"\".indexOf(\""+pattern+"\")="+target.indexOf(pattern));
        System.out.println("\""+target+"\".replaceFirst(\""+pattern+"\", \""+replacement+"\")="+
                target.replaceFirst(pattern,replacement));
        System.out.println("\""+target+"\".replaceAll(\""+pattern+"\", \""+replacement+"\")="+
                target.replaceAll(pattern,replacement));
        // Figure 3.12: deleting substrings.
        System.out.println("deleteFirst(\""+target+"\", \""+pattern+"\")="+deleteFirst(target, pattern));
        System.out.println("deleteAll(\""+target+"\", \""+pattern+"\")="+deleteAll(target, pattern));
    }
}
/*
程序运行结果如下:
//例3.3数据
"ababdabcdabcabc".indexOf("abc")=5
"ababdabcdabcabc".replaceFirst("abc", "xy")=ababdxydabcabc
"ababdabcdabcabc".replaceAll("abc", "xy")=ababdxydxyxy
deleteFirst("ababdabcdabcabc", "abc")=ababddabcabc
deleteAll("ababdabcdabcabc", "abc")=ababdd
//例3.4数据
"aaaa".indexOf("a")=0
"aaaa".replaceFirst("a", "ab")=abaaa
"aaaa".replaceAll("a", "ab")=abababab
deleteFirst("aaaa", "a")=aaa
deleteAll("aaaa", "a")=
*/
/**
 * Naive (Brute-Force) pattern matching.
 */
public class BF
{
    private static int count = 0;       // character comparisons made by the most recent search

    /**
     * Returns the index of the first occurrence of {@code pattern} in {@code target},
     * searching from index {@code begin}, and prints the number of character comparisons
     * that were made.
     *
     * @param target  the string to search
     * @param pattern the string to look for; an empty pattern, or one longer than
     *                {@code target}, never matches
     * @param begin   index to start searching from; a negative value is treated as 0,
     *                as {@link String#indexOf(String, int)} does
     * @return the index of the first match, or -1 when there is none
     */
    public static int indexOf(String target, String pattern, int begin)
    {
        count = 0;                      // reset first so a rejected search never reports a stale count
        if (pattern.length() > 0 && target.length() >= pattern.length())
        {
            int i = Math.max(begin, 0); // index of the current character in target
            int j = 0;                  // index of the current character in pattern
            while (i < target.length())
            {
                if (target.charAt(i) == pattern.charAt(j))
                {                       // characters agree: compare the next pair
                    i++;
                    j++;
                }
                else
                {                       // mismatch: backtrack both indices
                    i = i - j + 1;      // first character of the next candidate substring
                    j = 0;
                }
                count++;
                if (j == pattern.length())
                {                       // the whole pattern has been matched
                    System.out.println("BF.count=" + BF.count);
                    return i - j;       // start index of the matched substring
                }
            }
        }
        System.out.println("BF.count=" + BF.count);
        return -1;                      // no match
    }

    /**
     * Returns the index of the first occurrence of {@code pattern} in {@code target},
     * searching from index 0, or -1 when there is none.
     */
    public static int indexOf(String target, String pattern)
    {
        return BF.indexOf(target, pattern, 0);
    }

    public static void main(String args[])
    {
        String target="ababdabcd", pattern="abc";       // Figure 3.10, BF example
//      String target="aabaaa", pattern="aab";          // Figure 3.13(a), match found, best case
//      String target="aaaaa", pattern="aab";           // Figure 3.13(b), worst case, no match
//      String target="aaaab", pattern="aab";           // worst case, match found
        System.out.println("BF.indexOf(\""+target+"\",\""+pattern+"\")="+BF.indexOf(target,pattern));
    }
}
/*
Program output:
BF.count=12
BF.indexOf("ababdabcd","abc")=5         // Figure 3.10, BF example
BF.count=3
BF.indexOf("aabaaa","aab")=0            // Figure 3.13(a), match found, best case
BF.count=11
BF.indexOf("aaaaa","aab")=-1            // Figure 3.13(b), worst case, no match
BF.count=9
BF.indexOf("aaaab","aab")=2             // worst case, match found
*/
java.lang.StringBuffer字符串的替换和删除子串操作。
/**
 * Replace and delete operations on substrings of a {@link StringBuffer}.
 * Every method modifies the {@code target} buffer it is given and returns that same buffer.
 */
public class ReplaceStringBuffer
{
    /**
     * Replaces the first occurrence of {@code pattern} in {@code target} with
     * {@code replacement}.
     *
     * @return {@code target}, modified in place (unchanged when there is no match)
     */
    public static StringBuffer replaceFirst(StringBuffer target, String pattern, String replacement)
    {
        int i = target.indexOf(pattern);
        if (i != -1)
        {
            target.delete(i, i + pattern.length());     // remove characters i .. i+pattern.length()-1
            target.insert(i, replacement);              // insert replacement at index i
        }
        return target;
    }

    /**
     * Replaces every occurrence of {@code pattern} in {@code target} with
     * {@code replacement}. Inserted replacement text is never searched again.
     *
     * @param pattern the substring to replace; an empty pattern leaves {@code target} unchanged
     * @return {@code target}, modified in place
     */
    public static StringBuffer replaceAll(StringBuffer target, String pattern, String replacement)
    {
        if (pattern.length() == 0)
        {
            // indexOf("") succeeds at every index, so the loop below would keep inserting forever.
            return target;
        }
        int i = target.indexOf(pattern);
        while (i != -1)
        {
            target.delete(i, i + pattern.length());
            target.insert(i, replacement);
            // Resume after the inserted text. Resuming at i (or i+1) loops forever when the
            // replacement itself contains the pattern, e.g. "a" -> "ab" (or "a" -> "aab").
            i = target.indexOf(pattern, i + replacement.length());
        }
        return target;
    }

    /**
     * Deletes the first occurrence of {@code pattern} from {@code target}.
     *
     * @return {@code target}, modified in place (unchanged when there is no match)
     */
    public static StringBuffer deleteFirst(StringBuffer target, String pattern)
    {
        int i = target.indexOf(pattern);
        if (i != -1)
        {
            target.delete(i, i + pattern.length());
        }
        return target;
    }

    public static void main(String args[])
    {
        StringBuffer target = new StringBuffer("ababdabcdabcabc");      // Example 3.3 data
        String pattern="abc", replacement="xy";
//      StringBuffer target = new StringBuffer("aaaa");                 // Example 3.4 data
//      String pattern="a", replacement="ab";
        System.out.println("replaceFirst(\""+target+"\", \""+pattern+"\", \""+replacement+"\")="+
                replaceFirst(target, pattern, replacement));
        System.out.println("replaceAll(\""+target+"\", \""+pattern+"\", \""+replacement+"\")="+
                replaceAll(target, pattern, replacement));
        pattern=replacement;
        System.out.println("deleteFirst(\""+target+"\", \""+pattern+"\")="+deleteFirst(target, pattern));
        System.out.println("deleteAll(\""+target+"\", \""+pattern+"\")="+deleteAll(target, pattern));
        System.out.println("removeAll(\""+target+"\", \""+pattern+"\")="+removeAll(target, pattern));
    }

    // Exercise 3, follow-up question to Example 3.4
    /**
     * Deletes every occurrence of {@code pattern} from {@code target}, one
     * {@link StringBuffer#delete(int, int)} call per match.
     *
     * @param pattern the substring to delete; an empty pattern leaves {@code target} unchanged
     * @return {@code target}, modified in place
     */
    public static StringBuffer deleteAll(StringBuffer target, String pattern)
    {
        if (pattern.length() == 0)
        {
            // Nothing would ever be removed while indexOf("") keeps succeeding at the same index.
            return target;
        }
        int i = target.indexOf(pattern);
        while (i != -1)
        {
            target.delete(i, i + pattern.length());
            i = target.indexOf(pattern, i);     // the text after the match now starts at i
        }
        return target;
    }

    /**
     * Deletes every occurrence of {@code pattern} from {@code target}, like
     * {@link #deleteAll(StringBuffer, String)}, but writes each kept character straight
     * to its final position with {@code setCharAt} and then truncates the buffer once.
     *
     * @param pattern the substring to delete; an empty pattern leaves {@code target} unchanged
     * @return {@code target}, modified in place (unchanged when there is no match)
     */
    public static StringBuffer removeAll(StringBuffer target, String pattern)
    {
        int m = target.length(), n = pattern.length();
        int k = (n == 0) ? -1 : target.indexOf(pattern);   // start of the current match
        if (k == -1)
        {
            // No match (or empty pattern): there is nothing to compact, and calling
            // setLength(-1) would throw.
            return target;
        }
        int i = k;                              // next write position
        while (k != -1)
        {
            int j = k + n;                      // first character after the current match
            k = target.indexOf(pattern, j);     // next match; only indices >= j are searched, none written yet
            int end = (k == -1) ? m : k;        // kept text runs up to the next match or the end
            while (j < end)
            {
                target.setCharAt(i++, target.charAt(j++));
            }
        }
        target.setLength(i);                    // drop the stale tail
        return target;
    }
}
/*
程序运行结果如下:
//例3.3 数据
replaceFirst("ababdabcdabcabc", "abc", "xy")=ababdxydabcabc
replaceAll("ababdxydabcabc", "abc", "xy")=ababdxydxyxy
deleteFirst("ababdxydxyxy", "xy")=ababddxyxy
deleteAll("ababddxyxy", "xy")=ababdd
//例3.4 数据
replaceFirst("aaaa", "a", "ab")=abaaa
replaceAll("abaaa", "a", "ab")=abbababab
deleteFirst("abbababab", "ab")=bababab
deleteAll("bababab", "ab")=b
*/
/*
程序设计说明:
1、replaceAll()方法
如果while中语句如下,当pattern="a", replacement="ab"时,死循环。
i=target.indexOf(pattern, i);
如果while中语句如下,当pattern="a", replacement="aab"时,死循环。
i=target.indexOf(pattern, i+1);
*/
KMP算法:
/**
 * Knuth-Morris-Pratt pattern matching. Each search prints both next tables of the
 * pattern, a trace line for every character comparison, and the final comparison count.
 */
public class KMP
{
    private static int count=0;     // character comparisons counted by the most recent search
    private static int[] next;      // improved next table of the most recently searched pattern
    private static int[] nextk;     // unimproved next table of the most recently searched pattern

    /**
     * Returns the index of the first occurrence of {@code pattern} in {@code target},
     * searching from index {@code begin}.
     *
     * @param target  the string to search
     * @param pattern the string to look for; an empty pattern, or one longer than
     *                {@code target}, never matches
     * @param begin   index to start searching from; a negative value is treated as 0,
     *                as {@link String#indexOf(String, int)} does
     * @return the index of the first match, or -1 when there is none
     */
    public static int indexOf(String target, String pattern, int begin)
    {
        count=0;                    // reset first so a rejected search never reports a stale count
        if (pattern.length()>0 && target.length()>=pattern.length())
        {
            int i=Math.max(begin, 0);   // index of the current character in target
            int j=0;                    // index of the current character in pattern
            nextk = getNextk(pattern);
            System.out.println("nextk[]: "+toString(nextk));
            next = getNext(pattern);
            System.out.println("next[]: "+toString(next));
            while (i<target.length())
            {
                if (j!=-1)
                {
                    System.out.println("KMP.count="+KMP.count+",i="+i+",j="+j+","+target.charAt(i)+"=="+pattern.charAt(j)+"?");
                }
                if (j==-1 || target.charAt(i)==pattern.charAt(j))
                {                   // characters agree, or the pattern restarts: advance both
                    i++;
                    j++;
                }
                else
                {                   // mismatch: i stays put, j falls back to next[j]
                    j=next[j];
                }
                // An iteration that falls back to j==-1 is not counted here; the following
                // iteration, which only advances i and sets j to 0, is counted in its place.
                if (j!=-1)
                {
                    count++;
                }
                if (j==pattern.length())
                {                   // the whole pattern has been matched
                    System.out.println("KMP.count="+KMP.count);
                    return i-j;     // start index of the matched substring
                }
            }
        }
        System.out.println("KMP.count="+KMP.count);
        return -1;                  // no match
    }

    /**
     * Returns the index of the first occurrence of {@code pattern} in {@code target},
     * searching from index 0, or -1 when there is none.
     */
    public static int indexOf(String target, String pattern)
    {
        return indexOf(target, pattern, 0);
    }

    /**
     * Builds the unimproved next table of {@code pattern}: {@code next[0]} is -1 and
     * {@code next[j]} is the index j falls back to after a mismatch at j.
     * The pattern must not be empty.
     */
    private static int[] getNextk(String pattern)
    {
        int j=0, k=-1;
        int[] next=new int[pattern.length()];
        next[0]=-1;
        while (j<pattern.length()-1)
        {
            if (k==-1 || pattern.charAt(j)==pattern.charAt(k))
            {
                j++;
                k++;
                next[j]=k;          // refined further in getNext()
            }
            else
            {
                k=next[k];
            }
        }
        return next;
    }

    /**
     * Builds the improved next table of {@code pattern}. It differs from the unimproved
     * table where {@code pattern.charAt(j)} equals the character at the fallback index k:
     * falling back to k would repeat the same failed comparison, so {@code next[k]} is
     * stored instead. The pattern must not be empty.
     */
    private static int[] getNext(String pattern)
    {
        int j=0, k=-1;
        int[] next=new int[pattern.length()];
        next[0]=-1;
        while (j<pattern.length()-1)
        {
            if (k==-1 || pattern.charAt(j)==pattern.charAt(k))
            {
                j++;
                k++;
                if (pattern.charAt(j)!=pattern.charAt(k))
                {
                    next[j]=k;
                }
                else
                {
                    next[j]=next[k];    // the improvement: skip the comparison known to fail
                }
            }
            else
            {
                k=next[k];
            }
        }
        return next;
    }

    /** Formats a next table as its entries, each followed by one space. */
    private static String toString(int[] next)
    {
        StringBuilder str=new StringBuilder();
        for (int i=0; i<next.length; i++)
        {
            str.append(next[i]).append(' ');
        }
        return str.toString();
    }

    public static void main(String args[])
    {
//      String target="abdabcabbabcabc", pattern="abcabc";              // Figure 3.17
//      String target="ababdabcd", pattern="abc";                       // BF example, Figure 3.10
//      String target="abcabdabcabcaa", pattern="abcabdabcabcaa";       // Table 3.4
//      String target="aaaaa", pattern="aab";                           // worst case, no match, Figure 3.13(b)
//      String target="aaaab", pattern="aab";                           // worst case, match found
        // Exercise 3
        String target="ababaab", pattern="aab";                         // Exercise 3.12 (1)
//      String target="aaabaaaab", pattern="aaaab";                     // Exercise 3.12 (2)
//      String target="acabbabbabc", pattern="abbabc";                  // Exercise 3.12 (3)
//      String target="acabcabbabcabc", pattern="abcabaa";              // Exercise 3.12 (4)
//      String target="aabcbabcaabcaababc", pattern="abcaababc";        // Exercise 3.12 (5), from Zhang Naixiao's textbook
        System.out.println("KMP.indexOf(\""+target+"\", \""+pattern+"\")="+KMP.indexOf(target, pattern));
    }
}
/*
程序运行结果如下:
KMP.indexOf("abdabcabbabcabc", "abcabc")=9 //图3.17
nextk[]: -1 0 0 0 1 2
next[]: -1 0 0 -1 0 0
KMP.count=17
KMP.indexOf("ababdabcd", "abc")=5 //BF用例,图3.10
nextk[]: -1 0 0
next[]: -1 0 0
KMP.count=10
KMP.indexOf("abcabdabcabcaa", "abcabdabcabcaa")=0 //表3.4
nextk[]: -1 0 0 0 1 2 0 1 2 3 4 5 3 4
next[]: -1 0 0 -1 0 2 -1 0 0 -1 0 5 -1 4
KMP.count=14
KMP.indexOf("aaaaa", "aab")=-1 //图3.13(b),最坏情况,匹配不成功,比较n+m次
nextk[]: -1 0 1
next[]: -1 -1 1
KMP.count=8
KMP.indexOf("aaaab", "aab")=2 //最坏情况,匹配成功,O(n+m)
nextk[]: -1 0 1
next[]: -1 -1 1
KMP.count=7
//习题3,//习3.12
KMP.indexOf("ababaab", "aab")=4 //习3.12①
nextk[]: -1 0 1
next[]: -1 -1 1
KMP.count=7
KMP.indexOf("aaabaaaab", "aaaab")=4 //习3.12②
nextk[]: -1 0 1 2 3
next[]: -1 -1 -1 -1 3
KMP.count=9
KMP.indexOf("acabbabbabc", "abbabc")=5 //习3.12③
nextk[]: -1 0 0 0 1 2
next[]: -1 0 0 -1 0 2
KMP.count=13
KMP.indexOf("acabcabbabcabc", "abcabaa")=-1 //习3.12④
nextk[]: -1 0 0 0 1 2 1
next[]: -1 0 0 -1 0 2 1
KMP.count=18
KMP.indexOf("aabcbabcaabcaababc", "abcaababc")=9 //习3.12⑤,张乃孝书
nextk[]: -1 0 0 0 1 1 2 1 2
next[]: -1 0 0 -1 1 0 2 0 0
KMP.count=20
KMP.indexOf("abbabaaba", "aba")=3 //BF用例
nextk[]: -1 0 0
next[]: -1 0 -1
KMP.count=6
*/