// 求最长重复子串的算法 (Algorithms for finding the longest repeated substring)
public class StrTest {
@Test
public void longDoubSubStr(){
String str = "eabcdabcfeabeabeabeab";
String longDoubSubStr1 = findLongestSubString(str);
System.out.println("longDoubSubStr1:"+longDoubSubStr1);
String longDoubSubStr2 = findSubStr(str);
System.out.println("longDoubSubStr2:"+longDoubSubStr2);
String longDoubSubStr3 = findSubStrByNxt(str);
System.out.println("longDoubSubStr3:"+longDoubSubStr3);
}
private String findSubStrByNxt(String str) {
int maxPos = 0;
int maxReStrLen = 0;
/**
* 使用KMP算法:依次去掉str最前端的i个字符,计算余下子串的next[]数组,数组的最大指,即为最大子串长度,
* 并根据得到最大子串时的位置的i值,获取最大子串
*/
for (int pos = 0; pos < str.length(); pos++){
String subStrTem = str.substring(pos);
/**
*
*/
int reStrLen = calcNxtArry(subStrTem);
if (reStrLen > maxReStrLen){
maxReStrLen = reStrLen;
maxPos = pos;
}
}
/**
* 从maxSubStrLenSubInfo中解析出重复的最长子串信息
*/
return str.substring(maxPos, maxPos + maxReStrLen);
}
private int calcNxtArry(String subStrTem) {
int maxReStrLen = 0;
int reStrLen = 0;
int preIndexReStrLen = 0;
for (int index = 0; index < subStrTem.length(); index++){
reStrLen = getNxt(index, subStrTem, preIndexReStrLen);
if (maxReStrLen < reStrLen){
maxReStrLen = reStrLen;
}
preIndexReStrLen = reStrLen;
}
return maxReStrLen;
}
private int getNxt(int index, String subStrTem, int preIndexReStrLen) {
int subStrLen = 0;
if (index == 0){
return 0;
}
/**
* 以下的逻辑可以优化:
* 1)记录前一index对应的preindex_subStrLen,subStrLen = preindex_subStrLen +1 开始循环
*/
// for(subStrLen = index; subStrLen > 0; subStrLen--){
// if (subStrTem.substring(0, subStrLen).equals(subStrTem.substring(index - subStrLen + 1, index + 1))){
// break;
// }
// }
for(subStrLen = preIndexReStrLen +1; subStrLen > 0; subStrLen--){
if (subStrTem.substring(0, subStrLen).equals(subStrTem.substring(index - subStrLen + 1, index + 1))){
break;
}
}
return subStrLen;
}
/**
* 简化的三层循环
* @param str
* @return
*/
private String findLongestSubString(String str) {
int maxSubStrLen = 0;
int maxSubStrPos = 0;
/**
* 对str有效性做检验
*/
if(!checkValid(str)){
return null;
}
/**
* 双重遍历,寻找存在的重复子串,并记录重复子串长度:
* 外重循环:两个子串起始坐标的间隔,从(1,len(str)-1)
* 内重循环:从源串的起始位置,依次往后遍历
* 维护找到的最长重复子串的长度以及起始位置。
*/
int strLength = str.length();
for (int interval = 1; interval < strLength; interval++){
for (int pos = 0; pos + interval < strLength; pos++){
int subStrLen = 0;
try {
while (str.charAt(pos + subStrLen) == str.charAt(pos + interval + subStrLen)){
subStrLen++;
if (pos + interval + subStrLen >= strLength){
break;
}
}
} catch (Exception e) {
e.printStackTrace();
System.out.println("pos: "+pos+"; interval: "+interval);
}
if (subStrLen > maxSubStrLen){
maxSubStrLen = subStrLen;
maxSubStrPos = pos;
}
}
}
/**
* 构造子串,返回
*/
return str.substring(maxSubStrPos, maxSubStrPos+maxSubStrLen);
}
private boolean checkValid(String str) {
return StringUtils.isNotBlank(str);
}
/***
* 原始的三层循环
*/
private String findSubStr(String str){
/**
* 定义重复出现的最大子串
*/
String maxSubStr = "";
/**
* 定义内层循环找到重复子串的标记,默认false,为找到,内层循环一旦找到,就跳出外层循环
*/
boolean gotFlag = false;
/**
* 遍历所有可能的最大长度,理论最大值len(str)-1
*/
for (int subStrLen = str.length()-1; subStrLen > 0; subStrLen--){
/**
* 遍历str中所有长度为subStrLen的数组,验证是否重复
*/
for (int pos = 0; pos + subStrLen < str.length(); pos++){
/**
* 得到子串
*/
String subStr = str.substring(pos, pos+subStrLen);
/**
* 得到str首次出现子串的起始坐标
*/
int pos_fix = str.indexOf(subStr);
/**
* 得到str末次出现子串的起始坐标
*/
int pos_post = str.lastIndexOf(subStr);
/**
* 两者不相等则说明重复出现
*/
if (pos_fix != pos_post){
maxSubStr = subStr;
gotFlag = true;
break;
}
}
if (gotFlag == true){
break;
}
}
return maxSubStr;
}
}