该函数在两个字符串之间查找所有重叠(公共)的片段。
如果两个字符串都比较短,可以忽略本文。
匹配到的重叠部分会以 int[](code point 数组)的形式添加到传入的 Collection ints 中。
checkAllOverlap(String ss, String sl, int minLength, Collection<int[]> ints)
ss:参与比对的字符串(如果 ss 比 sl 长,函数内部会先交换两者)
sl:参与比对的另一个字符串
minLength:重叠部分的最小长度(按 code point 计),必须大于 1,否则抛出 IllegalArgumentException
ints :收集结果
由于会收集全部匹配,结果中会出现大量相互重叠(甚至重复)的片段,可自行筛查,或者加入过滤条件。
/**
 * Manual smoke run: collects the overlaps of two sample strings and prints each
 * collected code-point array to standard error as text.
 */
public static void testCheckAllOverlap() {
    System.err.println("tast");
    ArrayList<int[]> collected = new ArrayList<>();
    checkAllOverlap("12312311aabb0012312xvxc海绵bob爆菊花派大星", "123!aab爆菊花101a", 2, collected);
    for (int[] codePoints : collected) {
        // Each result is a code-point array; turn it back into a String for display.
        String text = new String(codePoints, 0, codePoints.length);
        System.err.println(text);
    }
}
public static void checkAllOverlap(String ss, String sl, int minLength, Collection<int[]> ints) {
if (sl.length() < ss.length()) {
String t = ss;
ss = sl;
sl = t;
}
if (minLength <= 1) {
throw new IllegalArgumentException(" ");
}
int[][] small = chainSameCodePointUnidirectional(ss, Direction.HeadToTail);
int[][] large = chainSameCodePointUnidirectional(sl, Direction.HeadToTail);
int[] lCodePoints = large[0];//
int[] nextIndex_large = large[1];
int[] sCodePoints = small[0];
int[] nextIndex_small = small[1];
int length_s = small[0].length;
int length_l = large[0].length;
//System.err.println("!" + Arrays.toString(nextIndex_small));
for (int si = 0; si < length_s; si++) {
// System.err.println("si " + si);
if (nextIndex_small[si] < 0) continue;//skip which are patch already
int value_checking = sCodePoints[si];
// System.err.println("checking " + new String(new int[]{value_checking}, 0, 1));
int li = -1;
for (int c = 0; c < length_l; c++) {
if (lCodePoints[c] == value_checking) {
li = c;
// System.err.println("check " + new String(new int[]{value_checking}, 0, 1));
break;
}
}
if (li != -1) {
for (int sc = si; sc < length_s; )//小字符 同值链
{
for (int lc = li, leftPatch = 0, rightPatch = 0; lc < length_l; leftPatch = 0, rightPatch = 0)// 大字符 同值链
{
//
final int limitR = Math.min(length_s - sc, length_l - lc) - 1;
for (; rightPatch < limitR; rightPatch++) {
int s_patch = sc + rightPatch + 1;
int l_patch = lc + rightPatch + 1;
if (nextIndex_large[s_patch] < 0 || nextIndex_large[l_patch] < 0) break;
if (!(sCodePoints[s_patch] == lCodePoints[l_patch])) break;
}
final int limitL = Math.min(sc, lc);
for (; leftPatch < limitL; leftPatch++) {
int s_patch = sc - leftPatch - 1;
int l_patch = lc - leftPatch - 1;
//if (nextIndex_large[s_patch] < 0 || nextIndex_large[l_patch] < 0) break;
if (!(sCodePoints[s_patch] == lCodePoints[l_patch])) break;
}
int len = leftPatch + rightPatch + 1;
if (len >= minLength) {
int[] dest = new int[len];
System.arraycopy(sCodePoints, sc - leftPatch, dest, 0, len);
ints.add(dest);//todo 获取数据 筛选条件
}
/
int next = nextIndex_large[lc];
if (next == lc) break;
lc = next;
}
int next = nextIndex_small[sc];
if (next == sc) break;
sc = next;
}
for (int i = li; i < length_l; )//sl 清理索引
{
int next = nextIndex_large[i];
nextIndex_large[i] = -1;
if (i == next) break;
i = next;
}
}
for (int i = si; i < length_s; )//ss 清理索引
{
int next = nextIndex_small[i];
nextIndex_small[i] = -1;
if (i == next) break;
i = next;
}
}
}
public enum Direction {HeadToTail, TailToHead}
/**
 * Splits {@code target} into code points and builds one same-value index chain for them.
 *
 * @param target    string to analyse
 * @param direction {@code TailToHead} selects {@code linkSameValueFromTail}; any other
 *                  value selects {@code linkSameValueFromHead}
 * @return a two-element array: element 0 is the code-point array of {@code target},
 *         element 1 is the index chain of the same length
 */
static int[][] chainSameCodePointUnidirectional(String target, Direction direction) {
    // Work on code points rather than chars: some Unicode characters need two chars.
    final int[] points = target.codePoints().toArray();
    final int[] links;
    switch (direction) {
        case TailToHead:
            links = linkSameValueFromTail(points);
            break;
        case HeadToTail:
        default:
            links = linkSameValueFromHead(points);
            break;
    }
    return new int[][]{points, links};
}
/**
 * Links the positions of equal values in {@code target} in both directions.
 *
 * <p>Example: for {@code [1, 2, 3, 1, 0, 0, 1, 2, 3]} the result is
 * {@code pre = [0, 1, 2, 0, 4, 4, 3, 1, 2]} and
 * {@code next = [3, 7, 8, 6, 5, 5, 6, 7, 8]}.
 *
 * @param target values to scan; not modified
 * @return a two-element array {@code {pre, next}}, each as long as {@code target}:
 *         {@code pre[i]} is the index of the closest earlier position holding the same
 *         value as {@code target[i]} (or {@code i} itself for a first occurrence), and
 *         {@code next[i]} is the index of the closest later such position (or {@code i}
 *         itself for a last occurrence)
 */
private static int[][] linkSameValue(int[] target) {
    final int length = target.length;
    int[] pre = new int[length];
    int[] next = new int[length];
    // Explicit visited flags: 0 is a valid index (a predecessor at position 0), so a
    // zero entry in pre/next cannot double as the "not linked yet" marker.
    boolean[] linked = new boolean[length];
    for (int i = 0; i < length; i++) {
        if (linked[i]) {
            continue;
        }
        linked[i] = true;
        pre[i] = i;
        next[i] = i;
        int checking = target[i];
        // p tracks the most recent position holding the value being chained.
        for (int j = i + 1, p = i; j < length; j++) {
            if (target[j] == checking) {
                next[p] = j;
                pre[j] = p;
                next[j] = j;
                linked[j] = true;
                p = j;
            }
        }
    }
    return new int[][]{pre, next};
}
/**
 * Links every position of {@code target} to the closest earlier position holding the
 * same value.
 *
 * <p>Example: for {@code [1, 2, 3, 1, 0, 0, 1, 2, 3]} the result is
 * {@code [0, 1, 2, 0, 4, 4, 3, 1, 2]}.
 *
 * @param target values to scan; not modified
 * @return an array as long as {@code target} in which element {@code i} is the index of
 *         the previous occurrence of {@code target[i]}, or {@code i} itself when
 *         {@code target[i]} has no earlier occurrence
 */
private static int[] linkSameValueFromTail(int[] target) {
    final int length = target.length;
    int[] pre = new int[length];
    // Explicit visited flags: a predecessor at index 0 is stored as 0, so a zero entry
    // in pre cannot double as the "not linked yet" marker.
    boolean[] linked = new boolean[length];
    for (int i = 0; i < length; i++) {
        if (linked[i]) {
            continue;
        }
        linked[i] = true;
        pre[i] = i;
        int checking = target[i];
        // p tracks the most recent position holding the value being chained.
        for (int j = i + 1, p = i; j < length; j++) {
            if (target[j] == checking) {
                pre[j] = p;
                linked[j] = true;
                p = j;
            }
        }
    }
    return pre;
}
/**
 * Links every position of {@code target} to the closest later position holding the
 * same value.
 *
 * <p>Example: for {@code [1, 2, 3, 1, 0, 0, 1, 2, 3]} the result is
 * {@code [3, 7, 8, 6, 5, 5, 6, 7, 8]}.
 *
 * @param target values to scan; not modified
 * @return an array as long as {@code target} in which element {@code i} is the index of
 *         the next occurrence of {@code target[i]}, or {@code i} itself when
 *         {@code target[i]} does not occur again
 */
private static int[] linkSameValueFromHead(int[] target) {
    final int size = target.length;
    final int[] successor = new int[size];
    for (int head = 0; head < size; head++) {
        // A non-zero entry was written while chaining an earlier occurrence.
        if (successor[head] != 0) {
            continue;
        }
        successor[head] = head;
        final int value = target[head];
        int tail = head; // last position of this value seen so far
        for (int probe = head + 1; probe < size; probe++) {
            if (target[probe] != value) {
                continue;
            }
            successor[tail] = probe;
            successor[probe] = probe;
            tail = probe;
        }
    }
    return successor;
}