双字符串重叠部分获取

函数在两个字符串之间查找所有重叠域
如果两个字符串都比较短,可以忽略本文。
匹配到的每一段重叠部分会以 int[](码点数组,即 CodePoint[])的形式加入传入的 Collection ints。
checkAllOverlap(String ss, String sl, int minLength, Collection<int[]> ints)
ss:参与比对的字符串之一
sl:参与比对的另一个字符串
minLength : 重叠部分的最小长度限制
ints :收集结果
由于采用全部匹配,结果中会出现大量相互重叠的片段,可自行筛查,或者加入过滤条件。


    /**
     * Ad-hoc demo: runs {@code checkAllOverlap} on two sample strings with a minimum
     * run length of 2 and prints every collected run to {@code System.err}.
     */
    public static void testCheckAllOverlap(){
        System.err.println("tast");
        ArrayList<int[]> found = new ArrayList<>();
        checkAllOverlap("12312311aabb0012312xvxc海绵bob爆菊花派大星", "123!aab爆菊花101a", 2, found);
        // Each result is an array of code points; turn it back into text for display.
        for (int[] codePoints : found) {
            String text = new String(codePoints, 0, codePoints.length);
            System.err.println(text);
        }
    }
    
    public static void checkAllOverlap(String ss, String sl, int minLength, Collection<int[]> ints) {
        if (sl.length() < ss.length()) {
            String t = ss;
            ss = sl;
            sl = t;
        }
        if (minLength <= 1) {
            throw new IllegalArgumentException(" ");
        }
        int[][] small = chainSameCodePointUnidirectional(ss, Direction.HeadToTail);
        int[][] large = chainSameCodePointUnidirectional(sl, Direction.HeadToTail);

        int[] lCodePoints = large[0];//
        int[] nextIndex_large = large[1];
        int[] sCodePoints = small[0];
        int[] nextIndex_small = small[1];

        int length_s = small[0].length;
        int length_l = large[0].length;
        //System.err.println("!" + Arrays.toString(nextIndex_small));
        for (int si = 0; si < length_s; si++) {
          //  System.err.println("si " + si);
            if (nextIndex_small[si] < 0) continue;//skip which are patch already
            int value_checking = sCodePoints[si];
          //  System.err.println("checking " + new String(new int[]{value_checking}, 0, 1));
            int li = -1;
            for (int c = 0; c < length_l; c++) {
                if (lCodePoints[c] == value_checking) {
                    li = c;
                   // System.err.println("check " + new String(new int[]{value_checking}, 0, 1));
                    break;
                }
            }
            if (li != -1) {
                for (int sc = si; sc < length_s; )//小字符 同值链
                {
                    for (int lc = li, leftPatch = 0, rightPatch = 0; lc < length_l; leftPatch = 0, rightPatch = 0)// 大字符 同值链
                    {
                        //
                        final int limitR = Math.min(length_s - sc, length_l - lc) - 1;
                        for (; rightPatch < limitR; rightPatch++) {
                            int s_patch = sc + rightPatch + 1;
                            int l_patch = lc + rightPatch + 1;
                            if (nextIndex_large[s_patch] < 0 || nextIndex_large[l_patch] < 0) break;
                            if (!(sCodePoints[s_patch] == lCodePoints[l_patch])) break;

                        }
                        final int limitL = Math.min(sc, lc);
                        for (; leftPatch < limitL; leftPatch++) {
                            int s_patch = sc - leftPatch - 1;
                            int l_patch = lc - leftPatch - 1;
                            //if (nextIndex_large[s_patch] < 0 || nextIndex_large[l_patch] < 0) break;
                            if (!(sCodePoints[s_patch] == lCodePoints[l_patch])) break;

                        }
                        int len = leftPatch + rightPatch + 1;
                        if (len >= minLength) {
                            int[] dest = new int[len];
                            System.arraycopy(sCodePoints, sc - leftPatch, dest, 0, len);
                            ints.add(dest);//todo 获取数据 筛选条件
                        }
                        /
                        int next = nextIndex_large[lc];
                        if (next == lc) break;
                        lc = next;
                    }
                    int next = nextIndex_small[sc];
                    if (next == sc) break;
                    sc = next;

                }

                for (int i = li; i < length_l; )//sl 清理索引
                {
                    int next = nextIndex_large[i];
                    nextIndex_large[i] = -1;
                    if (i == next) break;
                    i = next;
                }
            }

            for (int i = si; i < length_s; )//ss 清理索引
            {
                int next = nextIndex_small[i];
                nextIndex_small[i] = -1;
                if (i == next) break;
                i = next;
            }

        }

    }


    /** Selects how {@code chainSameCodePointUnidirectional} links equal code points. */
    public enum Direction {
        /** Build the link table with {@code linkSameValueFromHead}. */
        HeadToTail,
        /** Build the link table with {@code linkSameValueFromTail}. */
        TailToHead
    }

    /**
     * Splits {@code target} into Unicode code points and builds a link table that chains
     * positions holding the same code point.
     *
     * @param target    the string to analyse
     * @param direction {@code TailToHead} builds the table with {@code linkSameValueFromTail};
     *                  any other constant builds it with {@code linkSameValueFromHead}
     * @return a two-row array: row 0 holds the code points of {@code target}, row 1 holds the
     *         link table, which has the same length as row 0
     */
    static int[][] chainSameCodePointUnidirectional(String target, Direction direction) {
        // Work on code points, not chars: some Unicode characters take two chars.
        final int[] points = target.codePoints().toArray();
        final int[] links;
        switch (direction) {
            case TailToHead:
                links = linkSameValueFromTail(points);
                break;
            case HeadToTail:
            default:
                links = linkSameValueFromHead(points);
                break;
        }
        return new int[][]{points, links};
    }

    /**
     * Links every position of {@code target} to the previous and to the next position that
     * holds the same value.
     *
     * <p>Example: for the input {@code [1, 2, 3, 1, 0, 0, 1, 2, 3]} the result is
     * {@code pre = [0, 1, 2, 0, 4, 4, 3, 1, 2]} and {@code next = [3, 7, 8, 6, 5, 5, 6, 7, 8]}.
     *
     * @param target the values to scan; it is not modified
     * @return a two-row array whose rows have the same length as {@code target}:
     *         row 0 ({@code pre}) holds, for each index, the index of the closest earlier
     *         occurrence of the same value, or the index itself for a first occurrence;
     *         row 1 ({@code next}) holds the index of the closest later occurrence of the
     *         same value, or the index itself for a last occurrence
     */
    private static int[][] linkSameValue(int[] target) {
        final int length = target.length;
        int[] pre = new int[length];
        int[] next = new int[length];
        // Explicit marker for "already part of a chain". A stored 0 cannot serve as that
        // marker because 0 is also a valid link to the first element.
        boolean[] linked = new boolean[length];
        for (int i = 0; i < length; i++) {
            if (linked[i]) {
                continue;
            }
            linked[i] = true;
            pre[i] = i;
            next[i] = i;
            int checking = target[i];
            // p is the most recent position found so far that holds the value being checked.
            for (int j = i + 1, p = i; j < length; j++) {
                if (target[j] == checking) {
                    next[p] = j;
                    pre[j] = p;
                    next[j] = j;
                    linked[j] = true;
                    p = j;
                }
            }
        }
        return new int[][]{pre, next};
    }

    /**
     * Links every position of {@code target} to the closest earlier position that holds the
     * same value.
     *
     * <p>Example: for the input {@code [1, 2, 3, 1, 0, 0, 1, 2, 3]} the result is
     * {@code [0, 1, 2, 0, 4, 4, 3, 1, 2]}.
     *
     * @param target the values to scan; it is not modified
     * @return an array of the same length as {@code target} in which entry {@code i} is the
     *         index of the closest earlier occurrence of {@code target[i]}, or {@code i}
     *         itself when {@code target[i]} has no earlier occurrence
     */
    private static int[] linkSameValueFromTail(int[] target) {
        final int length = target.length;
        int[] pre = new int[length];
        // Explicit marker for "already part of a chain". A stored 0 cannot serve as that
        // marker because 0 is also a valid link to the first element.
        boolean[] linked = new boolean[length];
        for (int i = 0; i < length; i++) {
            if (linked[i]) {
                continue;
            }
            linked[i] = true;
            pre[i] = i;
            int checking = target[i];
            // p is the most recent position found so far that holds the value being checked.
            for (int j = i + 1, p = i; j < length; j++) {
                if (target[j] == checking) {
                    pre[j] = p;
                    linked[j] = true;
                    p = j;
                }
            }
        }
        return pre;
    }

    /**
     * Links every position of {@code target} to the closest later position that holds the
     * same value.
     *
     * <p>Example: for the input {@code [1, 2, 3, 1, 0, 0, 1, 2, 3]} the result is
     * {@code [3, 7, 8, 6, 5, 5, 6, 7, 8]}.
     *
     * @param target the values to scan; it is not modified
     * @return an array of the same length as {@code target} in which entry {@code i} is the
     *         index of the closest later occurrence of {@code target[i]}, or {@code i}
     *         itself when {@code target[i]} does not occur again
     */
    private static int[] linkSameValueFromHead(int[] target) {
        final int size = target.length;
        final int[] successor = new int[size];
        int head = 0;
        while (head < size) {
            // A zero entry means this position has not been placed on a chain yet.
            if (successor[head] == 0) {
                final int value = target[head];
                successor[head] = head;
                int tail = head;
                for (int probe = head + 1; probe < size; probe++) {
                    if (target[probe] != value) {
                        continue;
                    }
                    // Append probe to the chain; until a later match is found it ends the chain.
                    successor[tail] = probe;
                    successor[probe] = probe;
                    tail = probe;
                }
            }
            head++;
        }
        return successor;
    }
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值