算法4第6章后缀数组讲解

最新推荐文章于 2023-04-10 11:25:04 发布

csprimer

最新推荐文章于 2023-04-10 11:25:04 发布

阅读量215

点赞数

分类专栏：算法

本文链接：https://blog.csdn.net/cyj88jyc/article/details/99695050

版权

算法专栏收录该内容

21 篇文章 0 订阅

订阅专栏

最长重复子字符串/后缀数组
给定字符串怎样找到它的最长重复子字符串，如字符串atobeornottobe，最长重复子字符串是tobe
使用一般的暴力方法速度很慢，时间复杂度至少是O(N^2)；而使用后缀数组可以巧妙而高效地解决此问题。
首先找出字符串的所有后缀，组成后缀字符串数组，并对数组进行排序；然后遍历排序后的数组，计算每一对相邻后缀的最长公共前缀，其中最长的那个公共前缀就是最长重复子字符串
如字符串atobeornottobe，后缀字符串是atobeornottobe，tobeornottobe，obeornottobe，beornottobe，eornottobe，ornottobe，rnottobe，nottobe，ottobe，ttobe，tobe，obe，be，e
排序后就是
atobeornottobe
be
beornottobe
e
eornottobe
nottobe
obe
obeornottobe
ornottobe
ottobe
rnottobe
tobe
tobeornottobe
ttobe
该算法在一般情况下时间复杂度是O(NlogN),代码如下

public class LongestRepeatedSubstring {

    // Static utility class: the private constructor prevents instantiation.
    private LongestRepeatedSubstring() { }

    /**
     * Finds the longest substring that occurs at least twice in {@code text}.
     * <p>
     * Any repeated substring is a common prefix of two suffixes, and the
     * longest one is a common prefix of two suffixes that are adjacent in
     * sorted order, so only neighbouring entries of the suffix array are
     * compared.
     *
     * @param text the string to search
     * @return the longest repeated substring of {@code text};
     *         the empty string if nothing is repeated
     */
    public static String lrs(String text) {
        int n = text.length();
        SuffixArray sa = new SuffixArray(text);

        // Track the winner by position and length; the substring itself is
        // only materialised once, after the scan.
        int bestStart = 0;
        int bestLength = 0;
        for (int rank = 1; rank < n; rank++) {
            int shared = sa.lcp(rank);
            // Strict comparison: on ties the earliest rank keeps the win.
            if (shared > bestLength) {
                bestLength = shared;
                bestStart = sa.index(rank);
            }
        }

        if (bestLength == 0) {
            return "";
        }
        return text.substring(bestStart, bestStart + bestLength);
    }

    /**
     * Unit tests the {@code lrs()} method: obtains the text from
     * {@code StdIn.readAll()}, collapses every run of whitespace into a single
     * space, and prints the longest repeated substring wrapped in single quotes.
     *
     * @param args the command-line arguments (unused)
     */
    public static void main(String[] args) {
        String raw = StdIn.readAll();
        String text = raw.replaceAll("\\s+", " ");
        String answer = lrs(text);
        StdOut.println("'" + answer + "'");
    }
}

public class SuffixArray {
    // One entry per suffix of the input text, held in ascending order.
    private Suffix[] suffixes;

    /**
     * Builds the suffix array of {@code text}: one {@code Suffix} is created
     * for every starting position and the resulting array is sorted.
     *
     * @param text the input string
     */
    public SuffixArray(String text) {
        int n = text.length();
        Suffix[] all = new Suffix[n];
        int start = 0;
        while (start < n) {
            all[start] = new Suffix(text, start);
            start++;
        }
        Arrays.sort(all);
        this.suffixes = all;
    }

    /**
     * A suffix represented as the shared text plus a start offset, so that
     * creating it does not copy any characters.
     */
    private static class Suffix implements Comparable<Suffix> {
        private final String text;
        private final int index;

        private Suffix(String text, int index) {
            this.text = text;
            this.index = index;
        }

        // Number of characters in this suffix.
        private int length() {
            return text.length() - index;
        }

        // Character at offset i, counted from the start of this suffix.
        private char charAt(int i) {
            return text.charAt(index + i);
        }

        /**
         * Orders suffixes character by character; when one suffix is a
         * prefix of the other, the shorter one comes first.
         */
        public int compareTo(Suffix that) {
            if (this == that) {
                return 0; // same object, nothing to scan
            }
            int mine = this.length();
            int theirs = that.length();
            int shared = Math.min(mine, theirs);
            for (int k = 0; k < shared; k++) {
                char a = this.charAt(k);
                char b = that.charAt(k);
                if (a != b) {
                    return (a < b) ? -1 : +1;
                }
            }
            return mine - theirs;
        }

        public String toString() {
            return text.substring(index);
        }
    }

    /**
     * Returns the length of the input string, which equals the number of
     * suffixes stored.
     *
     * @return the length of the input string
     */
    public int length() {
        return suffixes.length;
    }

    /**
     * Returns the starting position, in the original string, of the
     * <em>i</em>th smallest suffix.
     *
     * @param i an integer between 0 and n-1
     * @return the index into the original string of the ith smallest suffix
     * @throws java.lang.IllegalArgumentException unless {@code 0 <= i < n}
     */
    public int index(int i) {
        boolean inRange = (0 <= i) && (i < suffixes.length);
        if (!inRange) {
            throw new IllegalArgumentException();
        }
        return suffixes[i].index;
    }

    /**
     * Returns the length of the longest common prefix of the <em>i</em>th
     * smallest suffix and the one immediately before it in sorted order.
     *
     * @param i an integer between 1 and n-1
     * @return the length of the longest common prefix of the ith smallest
     *         suffix and the (i-1)st smallest suffix
     * @throws java.lang.IllegalArgumentException unless {@code 1 <= i < n}
     */
    public int lcp(int i) {
        boolean inRange = (1 <= i) && (i < suffixes.length);
        if (!inRange) {
            throw new IllegalArgumentException();
        }
        return lcpSuffix(suffixes[i], suffixes[i - 1]);
    }

    // Counts how many leading characters s and t have in common.
    private static int lcpSuffix(Suffix s, Suffix t) {
        int limit = Math.min(s.length(), t.length());
        int matched = 0;
        while (matched < limit && s.charAt(matched) == t.charAt(matched)) {
            matched++;
        }
        return matched;
    }

    /**
     * Returns the <em>i</em>th smallest suffix as a string.
     *
     * @param i the index
     * @return the ith smallest suffix as a string
     * @throws java.lang.IllegalArgumentException unless {@code 0 <= i < n}
     */
    public String select(int i) {
        boolean inRange = (0 <= i) && (i < suffixes.length);
        if (!inRange) {
            throw new IllegalArgumentException();
        }
        return suffixes[i].toString();
    }

    /**
     * Returns the number of suffixes strictly less than the {@code query}
     * string, found by binary search over the sorted suffixes. Consequently
     * {@code rank(select(i))} equals {@code i} for each {@code i} between
     * 0 and n-1.
     *
     * @param query the query string
     * @return the number of suffixes strictly less than {@code query}
     */
    public int rank(String query) {
        int lo = 0;
        int hi = suffixes.length - 1;
        while (lo <= hi) {
            int mid = lo + (hi - lo) / 2;
            int cmp = compare(query, suffixes[mid]);
            if (cmp == 0) {
                return mid;
            }
            if (cmp < 0) {
                hi = mid - 1;
            } else {
                lo = mid + 1;
            }
        }
        return lo;
    }

    // Compares the query string with a suffix, using the same ordering as
    // Suffix.compareTo: first differing character, then length.
    private static int compare(String query, Suffix suffix) {
        int queryLength = query.length();
        int suffixLength = suffix.length();
        int shared = Math.min(queryLength, suffixLength);
        for (int k = 0; k < shared; k++) {
            char q = query.charAt(k);
            char c = suffix.charAt(k);
            if (q != c) {
                return (q < c) ? -1 : +1;
            }
        }
        return queryLength - suffixLength;
    }

    /**
     * Unit tests the {@code SuffixArray} data type: obtains the text from
     * {@code StdIn.readAll()}, collapses whitespace runs to single spaces,
     * trims it, and prints one table row per suffix (position in sorted order,
     * start index, lcp with the previous suffix, rank, and up to 50
     * characters of the suffix).
     *
     * @param args the command-line arguments (unused)
     */
    public static void main(String[] args) {
        String s = StdIn.readAll().replaceAll("\\s+", " ").trim();
        SuffixArray suffix = new SuffixArray(s);

        StdOut.println(" i ind lcp rnk select");
        StdOut.println("---------------------------");

        int n = s.length();
        for (int i = 0; i < n; i++) {
            int index = suffix.index(i);
            int end = Math.min(index + 50, n);
            String ith = "\"" + s.substring(index, end) + "\"";
            String whole = s.substring(index);
            assert whole.equals(suffix.select(i));
            int rank = suffix.rank(whole);
            if (i > 0) {
                int lcp = suffix.lcp(i);
                StdOut.printf("%3d %3d %3d %3d %s\n", i, index, lcp, rank, ith);
            } else {
                // The smallest suffix has no predecessor, so no lcp value.
                StdOut.printf("%3d %3d %3s %3d %s\n", i, index, "-", rank, ith);
            }
        }
    }

}