/**
* Author: yiminghe
* Date: 2008-10-24
* Time: 15:08:32
* For any problems, contact yiminghe@fudan.edu.cn.
*/
import java.io.*;
import java.util.*;
/**
 * Longest-common-substring (LCS) utilities built on a generalized suffix tree.
 *
 * <p>Covers the LCS problem and its extension: finding all common substrings
 * shared by several strings.
 */
class LCS {
    public static void main(String[] args) throws IOException {
        String[] source = {"axybcde", "cdefxy", "xyccde"};
        SuffixTreeNode st = buildSuffixTree(source);
        String result = Lcs(st.firstChild, source.length);
        if (result.equals("")) {
            System.out.println("No common string!");
        } else {
            System.out.println("The longest common substring is : "
                    + result + " .");
        }
        String[] commons = commonString(source);
        System.out.println(Arrays.asList(commons));
    }

    /**
     * Builds one suffix tree containing every suffix of every input string.
     *
     * <p>Each inserted suffix is tagged with the 1-based position of its source
     * string (as a map key), so a node's {@code belongTo} map tells which input
     * strings pass through it. Progress is reported on {@code System.err}.
     *
     * @param ss the input strings
     * @return the root node of the suffix tree
     */
    public static SuffixTreeNode buildSuffixTree(String ss[]) {
        // The root is tagged with the pseudo-owner "0"; real strings use "1".."n".
        HashMap<String, String> belong = new HashMap<String, String>();
        belong.put("0", "");
        SuffixTreeNode SuffixTree =
                new SuffixTreeNode(-1, "", 0, belong);
        // Add all suffixes of each string.
        for (int i = 0; i < ss.length; i++) {
            System.err.print("后缀树[" + (i + 1) + "]");
            belong = new HashMap<String, String>();
            belong.put("" + (i + 1), "");
            for (int index = 0; index < ss[i].length(); index++) {
                String str = ss[i].substring(index);
                SuffixTree.insert(index, str, 0, belong);
            }
            System.err.println(" - OK");
        }
        return SuffixTree;
    }

    /**
     * Depth-first search for the longest common substring.
     *
     * <p>Walks the sibling list starting at {@code suffixtree}; only nodes whose
     * {@code belongTo} map has exactly {@code count} entries are followed.
     *
     * @param suffixtree first node of the sibling list to search (typically the
     *                   root's first child); may be {@code null}
     * @param count      total number of input strings
     * @return the longest label path found from this level downwards, or
     *         {@code ""} if no node at this level is shared by all strings
     */
    public static String Lcs(SuffixTreeNode suffixtree, int count) {
        String result = "";
        while (suffixtree != null) {
            if (suffixtree.belongTo.size() == count) {
                // A leaf contributes only its own label; an inner node
                // contributes its label plus the best continuation below it.
                String candidate = suffixtree.isLeaf()
                        ? suffixtree.label
                        : suffixtree.label + Lcs(suffixtree.firstChild, count);
                if (result.length() < candidate.length()) {
                    result = candidate;
                }
            }
            suffixtree = suffixtree.next;
        }
        return result;
    }

    /**
     * Finds the common substrings of all input strings, dropping every result
     * that is a suffix of another result.
     *
     * @param source the input strings
     * @return the remaining common substrings (order unspecified)
     */
    public static String[] commonString(String[] source) {
        HashSet<String> r = new HashSet<String>();
        SuffixTreeNode st = buildSuffixTree(source);
        recurCommon(r, st.firstChild, source.length);
        String[] original = r.toArray(new String[r.size()]);
        ArrayList<String> result = new ArrayList<String>();
        for (int i = 0; i < original.length; i++) {
            boolean nested = false;
            for (int j = 0; j < original.length; j++) {
                // Discard a candidate that is the tail of another candidate.
                if (i != j && original[j].endsWith(original[i])) {
                    nested = true;
                    break;
                }
            }
            if (!nested) {
                result.add(original[i]);
            }
        }
        return result.toArray(new String[result.size()]);
    }

    /**
     * Collects common substrings starting from the root's child list.
     *
     * @param r          receives the collected substrings
     * @param suffixtree first child of the tree root; may be {@code null}
     * @param count      total number of input strings
     * @return {@code true} if at least one node at this level is shared by all
     *         strings
     */
    private static boolean recurCommon(HashSet<String> r, SuffixTreeNode suffixtree, int count) {
        return collectCommon(r, suffixtree, count, "");
    }

    /**
     * Recursive worker for {@link #recurCommon}. The text spelled from the root
     * is carried down in {@code prefix} instead of being rebuilt from
     * {@code parent} back-references, so the result does not depend on those
     * links being kept up to date when nodes are split.
     *
     * @param prefix concatenated labels of all ancestors of {@code node}
     */
    private static boolean collectCommon(HashSet<String> r, SuffixTreeNode node,
                                         int count, String prefix) {
        boolean found = false;
        for (SuffixTreeNode cur = node; cur != null; cur = cur.next) {
            if (cur.belongTo.size() != count) {
                continue;
            }
            found = true;
            String path = prefix + cur.label;
            // Record the path only where it cannot be extended by a child
            // that is also shared by all strings.
            if (cur.isLeaf() || !collectCommon(r, cur.firstChild, count, path)) {
                r.add(path);
            }
        }
        return found;
    }
}
/**
 * Node of a suffix tree stored in first-child / next-sibling form.
 * Siblings are kept ordered by the first character of their label.
 */
class SuffixTreeNode {
    // Start position of the suffix in its original string; -1 for the root
    // as created by the builder and for nodes turned into shared prefixes.
    int index;
    // Edge label (piece of a suffix) leading into this node.
    String label;
    // Next sibling.
    SuffixTreeNode next;
    // First child.
    SuffixTreeNode firstChild = null;
    // Parent node.
    SuffixTreeNode parent = null;
    // Depth of the node in the tree.
    int level;
    // Owners of this node: the keys identify the strings passing through it.
    HashMap<String, String> belongTo = null;

    /**
     * Creates a node.
     *
     * @param i     start position of the suffix in its string
     * @param s     edge label
     * @param level depth of the node
     * @param flag  owner tags; copied, so later changes to {@code flag} do not
     *              affect this node
     */
    SuffixTreeNode(int i, String s,
                   int level, HashMap<String, String> flag) {
        this.index = i;
        this.label = s;
        this.level = level;
        this.belongTo = new HashMap<String, String>();
        this.belongTo.putAll(flag);
    }

    /**
     * Makes {@code n} (and every sibling chained after it) the children of
     * this node. All nodes of the chain get their {@code parent} updated, so
     * a whole child list can be moved under a new node consistently.
     *
     * @param n head of the child list, or {@code null} for no children
     */
    void setChilden(SuffixTreeNode n) {
        this.firstChild = n;
        for (SuffixTreeNode child = n; child != null; child = child.next) {
            child.parent = this;
        }
    }

    /** @return {@code true} if this node has no children */
    boolean isLeaf() {
        return (this.firstChild == null);
    }

    /**
     * Inserts a suffix below this node.
     *
     * @param ind    start position of the suffix in its string
     * @param str    text to insert; {@code null} or empty text is ignored
     * @param level  depth of this node
     * @param belong owner tags to merge into every node on the inserted path
     */
    public void insert(int ind, String str,
                       int level, HashMap<String, String> belong) {
        // Nothing to insert; also protects the charAt(0) look-ups below.
        if (str == null || str.length() == 0) {
            return;
        }
        // No children yet: the new node becomes the only child.
        if (this.isLeaf()) {
            this.setChilden(new SuffixTreeNode(ind, str, level + 1, belong));
            return;
        }
        // Find the first child whose leading character is >= that of str.
        char lead = str.charAt(0);
        SuffixTreeNode before = null;
        SuffixTreeNode cur = this.firstChild;
        while (cur != null && cur.label.charAt(0) < lead) {
            before = cur;
            cur = cur.next;
        }
        // No child starts with the same character: link a new sibling in order.
        if (cur == null || cur.label.charAt(0) > lead) {
            SuffixTreeNode added = new SuffixTreeNode(ind, str, level + 1, belong);
            added.parent = this;
            added.next = cur;
            if (before == null) {
                this.firstChild = added;
            } else {
                before.next = added;
            }
            return;
        }
        // Exactly the same label: only the owner tags are merged.
        if (str.equals(cur.label)) {
            cur.belongTo.putAll(belong);
            return;
        }
        // Same leading character: measure the common prefix.
        int minLength = Math.min(cur.label.length(), str.length());
        int common = 1;
        while (common < minLength
                && cur.label.charAt(common) == str.charAt(common)) {
            common++;
        }
        // The whole label is a prefix of str: push the remainder down.
        if (common == cur.label.length()) {
            cur.insert(ind, str.substring(common), level + 1, belong);
            cur.belongTo.putAll(belong);
            return;
        }
        // str is shorter than the label or differs inside it: split the edge.
        // 'tail' takes over the old label's remainder, owners and children;
        // 'cur' is reduced to the shared prefix.
        SuffixTreeNode tail = new SuffixTreeNode(cur.index,
                cur.label.substring(common), level + 1, cur.belongTo);
        tail.setChilden(cur.firstChild);
        cur.setChilden(tail);
        cur.index = -1;
        cur.label = cur.label.substring(0, common);
        cur.belongTo.putAll(belong);
        // The moved subtree is now one level deeper.
        tail.lowDown();
        // Insert whatever is left of str below the shared prefix.
        if (common < str.length()) {
            cur.insert(ind, str.substring(common), level + 1, belong);
        }
    }

    /** Placeholder; intentionally does nothing. */
    void print() {
    }

    /**
     * Increments the stored level of this node and of its whole subtree;
     * used after an intermediate node has been inserted above it.
     */
    void lowDown() {
        this.level++;
        for (SuffixTreeNode child = this.firstChild; child != null; child = child.next) {
            child.lowDown();
        }
    }
}
// LCS problem
// (Blog page residue: "latest recommended article published on 2021-03-30 14:13:25")