最长公共子序列
问题描述: 给定两个不为空的字符串或数列,求出这两个字符串或序列最长的公共子序列。
思路: 最长公共子序列和最长公共子串概念不同,子串要求必须为连续,而子序列则不要求连续性,只需保证公共子序列每一个字符的先后顺序在两个字符串或序列中一致即可。因此,我们在公共子串的基础上做一定修改,递推公式作如下改变:
$$
f(i,j)=\begin{cases}
0, & A[i]\neq B[j]\ \land\ i=0\ \land\ j=0\\
1, & A[i]=B[j]\ \land\ (i=0\ \lor\ j=0)\\
\max\bigl(f(i,j-1),\ f(i-1,j)\bigr), & A[i]\neq B[j]\\
f(i-1,j-1)+1, & A[i]=B[j]
\end{cases}
$$
其中各情形自上而下依次匹配;第三种情形中下标为 $-1$ 的项(即 $i=0$ 时的 $f(i-1,j)$ 与 $j=0$ 时的 $f(i,j-1)$)按 0 处理。
同最长公共子串的主要区别是,最长公共子序列在当前位置元素不相等时,会累积之前元素匹配结果的最大值,而不是归零。下面给出实现代码:
class solution:
    """Compute the longest common subsequences (LCS) of two sequences.

    Creating an instance immediately runs the computation: it fills the
    dynamic-programming table, collects every distinct LCS and prints the
    LCS length followed by the list of subsequences.

    Attributes:
        A: First input sequence (string, list or other indexable sequence).
        B: Second input sequence.
        Matrix: DP table; ``Matrix[i][j]`` is the LCS length of
            ``A[:i + 1]`` and ``B[:j + 1]``. Empty when either input is empty.
        seq: Every distinct LCS, as strings when ``A`` is a ``str`` and as
            lists otherwise.
        longest: Length of the LCS (0 when nothing is shared).
    """

    def __init__(self, A, B):
        """Store both sequences and run the LCS computation on them."""
        self.A = A
        self.B = B
        self.Matrix = []
        self.seq = []
        self.longest = 0
        self.lcsequence(self.A, self.B)

    def lcsequence(self, A, B):
        """Build the DP table for ``A`` and ``B``, then collect and print results.

        Nothing is printed (and the results stay empty) when either input
        is empty.
        """
        # The traceback in search() reads self.A / self.B / self.Matrix, so
        # bind the arguments and start from a clean state; otherwise a second
        # call would append to the table left over from the previous one.
        self.A = A
        self.B = B
        self.Matrix = []
        self.seq = []
        self.longest = 0
        if len(A) == 0 or len(B) == 0:
            return

        for i, item_a in enumerate(A):
            row = []
            self.Matrix.append(row)
            for j, item_b in enumerate(B):
                if item_a == item_b:
                    # A match extends the diagonal; on the first row/column
                    # there is no diagonal cell, so the length is 1.
                    if i == 0 or j == 0:
                        value = 1
                    else:
                        value = self.Matrix[i - 1][j - 1] + 1
                else:
                    # No match: carry over the best result seen so far.
                    # Cells outside the table count as 0.
                    above = self.Matrix[i - 1][j] if i > 0 else 0
                    left = row[j - 1] if j > 0 else 0
                    value = max(above, left)
                row.append(value)

        # The table never decreases along a row or a column, so the
        # bottom-right cell holds the overall maximum.
        self.longest = self.Matrix[-1][-1]
        self.sequences()
        print(self.longest)
        print(self.seq)

    def sequences(self):
        """Collect every LCS into ``seq`` by walking back from the last cell."""
        if not self.Matrix:
            return
        self.search(self.Matrix, len(self.A) - 1, len(self.B) - 1)

    def search(self, Matrix, i, j, record=None):
        """Backtrack from cell ``(i, j)`` and store each LCS that is found.

        Args:
            Matrix: DP table produced by ``lcsequence``.
            i: Current row (index into ``A``).
            j: Current column (index into ``B``).
            record: Elements picked so far, in reverse order. It is mutated
                during the walk and restored before returning.
        """
        # A fresh list per top-level call; a mutable default would be shared
        # between calls.
        if record is None:
            record = []

        if Matrix[i][j] == 0:
            # Nothing more can be matched: the picked elements are complete.
            self._remember(record)
            return

        if i == 0 or j == 0:
            # On the border a non-zero cell is exactly 1: A[0] (or B[0])
            # is the single remaining element of the subsequence.
            record.append(self.A[i] if i == 0 else self.B[j])
            self._remember(record)
            record.pop()
            return

        # Follow every neighbour that keeps the LCS length, so that all
        # distinct subsequences are enumerated.
        if Matrix[i][j] == Matrix[i][j - 1]:
            self.search(Matrix, i, j - 1, record)
        if Matrix[i][j] == Matrix[i - 1][j]:
            self.search(Matrix, i - 1, j, record)
        if Matrix[i][j] == Matrix[i - 1][j - 1] + 1 and self.A[i] == self.B[j]:
            record.append(self.A[i])
            self.search(Matrix, i - 1, j - 1, record)
            record.pop()

    def _remember(self, record):
        """Store the reversed ``record`` in ``seq`` unless already present."""
        # Strings are re-joined; any other sequence type (list, tuple, ...)
        # is reported as a list of its elements.
        if isinstance(self.A, str):
            found = "".join(reversed(record))
        else:
            found = list(reversed(record))
        if found not in self.seq:
            self.seq.append(found)
if __name__ == "__main__":
    # Demo run: constructing the object computes the LCS of the two digit
    # strings and prints its length followed by the matching subsequences.
    sol = solution("13456778", "357486782")
上述代码输出为最长公共子序列的长度,以及达到这一长度的所有公共子序列。输出结果如下:
5
['35678', '34678', '35778']
根据结果对比实际答案,我们的算法基本正确。