--- 最长公共子序列 (LCS) --- 一个序列的子序列是在该序列中删去若干元素后得到的序列 例:"ABCD"和"BDF"都是"ABCDEFG"的子序列 最长公共子序列 (LCS) 问题: 给定两个序列 X 和 Y,求 X 和 Y 长度最大的公共子序列 例:X = "ABBCBDE", Y = "DBBCDB", LCS(X, Y) = "BBCD" 适用场景: 1. 字符串相似度比对
def lcs_length(x, y):
    """Return the length of the longest common subsequence of ``x`` and ``y``.

    Both arguments must support ``len()`` and iteration (e.g. ``str`` or
    ``list``); their elements are compared with ``==``.

    Uses a full (len(x) + 1) x (len(y) + 1) table, so time and memory are
    both proportional to len(x) * len(y).
    """
    x_len, y_len = len(x), len(y)
    # table[r][c] is the LCS length of the first r items of x and the first
    # c items of y; row 0 and column 0 stay 0 (an empty prefix matches nothing).
    table = [[0] * (y_len + 1) for _ in range(x_len + 1)]

    for r, x_item in enumerate(x, start=1):
        prev_row = table[r - 1]
        row = table[r]
        for c, y_item in enumerate(y, start=1):
            if x_item == y_item:
                # Matching items extend the LCS of both shorter prefixes.
                row[c] = prev_row[c - 1] + 1
            else:
                # Otherwise keep the better result of dropping one item
                # from either prefix.
                row[c] = max(prev_row[c], row[c - 1])

    return table[x_len][y_len]
def lcs(x, y):
    """Compute the LCS length of ``x`` and ``y`` plus a direction table.

    Returns a tuple ``(length, trace)`` where ``length`` is the length of the
    longest common subsequence and ``trace`` is a
    (len(x) + 1) x (len(y) + 1) list of lists. ``trace[i][j]`` records which
    neighbouring cell produced the value for the prefixes of length ``i``
    and ``j``:

    * ``1`` - upper-left cell (the items ``x[i - 1]`` and ``y[j - 1]`` match)
    * ``2`` - cell above (the last item of the ``x`` prefix is dropped)
    * ``3`` - cell to the left (the last item of the ``y`` prefix is dropped)

    Row 0 and column 0 of ``trace`` are left at ``0``.
    """
    from_diagonal, from_above, from_left = 1, 2, 3

    x_len, y_len = len(x), len(y)
    lengths = [[0] * (y_len + 1) for _ in range(x_len + 1)]
    trace = [[0] * (y_len + 1) for _ in range(x_len + 1)]

    for r, x_item in enumerate(x, start=1):
        above = lengths[r - 1]
        current = lengths[r]
        moves = trace[r]
        for c, y_item in enumerate(y, start=1):
            if x_item == y_item:
                # Match: extend the LCS of both shorter prefixes.
                current[c] = above[c - 1] + 1
                moves[c] = from_diagonal
            elif above[c] > current[c - 1]:
                # The cell above is strictly better.
                current[c] = above[c]
                moves[c] = from_above
            else:
                # Ties go to the left cell.
                current[c] = current[c - 1]
                moves[c] = from_left

    return lengths[x_len][y_len], trace
def lcs_traceback(x, y):
    """Return one longest common subsequence of ``x`` and ``y`` as a string.

    Builds the direction table with ``lcs(x, y)`` and walks it backwards from
    the bottom-right cell, collecting the item of ``x`` at every diagonal
    (match) step. The collected items are combined with ``str.join``, so the
    elements of ``x`` must be strings.
    """
    # Only the direction table is needed here; the length is ignored.
    _, trace = lcs(x, y)

    row, col = len(x), len(y)
    picked = []
    # Stop as soon as either prefix is empty: nothing more can match.
    while row > 0 and col > 0:
        step = trace[row][col]
        if step == 1:
            # Diagonal move: x[row - 1] belongs to the subsequence.
            picked.append(x[row - 1])
            row -= 1
            col -= 1
        elif step == 2:
            # Came from above: skip the current item of x.
            row -= 1
        else:
            # Came from the left: skip the current item of y.
            col -= 1

    # Items were gathered from the end backwards; restore forward order.
    picked.reverse()
    return "".join(picked)