给定2个字符串,计算字符串发生了哪些变化(插入、删除、替换)
import { insert, remove, update } from "ramda";
/**
 * Computes the Levenshtein (edit) distance between two sequences.
 *
 * @param a Source sequence.
 * @param b Target sequence.
 * @param compose Equality predicate for two elements; defaults to `===`.
 * @returns The bottom-right cell of the distance matrix, i.e. the minimum
 * number of insertions, deletions and substitutions turning `a` into `b`.
 */
export function levenshteinDistance<T = any>(
  a: T[],
  b: T[],
  compose: (a: T, b: T) => boolean = (a, b) => a === b
): number {
  const matrix = levenshteinDistanceMatrix(a, b, compose);
  // Rows are indexed by prefixes of `b`, columns by prefixes of `a`.
  const lastRow = matrix[b.length];
  return lastRow[a.length];
}
/**
 * Builds the full Levenshtein distance matrix for two sequences.
 *
 * The result has `b.length + 1` rows and `a.length + 1` columns; cell
 * `[row][col]` holds the edit distance between the first `col` elements of
 * `a` and the first `row` elements of `b`.
 *
 * @param a Source sequence (matrix columns).
 * @param b Target sequence (matrix rows).
 * @param compose Equality predicate for two elements; defaults to `===`.
 * @returns The filled distance matrix.
 */
export function levenshteinDistanceMatrix<T = any>(
  a: T[],
  b: T[],
  compose: (a: T, b: T) => boolean = (a, b) => a === b
): number[][] {
  const rowCount = b.length + 1;
  const colCount = a.length + 1;

  // Seed the first row with 0..a.length and the first column with 0..b.length;
  // every interior cell is overwritten by the recurrence below.
  const matrix: number[][] = Array.from({ length: rowCount }, (_r, row) =>
    Array.from({ length: colCount }, (_c, col) => {
      if (row === 0) {
        return col;
      }
      return col === 0 ? row : 0;
    })
  );

  for (let row = 1; row < rowCount; row += 1) {
    for (let col = 1; col < colCount; col += 1) {
      const cost = compose(a[col - 1], b[row - 1]) ? 0 : 1;
      const fromLeft = matrix[row][col - 1] + 1;
      const fromAbove = matrix[row - 1][col] + 1;
      const fromDiagonal = matrix[row - 1][col - 1] + cost;
      matrix[row][col] = Math.min(fromLeft, fromAbove, fromDiagonal);
    }
  }

  return matrix;
}
/** Kinds of edit operation that can appear in an edit script. */
export enum LevenshteinOperatorType {
  /** An element is removed. */
  deletion = 0,
  /** An element is inserted. */
  insertion = 1,
  /** An element is replaced by another. */
  substitution = 2
}
// Shape of a deletion operation.
export interface LevenshteinDeletion<T = any> {
// Discriminant tag identifying this operation as a deletion.
type: LevenshteinOperatorType.deletion;
// Position of the removed element.
index: number;
// The element that is removed.
value: T;
}
// Shape of an insertion operation.
export interface LevenshteinInsertion<T = any> {
// Discriminant tag identifying this operation as an insertion.
type: LevenshteinOperatorType.insertion;
// Position the insertion refers to (as emitted by levenshteinOperators).
index: number;
// The element that is inserted.
value: T;
}
// Shape of a substitution operation.
export interface LevenshteinSubstitution<T = any> {
// Discriminant tag identifying this operation as a substitution.
type: LevenshteinOperatorType.substitution;
// Position of the replaced element.
index: number;
// The element before (`old`) and after (`new`) the replacement.
value: {
old: T;
new: T;
};
}
// Any single edit operation; a union discriminated by the `type` field.
export type LevenshteinOperator<T = any> =
| LevenshteinDeletion<T>
| LevenshteinInsertion<T>
| LevenshteinSubstitution<T>;
/**
 * Type guard: reports whether an operation is a deletion.
 *
 * @param val Operation to inspect.
 * @returns `true` when `val.type` is `LevenshteinOperatorType.deletion`.
 */
export function isLevenshteinDeletion<T = any>(
  val: LevenshteinOperator<T>
): val is LevenshteinDeletion<T> {
  const { type: kind } = val;
  return kind === LevenshteinOperatorType.deletion;
}
/**
 * Type guard: reports whether an operation is an insertion.
 *
 * @param val Operation to inspect.
 * @returns `true` when `val.type` is `LevenshteinOperatorType.insertion`.
 */
export function isLevenshteinInsertion<T = any>(
  val: LevenshteinOperator<T>
): val is LevenshteinInsertion<T> {
  const { type: kind } = val;
  return kind === LevenshteinOperatorType.insertion;
}
/**
 * Type guard: reports whether an operation is a substitution.
 *
 * @param val Operation to inspect.
 * @returns `true` when `val.type` is `LevenshteinOperatorType.substitution`.
 */
export function isLevenshteinSubstitution<T = any>(
  val: LevenshteinOperator<T>
): val is LevenshteinSubstitution<T> {
  const { type: kind } = val;
  return kind === LevenshteinOperatorType.substitution;
}
// Sentinel "cost" standing in for matrix neighbours that lie outside the
// matrix while levenshteinOperators walks back through it.
const max = 9999999999;
/**
 * Derives a minimal edit script (insertions, deletions, substitutions) that
 * turns `a` into `b`.
 *
 * The script is produced by walking the distance matrix back from its
 * bottom-right cell to the origin, so operations are listed from the END of
 * the sequences towards the start. Applying them in the returned order keeps
 * every `index` valid, because each operation only touches positions at or
 * after its own index.
 *
 * Index semantics:
 * - deletion / substitution: index of the affected element of `a`;
 * - insertion: index BEFORE which the new element is inserted (0 inserts at
 *   the front, `a.length` appends).
 *
 * @param a Source sequence.
 * @param b Target sequence.
 * @param compose Equality predicate for two elements; defaults to `===`.
 * @returns The edit operations; its length equals the Levenshtein distance.
 */
export function levenshteinOperators<T = any>(
  a: T[],
  b: T[],
  compose: (a: T, b: T) => boolean = (a, b) => a === b
): LevenshteinOperator<T>[] {
  const res = levenshteinDistanceMatrix<T>(a, b, compose);
  let i = b.length; // row: how many elements of `b` are still unexplained
  let j = a.length; // column: how many elements of `a` are still unexplained
  const operators: LevenshteinOperator<T>[] = [];

  while (i > 0 || j > 0) {
    const current = res[i][j];
    // Neighbours outside the matrix get the sentinel so they never qualify.
    const insertion = i > 0 ? res[i - 1][j] : max;
    const substitution = i > 0 && j > 0 ? res[i - 1][j - 1] : max;
    const deletion = j > 0 ? res[i][j - 1] : max;

    // A step is only legal if it explains the current cell's cost: an edit
    // must come from a neighbour that is exactly one cheaper. Merely picking
    // the smallest neighbour can take a vertical step that costs nothing,
    // silently dropping an operation.
    if (insertion + 1 === current) {
      operators.push({
        type: LevenshteinOperatorType.insertion,
        value: b[i - 1],
        // Insert after a[j - 1], i.e. before position j.
        index: j
      });
      i -= 1;
    } else if (substitution + 1 === current) {
      operators.push({
        type: LevenshteinOperatorType.substitution,
        value: { old: a[j - 1], new: b[i - 1] },
        index: j - 1
      });
      i -= 1;
      j -= 1;
    } else if (deletion + 1 === current) {
      operators.push({
        type: LevenshteinOperatorType.deletion,
        value: a[j - 1],
        index: j - 1
      });
      j -= 1;
    } else {
      // No one-cheaper neighbour exists, so the cell was filled from the
      // diagonal at zero cost: a[j - 1] matches b[i - 1]; nothing to record.
      i -= 1;
      j -= 1;
    }
  }
  return operators;
}
/**
 * Replays an edit script against a sequence and returns the result.
 *
 * Operations are applied one after another in the order given; each step
 * produces a new array via ramda's `remove`, `insert` or `update`, so the
 * array passed in is not modified.
 *
 * @param a Sequence to transform.
 * @param operators Edit operations to apply, in order.
 * @returns The sequence obtained after applying every operation.
 */
export function levenshteinTest<T>(
  a: T[],
  operators: LevenshteinOperator<T>[]
) {
  let current = a;
  for (const op of operators) {
    if (isLevenshteinDeletion(op)) {
      current = remove(op.index, 1, current);
    } else if (isLevenshteinInsertion(op)) {
      current = insert(op.index, op.value, current);
    } else if (isLevenshteinSubstitution(op)) {
      current = update(op.index, op.value.new, current);
    }
  }
  return current;
}
复制代码
import { expect } from "chai";
import {
levenshteinOperators,
isLevenshteinSubstitution
} from "./levenshteinDistance";
// Two statements that differ in exactly one character ("2" vs "3").
const a = `let item = 2;`;
const b = `let item = 3;`;
const ops = levenshteinOperators(a.split(""), b.split(""));
describe("", () => {
  it("", () => {
    // Exactly one edit step is expected.
    expect(ops.length).equal(1);
    const [op] = ops;
    // That step is a substitution.
    expect(isLevenshteinSubstitution(op)).equal(true);
    // Narrow through the imported type guard rather than casting to
    // `LevenshteinSubstitution`, a type this file never imports.
    if (isLevenshteinSubstitution(op)) {
      // "2" is replaced by "3".
      expect(op.value.old).equal("2");
      expect(op.value.new).equal("3");
    }
  });
});
复制代码
例子
levenshteinDistance
0 | 1 | 2 | 3 | 4 | 5 |
---|---|---|---|---|---|
1 | 【1】插入112 | 2 | 3 | 4 | 5 |
2 | 【2】插入, | 2 | 3 | 4 | 5 |
3 | 【3】插入223 | 3 | 3 | 4 | 5 |
4 | 【4】插入, | 4 | 4 | 4 | 5 |
5 | 【4】不变let | 5 | 5 | 5 | 5 |
6 | 5 | 【4】不变item | 5 | 6 | 6 |
7 | 6 | 5 | 【4】不变= | 5 | 6 |
8 | 7 | 6 | 5 | 【5】替换233->2 | 6 |
9 | 8 | 7 | 6 | 6 | 【5】不变; |
// Example inputs — presumably the two texts compared in the table above
// (the table appears to work on tokens rather than characters; TODO confirm).
const old = "let item = 233;";
const newStr = "112,223,let item = 2;";