给定2个字符串,如何计算变化(插入、删除、替换)?【levenshtein distance 算法】

给定2个字符串,计算字符串发生了哪些变化(插入、删除、替换)

import { insert, remove, update } from "ramda";
/**
 * Computes the edit distance between two sequences.
 *
 * @param a - Source sequence.
 * @param b - Target sequence.
 * @param compose - Predicate deciding whether an element of `a` and an
 *   element of `b` count as equal; strict equality by default.
 * @returns The bottom-right cell of the distance matrix, i.e. the cell at
 *   row `b.length`, column `a.length`.
 */
export function levenshteinDistance<T = any>(
  a: T[],
  b: T[],
  compose: (a: T, b: T) => boolean = (left, right) => left === right
): number {
  const matrix = levenshteinDistanceMatrix(a, b, compose);
  const lastRow = matrix[b.length];
  return lastRow[a.length];
}
/**
 * Builds the full Levenshtein distance matrix for two sequences.
 *
 * The matrix has `b.length + 1` rows and `a.length + 1` columns; the cell at
 * `[row][col]` is the edit distance between the first `col` elements of `a`
 * and the first `row` elements of `b`.
 *
 * @param a - Source sequence (indexes the columns).
 * @param b - Target sequence (indexes the rows).
 * @param compose - Predicate deciding whether an element of `a` and an
 *   element of `b` count as equal; strict equality by default.
 * @returns The filled distance matrix.
 */
export function levenshteinDistanceMatrix<T = any>(
  a: T[],
  b: T[],
  compose: (a: T, b: T) => boolean = (left, right) => left === right
): number[][] {
  const width = a.length + 1;
  const height = b.length + 1;

  // Row 0: turning a prefix of `a` into the empty sequence costs its length.
  const table: number[][] = [Array.from({ length: width }, (_, col) => col)];

  for (let row = 1; row < height; row += 1) {
    const above = table[row - 1];
    // Column 0: building a prefix of `b` from nothing costs its length.
    const current: number[] = [row];
    for (let col = 1; col < width; col += 1) {
      const cost = compose(a[col - 1], b[row - 1]) ? 0 : 1;
      current.push(
        Math.min(
          current[col - 1] + 1, // step from the left neighbour
          above[col] + 1, // step from the upper neighbour
          above[col - 1] + cost // diagonal step: match (0) or mismatch (1)
        )
      );
    }
    table.push(current);
  }
  return table;
}
/** Kinds of edit operation that can turn one sequence into another. */
export enum LevenshteinOperatorType {
  /** An element is removed from the source sequence. */
  deletion = 0,
  /** An element is added to the source sequence. */
  insertion = 1,
  /** An element of the source sequence is replaced by another one. */
  substitution = 2
}
/** Edit operation describing the removal of one element. */
export interface LevenshteinDeletion<T = any> {
  /** Discriminant tag identifying this operator as a deletion. */
  type: LevenshteinOperatorType.deletion;
  /** Index of the element to remove. */
  index: number;
  /** The element being removed. */
  value: T;
}
/** Edit operation describing the insertion of one element. */
export interface LevenshteinInsertion<T = any> {
  /** Discriminant tag identifying this operator as an insertion. */
  type: LevenshteinOperatorType.insertion;
  /** Position at which `value` is inserted (`levenshteinTest` passes it to ramda's `insert`). */
  index: number;
  /** The element being inserted. */
  value: T;
}
/** Edit operation describing the replacement of one element by another. */
export interface LevenshteinSubstitution<T = any> {
  /** Discriminant tag identifying this operator as a substitution. */
  type: LevenshteinOperatorType.substitution;
  /** Index of the element to replace. */
  index: number;
  /** The element before (`old`) and after (`new`) the replacement. */
  value: {
    old: T;
    new: T;
  };
}

/** Any single edit operation; discriminated by its `type` field. */
export type LevenshteinOperator<T = any> =
  | LevenshteinDeletion<T>
  | LevenshteinInsertion<T>
  | LevenshteinSubstitution<T>;
/**
 * Type guard: narrows an operator to a deletion.
 *
 * @param val - Operator to inspect.
 * @returns `true` when the operator's `type` tag is `deletion`.
 */
export function isLevenshteinDeletion<T = any>(
  val: LevenshteinOperator<T>
): val is LevenshteinDeletion<T> {
  const { type } = val;
  return type === LevenshteinOperatorType.deletion;
}
/**
 * Type guard: narrows an operator to an insertion.
 *
 * @param val - Operator to inspect.
 * @returns `true` when the operator's `type` tag is `insertion`.
 */
export function isLevenshteinInsertion<T = any>(
  val: LevenshteinOperator<T>
): val is LevenshteinInsertion<T> {
  const { type } = val;
  return type === LevenshteinOperatorType.insertion;
}
/**
 * Type guard: narrows an operator to a substitution.
 *
 * @param val - Operator to inspect.
 * @returns `true` when the operator's `type` tag is `substitution`.
 */
export function isLevenshteinSubstitution<T = any>(
  val: LevenshteinOperator<T>
): val is LevenshteinSubstitution<T> {
  const { type } = val;
  return type === LevenshteinOperatorType.substitution;
}
// Sentinel cost standing in for neighbours that lie outside the matrix.
const max = 9999999999;
/**
 * Derives a list of edit operations that turns `a` into `b`.
 *
 * The distance matrix is walked backwards from its bottom-right cell to the
 * origin. At every cell exactly one valid transition is taken:
 *
 * 1. match        - elements are equal and the diagonal cell has the same cost;
 * 2. substitution - the diagonal cell is one cheaper;
 * 3. insertion    - the upper cell is one cheaper;
 * 4. deletion     - otherwise (the left cell is one cheaper).
 *
 * Operators are emitted from the END of the sequences towards the start, and
 * their indices refer to the untouched prefix of `a`. Applying them in the
 * returned order (as `levenshteinTest` does) therefore reproduces `b`
 * without any index shifting. The number of returned operators equals the
 * Levenshtein distance.
 *
 * Note: an insertion is reported at index `j` (directly after the first `j`
 * elements of `a`), so that inserting at that index yields the target order.
 *
 * @param a - Source sequence.
 * @param b - Target sequence.
 * @param compose - Predicate deciding whether an element of `a` and an
 *   element of `b` count as equal; strict equality by default. It is invoked
 *   again during the backtrace, so it should be deterministic.
 * @returns The edit operations, ordered from the last position to the first.
 */
export function levenshteinOperators<T = any>(
  a: T[],
  b: T[],
  compose: (a: T, b: T) => boolean = (a, b) => a === b
): LevenshteinOperator<T>[] {
  const res = levenshteinDistanceMatrix<T>(a, b, compose);
  let i = b.length; // row: number of elements of `b` still to produce
  let j = a.length; // column: number of elements of `a` still to consume
  const operators: LevenshteinOperator<T>[] = [];

  while (i > 0 || j > 0) {
    const current = res[i][j];
    // Costs of the three possible predecessor cells (sentinel when off-grid).
    const insertion = i > 0 ? res[i - 1][j] : max;
    const deletion = j > 0 ? res[i][j - 1] : max;
    const substitution = i > 0 && j > 0 ? res[i - 1][j - 1] : max;

    if (
      i > 0 &&
      j > 0 &&
      substitution === current &&
      compose(a[j - 1], b[i - 1])
    ) {
      // Equal elements: free diagonal step, nothing to record.
      i -= 1;
      j -= 1;
    } else if (substitution + 1 === current) {
      operators.push({
        type: LevenshteinOperatorType.substitution,
        value: { old: a[j - 1], new: b[i - 1] },
        index: j - 1
      });
      i -= 1;
      j -= 1;
    } else if (insertion + 1 === current) {
      operators.push({
        type: LevenshteinOperatorType.insertion,
        value: b[i - 1],
        // Insert right after the first `j` elements of `a`.
        index: j
      });
      i -= 1;
    } else {
      // Only the left neighbour remains: a[j - 1] has no counterpart in `b`.
      // `deletion + 1 === current` holds here for a consistent matrix.
      operators.push({
        type: LevenshteinOperatorType.deletion,
        value: a[j - 1],
        index: j - 1
      });
      j -= 1;
    }
  }
  return operators;
}
/**
 * Applies a list of edit operators to a sequence, in the given order.
 *
 * Each step goes through ramda's `remove`, `insert` or `update`, and the
 * result of one step is the input of the next.
 *
 * @param a - Sequence to start from.
 * @param operators - Edit operations to apply, first to last.
 * @returns The sequence obtained after applying every operator.
 */
export function levenshteinTest<T>(
  a: T[],
  operators: LevenshteinOperator<T>[]
) {
  let result = a;
  for (const step of operators) {
    if (isLevenshteinDeletion(step)) {
      result = remove(step.index, 1, result);
    } else if (isLevenshteinInsertion(step)) {
      result = insert(step.index, step.value, result);
    } else if (isLevenshteinSubstitution(step)) {
      result = update(step.index, step.value.new, result);
    }
  }
  return result;
}
复制代码
import { expect } from "chai";
import {
  levenshteinOperators,
  isLevenshteinSubstitution
} from "./levenshteinDistance";

// Fixture: the two strings differ in exactly one character ("2" vs "3").
const a = `let item = 2;`;
const b = `let item = 3;`;
const ops = levenshteinOperators(a.split(""), b.split(""));

describe("", () => {
  it("", () => {
    // Exactly one edit step is expected.
    expect(ops.length).equal(1);
    const op = ops[0];
    // That step must be a substitution.
    expect(isLevenshteinSubstitution(op)).equal(true);
    // Narrow through the type guard instead of casting, so the spec does not
    // depend on the `LevenshteinSubstitution` type, which is not imported here.
    if (!isLevenshteinSubstitution(op)) {
      throw new Error("expected a substitution operator");
    }
    // "2" is replaced by "3".
    expect(op.value.old).equal("2");
    expect(op.value.new).equal("3");
  });
});
复制代码

例子

levenshteinDistance

012345
1【2】插入1122345
2【2】插入,2345
3【3】插入2233345
4【4】插入,4445
5【4】不变let5555
65【4】不变item566
765【4】不变=56
8765【5】替换233->26
98766【5】不变;

const old = "let item = 233;"; const newStr = "112,223,let item = 2;";

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值