Levenshtein 距离,又称编辑距离,指的是两个字符串之间,由一个转换成另一个所需的最少编辑操作次数。
许可的编辑操作包括将一个字符替换成另一个字符,插入一个字符,删除一个字符。
编辑距离的算法是首先由俄国科学家Levenshtein提出的,故又叫Levenshtein Distance。
1、Java
- public static void levenshtein(String str1, String str2) {
- // 计算两个字符串的长度。
- int len1 = str1.length();
- int len2 = str2.length();
- // 建立上面说的数组,比字符长度大一个空间
- int[][] dif = new int[len1 + 1][len2 + 1];
- // 赋初值,步骤B。
- for (int a = 0; a <= len1; a++) {
- dif[a][0] = a;
- }
- for (int a = 0; a <= len2; a++) {
- dif[0][a] = a;
- }
- // 计算两个字符是否一样,计算左上的值
- int temp;
- for (int i = 1; i <= len1; i++) {
- for (int j = 1; j <= len2; j++) {
- System.out.println("i = " + i + " j = " + j + " str1 = "
- + str1.charAt(i - 1) + " str2 = " + str2.charAt(j - 1));
- if (str1.charAt(i - 1) == str2.charAt(j - 1)) {
- temp = 0;
- } else {
- temp = 1;
- }
- // 取三个值中最小的
- dif[i][j] = min(dif[i - 1][j - 1] + temp, dif[i][j - 1] + 1,
- dif[i - 1][j] + 1);
- System.out.println("i = " + i + ", j = " + j + ", dif[i][j] = "
- + dif[i][j]);
- }
- }
- System.out.println("字符串\"" + str1 + "\"与\"" + str2 + "\"的比较");
- // 取数组右下角的值,同样不同位置代表不同字符串的比较
- System.out.println("差异步骤:" + dif[len1][len2]);
- // 计算相似度
- float similarity = 1 - (float) dif[len1][len2]
- / Math.max(str1.length(), str2.length());
- System.out.println("相似度:" + similarity);
- }
2、LotusScript
Function toCompute(str1 As String, str2 As String) As Double
	' Returns a similarity score for two strings based on their
	' Levenshtein (edit) distance: 1 - distance / length of the longer string.
	'
	' Returns 0 when either argument is the empty string.
	' Only the first 120 characters of each string are compared, because the
	' work table is a fixed 121 x 121 array.
	' Relies on a three-argument min() helper defined elsewhere.
	Dim len1 As Integer
	Dim len2 As Integer
	Dim maxlen As Integer
	Dim i As Long
	Dim j As Long
	Dim temp As Long
	Dim similarity As Double
	Dim ch1 As String
	Dim dif(0 To 120, 0 To 120) As Integer

	If str1 = "" Or str2 = "" Then
		toCompute = 0
		Exit Function
	End If

	len1 = Len(str1)
	len2 = Len(str2)
	' Clamp both lengths to the capacity of the work table.
	If len1 > 120 Then
		len1 = 120
	End If
	If len2 > 120 Then
		len2 = 120
	End If
	If len1 > len2 Then
		maxlen = len1
	Else
		maxlen = len2
	End If

	' First column and first row: distance from or to the empty prefix.
	For i = 0 To len1
		dif(i, 0) = i
	Next
	For i = 0 To len2
		dif(0, i) = i
	Next

	For i = 1 To len1
		' Mid$ reads the i-th character directly; fetch it once per row.
		ch1 = Mid$(str1, i, 1)
		For j = 1 To len2
			' Substitution cost: 0 when the characters match, 1 otherwise.
			If ch1 = Mid$(str2, j, 1) Then
				temp = 0
			Else
				temp = 1
			End If
			' Cheapest of: substitute (upper-left), insert (left), delete (up).
			dif(i, j) = min(dif(i - 1, j - 1) + temp, dif(i, j - 1) + 1, dif(i - 1, j) + 1)
		Next
	Next

	' dif(len1, len2) is the edit distance between the compared strings.
	similarity = 1 - dif(len1, len2) / maxlen
	toCompute = similarity
End Function
优化算法(先去掉公共前缀和后缀,再用一维数组滚动计算)
1、Visual Basic
Module Module1
    ''' <summary>
    ''' Demo entry point: prints the edit distance between two sample strings.
    ''' </summary>
    Sub Main()
        Dim str1 As String
        Dim str2 As String
        str1 = "今天是星期五"
        str2 = "明天星期四"
        Dim dis As New clsDistance(str1, str2)
        Dim result As Integer
        result = dis.CacuDistance()
        Console.WriteLine(result)
    End Sub

    ''' <summary>
    ''' Computes the Levenshtein (edit) distance between two strings.
    ''' The common prefix and suffix are skipped first, then the remaining
    ''' middle parts are compared with a single-row dynamic-programming table.
    ''' </summary>
    Public Class clsDistance
        Private mCharA() As Char
        Private mCharB() As Char
        Private mCharALen As Integer
        Private mCharBLen As Integer

        ''' <summary>
        ''' Stores the two strings to compare. Nothing is treated as the
        ''' empty string so that construction never throws.
        ''' </summary>
        Public Sub New(ByVal StrA As String, ByVal StrB As String)
            If StrA Is Nothing Then StrA = String.Empty
            If StrB Is Nothing Then StrB = String.Empty
            mCharA = StrA.ToCharArray
            mCharB = StrB.ToCharArray
            mCharALen = mCharA.Length
            mCharBLen = mCharB.Length
        End Sub

        ''' <summary>
        ''' Returns the edit distance between the two strings given to the
        ''' constructor. Writes trace output to the console while it runs.
        ''' </summary>
        Public Function CacuDistance() As Integer
            Dim i As Integer
            ' An empty string is turned into the other one purely by insertions.
            If mCharALen = 0 Then Return mCharBLen
            If mCharBLen = 0 Then Return mCharALen
            Console.WriteLine(mCharALen)
            Console.WriteLine(mCharBLen)
            Dim j As Integer = Min(mCharALen, mCharBLen) - 1
            Dim tP1 As Integer, tP2 As Integer
            tP1 = -1
            tP2 = -1
            ' tP1: index of the first mismatch, i.e. the common prefix length.
            For i = 0 To j
                If mCharA(i) <> mCharB(i) Then
                    tP1 = i
                    Exit For
                End If
            Next
            ' No mismatch: one string is a prefix of the other.
            If tP1 = -1 Then Return Math.Abs(mCharALen - mCharBLen)
            ' tP2: number of matching characters counted from the end,
            ' limited to what is left of the shorter string after the prefix.
            For i = 0 To j - tP1
                If mCharA(mCharALen - i - 1) <> mCharB(mCharBLen - i - 1) Then
                    tP2 = i
                    Exit For
                End If
            Next
            ' No mismatch: the rest of the shorter string is a suffix of the other.
            If tP2 = -1 Then Return Math.Abs(mCharALen - mCharBLen)
            Console.WriteLine("tp1: = " & tP1)
            Console.WriteLine("tp2 : = " & tP2)
            ' Single DP row with one cell per remaining character of A plus one.
            Dim tA(mCharALen - tP1 - tP2) As Integer
            For i = 0 To tA.GetUpperBound(0)
                tA(i) = i
            Next
            For i = 0 To tA.GetUpperBound(0) Step 1
                Console.WriteLine(" i = " + CStr(i) + " " & tA(i))
            Next
            Console.WriteLine("Bound: = " & tA.GetUpperBound(0))
            ' tN1: previous row, previous column (diagonal).
            ' tN2: current row, previous column (left).
            ' tN3: value being computed for the current cell.
            Dim tN1 As Integer, tN2 As Integer, tN3 As Integer
            ' Both middle parts are walked from their last character backwards.
            For i = 0 To mCharBLen - tP1 - tP2 - 1
                tN1 = tA(0)
                tN2 = tN1 + 1
                Console.WriteLine("i = " & i & " " & mCharB(mCharBLen - tP2 - i - 1))
                For j = 1 To tA.GetUpperBound(0)
                    Console.WriteLine("j = " & j & " " & mCharA(mCharALen - tP2 - j))
                    If mCharA(mCharALen - tP2 - j) = mCharB(mCharBLen - tP2 - i - 1) Then
                        tN3 = tN1
                    Else
                        tN3 = Min(tA(j), tN1, tN2) + 1
                    End If
                    ' Write back the finished left cell, then shift the window.
                    tA(j - 1) = tN2
                    tN2 = tN3
                    tN1 = tA(j)
                    Console.WriteLine("tn1 = " & tN1)
                    Console.WriteLine("tn2 = " & tN2)
                    Console.WriteLine("tn3 = " & tN3)
                Next
                tA(tA.GetUpperBound(0)) = tN2
                Console.WriteLine(tA.GetUpperBound(0) & " " & tA(tA.GetUpperBound(0)))
            Next
            For i = 0 To tA.GetUpperBound(0) Step 1
                Console.WriteLine(" i = " + CStr(i) + " " & tA(i))
            Next
            ' The last cell of the row is the distance between the middle parts.
            Return tA(tA.GetUpperBound(0))
        End Function

        ''' <summary>
        ''' Returns the smallest of the given integers, or 0 when none are given.
        ''' </summary>
        Public Function Min(ByVal ParamArray Num() As Integer) As Integer
            Dim tN As Integer, i As Integer
            If Num.Length = 0 Then Return 0
            tN = Num(0)
            For i = 1 To Num.GetUpperBound(0)
                If Num(i) < tN Then tN = Num(i)
            Next
            Return tN
        End Function
    End Class
End Module
2、Java
- public static int clsDistance(String str1, String str2) {
- int j;
- int i;
- int mCharALen, mCharBLen;
- mCharALen = str1.length();
- mCharBLen = str2.length();
- int tp1 = -1;
- int tp2 = -1;
- j = Math.min(mCharALen , mCharBLen) - 1;
- for (i = 0; i <= j; i++) {
- if (str1.charAt(i) != str2.charAt(i)) {
- tp1 = i;
- break;
- }
- }
- if (tp1 == -1) {
- return Math.abs(mCharBLen - mCharALen);
- }
- for (i = 0; i <= j - tp1; i++) {
- if (str1.charAt(mCharALen - i - 1) != str2.charAt(mCharBLen - i
- - 1)) {
- tp2 = i;
- break;
- }
- }
- if (tp2 == -1) {
- return Math.abs(mCharALen - mCharBLen);
- }
- int taBound = mCharALen - tp1 - tp2;
- int tA[] = new int[taBound + 1];
- for (i = 0; i < tA.length; i++) {
- tA[i] = i ;
- }
- System.out.println(Arrays.toString(tA));
- int tN1, tN2, tN3;
- for (i = 0; i < mCharBLen - tp1 - tp2 ; i++) {
- tN1 = tA[0];
- tN2 = tN1 + 1;
- System.out.println("\n" + i + " " + str2.charAt(mCharBLen
- - tp2 - i - 1));
- for (j = 1; j < tA.length ; j++) {
- System.out.print(str1.charAt(mCharALen - tp2 - j ) +" ");
- if (str1.charAt(mCharALen - tp2 - j ) == str2.charAt(mCharBLen
- - tp2 - i - 1)) {
- tN3 = tN1;
- } else {
- tN3 = Math.min(tA[j], Math.min(tN1, tN2)) + 1;
- }
- tA[j - 1] = tN2;
- tN2 = tN3;
- tN1 = tA[j];
- System.out.println("\ntN1 = " + tN1);
- System.out.println("tN2 = " + tN2);
- System.out.println("tN3 = " + tN3);
- }
- tA[tA.length - 1] = tN2;
- System.out.println("\n"+tA[tA.length - 1] );
- }
- System.out.println("\n" +Arrays.toString(tA));
- return tA[tA.length - 1];
- }</span>
3、LotusScript
%REM
	Function clsDistance
	Description: Returns a similarity score for two strings based on their
	Levenshtein (edit) distance: 1 - distance / length of the longer string.
	The common prefix and suffix are skipped first; the remaining middle
	parts are compared with a single-row dynamic-programming table.
	Returns 0 when either argument is the empty string.
	Relies on a three-argument Min() helper defined elsewhere.
%END REM
Function clsDistance(str1 As String, str2 As String) As Double
	Dim mCharALen As Integer
	Dim mCharBLen As Integer
	Dim maxlen As Integer
	Dim i As Integer
	Dim j As Integer
	Dim tP1 As Integer, tP2 As Integer
	Dim tABound As Integer
	Dim tN1 As Integer, tN2 As Integer, tN3 As Integer
	Dim tA() As Integer
	Dim chB As String
	Dim similarity As Double

	mCharALen = Len(str1)
	mCharBLen = Len(str2)
	If mCharALen > mCharBLen Then
		maxlen = mCharALen
	Else
		maxlen = mCharBLen
	End If

	If str1 = "" Or str2 = "" Then
		clsDistance = 0
		Exit Function
	End If

	' j: last zero-based index of the shorter string.
	If mCharALen > mCharBLen Then
		j = mCharBLen - 1
	Else
		j = mCharALen - 1
	End If

	' tP1: zero-based index of the first mismatch, i.e. the common prefix length.
	tP1 = -1
	tP2 = -1
	For i = 0 To j
		If Mid$(str1, i + 1, 1) <> Mid$(str2, i + 1, 1) Then
			tP1 = i
			Exit For
		End If
	Next
	' No mismatch: one string is a prefix of the other.
	If tP1 = -1 Then
		clsDistance = 1 - Abs(mCharALen - mCharBLen) / maxlen
		Exit Function
	End If

	' tP2: number of matching characters counted from the end, limited to
	' what is left of the shorter string after the common prefix.
	For i = 0 To j - tP1
		If Mid$(str1, mCharALen - i, 1) <> Mid$(str2, mCharBLen - i, 1) Then
			tP2 = i
			Exit For
		End If
	Next
	' No mismatch: the rest of the shorter string is a suffix of the other.
	If tP2 = -1 Then
		clsDistance = 1 - Abs(mCharALen - mCharBLen) / maxlen
		Exit Function
	End If

	' Single DP row with one cell per remaining character of str1 plus one,
	' sized to the input instead of using a fixed-capacity array.
	tABound = mCharALen - tP1 - tP2
	ReDim tA(0 To tABound)
	For i = 0 To tABound
		tA(i) = i
	Next

	' tN1: previous row, previous column (diagonal).
	' tN2: current row, previous column (left).
	' tN3: value being computed for the current cell.
	' Both middle parts are walked from their last character backwards.
	For i = 0 To mCharBLen - tP1 - tP2 - 1
		tN1 = tA(0)
		tN2 = tN1 + 1
		chB = Mid$(str2, mCharBLen - tP2 - i, 1)
		For j = 1 To tABound
			If Mid$(str1, mCharALen - tP2 - j + 1, 1) = chB Then
				tN3 = tN1
			Else
				tN3 = Min(tA(j), tN1, tN2) + 1
			End If
			' Write back the finished left cell, then shift the window.
			tA(j - 1) = tN2
			tN2 = tN3
			tN1 = tA(j)
		Next
		tA(tABound) = tN2
	Next

	' tA(tABound) is the edit distance between the two middle parts.
	similarity = 1 - tA(tABound) / maxlen
	clsDistance = similarity
End Function