# 匹配字符串相似度算法(各个语言版本)

C++版

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

/**
 * Computes the Levenshtein (edit) distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions needed to turn `source` into `target`.
 * Characters are compared as raw `char` values, so text in a multi-byte
 * encoding is measured in bytes rather than in code points.
 *
 * @param source the string to transform
 * @param target the string to transform into
 * @return the edit distance; when one string is empty this is the length
 *         of the other one
 */
int ldistance(const std::string& source, const std::string& target)
{
    const std::size_t n = source.length();
    const std::size_t m = target.length();
    if (m == 0) return static_cast<int>(n);
    if (n == 0) return static_cast<int>(m);

    // Each row of the dynamic-programming table depends only on the row
    // directly above it, so two rows are enough instead of the full
    // (n + 1) x (m + 1) matrix.
    std::vector<int> previous(m + 1);
    std::vector<int> current(m + 1);

    // Row 0: turning an empty prefix of source into target[0..j) costs j insertions.
    for (std::size_t j = 0; j <= m; ++j) {
        previous[j] = static_cast<int>(j);
    }

    for (std::size_t i = 1; i <= n; ++i) {
        // Column 0: turning source[0..i) into an empty string costs i deletions.
        current[0] = static_cast<int>(i);
        const char si = source[i - 1];

        for (std::size_t j = 1; j <= m; ++j) {
            const int cost = (si == target[j - 1]) ? 0 : 1;
            const int deletion = previous[j] + 1;
            const int insertion = current[j - 1] + 1;
            const int substitution = previous[j - 1] + cost;
            current[j] = std::min(deletion, std::min(insertion, substitution));
        }

        // The row just filled becomes the "row above" for the next iteration.
        previous.swap(current);
    }

    return previous[m];
}
/**
 * Interactive demo: prompts for two whitespace-delimited words on standard
 * input and prints the edit distance between them.
 */
int main()
{
    std::string first;
    std::string second;

    std::cout << "source=";
    std::cin >> first;

    std::cout << "diag=";
    std::cin >> second;

    std::cout << "dist=" << ldistance(first, second) << std::endl;
    return 0;
}

Java版

package io;

/**
 * String similarity based on the Levenshtein (edit) distance.
 *
 * The similarity ratio is {@code 1 - distance / max(length)}, so identical
 * strings score 1.0 and strings with nothing in common score 0.0.
 */
public class Test1 {

    /**
     * Computes the Levenshtein distance between two strings, i.e. the minimum
     * number of single-character insertions, deletions and substitutions
     * needed to turn {@code str} into {@code target}. Characters are compared
     * as UTF-16 code units, exactly as {@link String#charAt(int)} returns them.
     *
     * @param str    the string to transform
     * @param target the string to transform into
     * @return the edit distance; the other string's length when one is empty
     */
    private int compare(String str, String target) {
        int n = str.length();
        int m = target.length();
        if (n == 0) {
            return m;
        }
        if (m == 0) {
            return n;
        }

        // d[i][j] = distance between the first i chars of str and the first j chars of target.
        int[][] d = new int[n + 1][m + 1];
        for (int i = 0; i <= n; i++) {
            d[i][0] = i; // first column: i deletions
        }
        for (int j = 0; j <= m; j++) {
            d[0][j] = j; // first row: j insertions
        }

        for (int i = 1; i <= n; i++) {
            char ch1 = str.charAt(i - 1);
            for (int j = 1; j <= m; j++) {
                // Substitution is free when the characters already match.
                int cost = (ch1 == target.charAt(j - 1)) ? 0 : 1;
                // Cheapest of: cell above + 1, cell to the left + 1, diagonal + cost.
                d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
            }
        }
        return d[n][m];
    }

    /**
     * Returns the smallest of three integers.
     */
    private int min(int one, int two, int three) {
        return Math.min(Math.min(one, two), three);
    }

    /**
     * Returns the similarity of two strings as a ratio.
     *
     * @param str    first string
     * @param target second string
     * @return {@code 1 - distance / max(str.length(), target.length())};
     *         1.0 when both strings are empty
     */
    public float getSimilarityRatio(String str, String target) {
        int longest = Math.max(str.length(), target.length());
        if (longest == 0) {
            // Two empty strings are identical; without this guard the
            // expression below evaluates 0f / 0, which is NaN.
            return 1.0f;
        }
        return 1 - (float) compare(str, target) / longest;
    }

    /**
     * Demo: prints the similarity of several candidate names against a
     * reference product name.
     */
    public static void main(String[] args) {
        Test1 t = new Test1();
        String tag = "MySQL Server 5.6"; // the reference (correct) name
        String[] candidates = {
            "mysql",
            "MySQL",
            "MySQL Server",
            "MySQL 5.6",
            "Server 5.6",
            "SQL Server 5.6",
            "MySQL Server 5.6"
        };
        for (String candidate : candidates) {
            System.out.println("similarityRatio=" + t.getSimilarityRatio(candidate, tag));
        }
    }
}


JavaScript版

/**
 * Scores how much two lists of tokens overlap, on a 0-100 scale.
 *
 * Both lists are sorted and then walked in step; every pair of equal
 * elements counts as one match (duplicates are matched one-to-one). The
 * score is `matches / (x.length + y.length) * 200`, so lists with the same
 * elements score 100 and lists with nothing in common score 0. Element
 * order does not matter. Note that this measures set-like overlap, not an
 * edit distance.
 *
 * The arguments are not modified. Elements are expected to be mutually
 * comparable values such as strings.
 *
 * @param {Array} x first list of tokens
 * @param {Array} y second list of tokens
 * @returns {number} overlap score from 0 to 100; 100 when both lists are empty
 */
function compare(x, y) {
    var total = x.length + y.length;
    if (total === 0) {
        // Two empty lists are identical; avoids returning NaN from 0 / 0.
        return 100;
    }

    // Sort copies so the caller's arrays are left untouched.
    var left = x.slice().sort();
    var right = y.slice().sort();

    var matches = 0;
    var i = 0;
    var j = 0;
    while (i < left.length && j < right.length) {
        var a = left[i];
        var b = right[j];
        if (a === b) {
            matches++;
            i++;
            j++;
        } else if (a < b) {
            i++;
        } else if (a > b) {
            j++;
        } else {
            // Incomparable pair (e.g. NaN or plain objects): skip both so the
            // loop is guaranteed to terminate.
            i++;
            j++;
        }
    }
    return matches / total * 200;
}

// Demo: the same three tokens are scored against themselves, first in the
// same order and then with the second list pre-sorted.
console.log(
    compare(
        ['123', '中文', 'hello'],
        ['123', '中文', 'hello']
    )
);
console.log(
    compare(
        ['123', '中文', 'hello'],
        ['123', '中文', 'hello'].sort()
    )
);
VBScript版

' Returns the similarity of str1 and str2 as a number formatted with three
' decimal places (via FormatNumber).
'
' The similarity is 1 - (Levenshtein distance / length of the longer string):
' identical strings give 1, strings with nothing in common give 0. Despite the
' function name, the value returned is this ratio, not the raw distance.
' Two empty strings are treated as identical and give 1.
Function GetLevenshteinDistince(str1, str2)
    Dim x, y, A, B, C, K, Best
    Dim Matrix()

    ' K is the length of the longer string, used to normalise the distance.
    If (Len(str1) > Len(str2)) Then
        K = Len(str1)
    Else
        K = Len(str2)
    End If

    ' Both strings empty: they are identical, and dividing by K would raise
    ' a division-by-zero error.
    If (K = 0) Then
        GetLevenshteinDistince = FormatNumber(1, 3, True)
        Exit Function
    End If

    ' Rows are indexed by positions in str2, columns by positions in str1.
    ReDim Matrix(Len(str2), Len(str1))

    ' Initialise the first column and the first row.
    For x = 0 To UBound(Matrix, 1)
        Matrix(x, 0) = x
    Next
    For y = 0 To UBound(Matrix, 2)
        Matrix(0, y) = y
    Next

    ' Fill the matrix.
    For x = 1 To UBound(Matrix, 1)
        For y = 1 To UBound(Matrix, 2)
            ' Diagonal move: free when the characters match, otherwise a substitution.
            If (Mid(str1, y, 1) = Mid(str2, x, 1)) Then
                C = Matrix(x - 1, y - 1)
            Else
                C = Matrix(x - 1, y - 1) + 1
            End If

            A = Matrix(x - 1, y) + 1
            B = Matrix(x, y - 1) + 1

            ' Keep the cheapest of the three moves.
            Best = A
            If (B < Best) Then Best = B
            If (C < Best) Then Best = C
            Matrix(x, y) = Best
        Next
    Next

    ' Convert the distance in the bottom-right cell into a similarity ratio.
    GetLevenshteinDistince = FormatNumber(1 - (Matrix(Len(str2), Len(str1)) / K), 3, True)
End Function
