mysql字符串相似度_匹配字符串相似度算法(各个语言版本)

这篇博客介绍了如何在不同编程语言中实现字符串相似度比较:C++、Java 和 VBScript 版本基于 Levenshtein(编辑)距离算法,JavaScript 版本则通过统计两个排序后数组中的公共元素来计算重合度。通过计算两个字符串之间的差异来评估它们的相似性,适用于文本匹配和搜索等多种场景。
摘要由CSDN通过智能技术生成

C++版

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

//算法

/// Computes the Levenshtein (edit) distance between two strings.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn `source` into `target`.
/// Characters are compared as individual `char` values, so text in a
/// multi-byte encoding is measured per byte rather than per code point.
///
/// @param source  String to transform.
/// @param target  String to transform into.
/// @return The edit distance; when one string is empty this is the length
///         of the other one.
int ldistance(const std::string& source, const std::string& target)
{
    // Step 1: trivial cases where one side is empty.
    const int n = static_cast<int>(source.length());
    const int m = static_cast<int>(target.length());
    if (m == 0) return n;
    if (n == 0) return m;

    // (n+1) x (m+1) matrix: matrix[i][j] is the distance between the first
    // i characters of source and the first j characters of target.
    using Matrix = std::vector<std::vector<int>>;
    Matrix matrix(n + 1, std::vector<int>(m + 1, 0));

    // Step 2: distance from/to the empty prefix is the prefix length.
    for (int i = 0; i <= n; ++i) matrix[i][0] = i;
    for (int j = 0; j <= m; ++j) matrix[0][j] = j;

    // Steps 3-6: fill the matrix row by row.
    for (int i = 1; i <= n; ++i)
    {
        const char si = source[i - 1];
        for (int j = 1; j <= m; ++j)
        {
            const char dj = target[j - 1];

            // Step 5: substitution is free when the characters match.
            const int cost = (si == dj) ? 0 : 1;

            // Step 6: cheapest of deletion, insertion and substitution.
            const int above = matrix[i - 1][j] + 1;
            const int left = matrix[i][j - 1] + 1;
            const int diag = matrix[i - 1][j - 1] + cost;
            matrix[i][j] = std::min({above, left, diag});
        }
    }

    // Step 7: the bottom-right cell holds the distance of the full strings.
    return matrix[n][m];
}

/// Interactive driver: reads two whitespace-delimited words from standard
/// input and prints the Levenshtein distance between them.
///
/// NOTE(review): the original output statements were truncated to `cout<`,
/// so the prompt texts below are reconstructions, not the original wording.
int main()
{
    std::string s;
    std::string d;

    std::cout << "Enter the source string: ";
    std::cin >> s;

    std::cout << "Enter the target string: ";
    std::cin >> d;

    const int dist = ldistance(s, d);
    std::cout << "Levenshtein distance: " << dist << '\n';

    return 0;
}

Java版

package io;

/**
 * String similarity based on the Levenshtein (edit) distance.
 *
 * <p>{@link #getSimilarityRatio(String, String)} maps the distance to a ratio
 * where 1 means the strings are identical and 0 means every character of the
 * longer string would have to be edited.
 */
public class Test1 {

    /**
     * Computes the Levenshtein distance between two strings.
     *
     * @param str    the string to transform
     * @param target the string to transform into
     * @return the minimum number of single-character insertions, deletions
     *         and substitutions that turn {@code str} into {@code target}
     */
    private int compare(String str, String target) {
        final int n = str.length();
        final int m = target.length();
        if (n == 0) {
            return m;
        }
        if (m == 0) {
            return n;
        }

        // d[i][j] = distance between the first i chars of str and the
        // first j chars of target.
        final int[][] d = new int[n + 1][m + 1];
        for (int i = 0; i <= n; i++) { // first column
            d[i][0] = i;
        }
        for (int j = 0; j <= m; j++) { // first row
            d[0][j] = j;
        }

        for (int i = 1; i <= n; i++) {
            final char ch1 = str.charAt(i - 1);
            for (int j = 1; j <= m; j++) {
                final char ch2 = target.charAt(j - 1);
                // Substitution cost: 0 when the characters match, otherwise 1.
                final int cost = (ch1 == ch2) ? 0 : 1;
                // Cheapest of: cell above + 1, cell to the left + 1, diagonal + cost.
                d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
            }
        }
        return d[n][m];
    }

    /**
     * Returns the smallest of three values.
     */
    private int min(int one, int two, int three) {
        return Math.min(one, Math.min(two, three));
    }

    /**
     * Returns the similarity ratio of two strings.
     *
     * <p>The ratio is {@code 1 - distance / max(length)}. Two empty strings
     * are treated as identical and yield 1; without that guard the formula
     * would evaluate 0 / 0 and return NaN.
     *
     * @param str    first string
     * @param target second string
     * @return a value between 0 (nothing in common) and 1 (identical)
     */
    public float getSimilarityRatio(String str, String target) {
        final int longest = Math.max(str.length(), target.length());
        if (longest == 0) {
            return 1f;
        }
        return 1 - (float) compare(str, target) / longest;
    }

    /**
     * Demo: prints the similarity of several variants against a reference string.
     */
    public static void main(String[] args) {
        Test1 t = new Test1();
        String tag = "MySQL Server 5.6"; // the reference (correct) value
        String[] candidates = {
            "mysql",
            "MySQL",
            "MySQL Server",
            "MySQL 5.6",
            "Server 5.6",
            "SQL Server 5.6",
            "MySQL Server 5.6"
        };
        for (String candidate : candidates) {
            System.out.println("similarityRatio=" + t.getSimilarityRatio(candidate, tag));
        }
    }
}

JavaScript版

/**
 * Scores how much two token arrays overlap, ignoring element order.
 *
 * Sorted copies of both arrays are merged to count the tokens they share
 * (each occurrence is matched at most once). The score is
 * `2 * shared / (x.length + y.length)` expressed as a percentage, i.e. a
 * Dice-style overlap coefficient rather than an edit distance.
 *
 * The inputs are not modified. Tokens are expected to be strings: the
 * default `sort()` orders by string value, and the merge relies on `<`
 * agreeing with that order.
 *
 * @param {Array} x First token array.
 * @param {Array} y Second token array.
 * @returns {number} Score from 0 (nothing shared) to 100 (same tokens);
 *     two empty arrays count as identical and score 100.
 */
function compare(x, y) {
    var total = x.length + y.length;
    if (total === 0) {
        // Avoids 0 / 0 = NaN; two empty inputs are trivially identical.
        return 100;
    }

    // Work on copies so the caller's arrays are neither reordered nor emptied.
    var xs = x.slice().sort();
    var ys = y.slice().sort();

    var shared = 0;
    var i = 0;
    var j = 0;
    while (i < xs.length && j < ys.length) {
        var a = xs[i];
        var b = ys[j];
        if (a === b) {
            shared++;
            i++;
            j++;
        } else if (a < b) {
            i++;
        } else {
            // Also taken for incomparable values, so the loop always advances.
            j++;
        }
    }

    return shared / total * 200;
}

// Demo: score a token list against itself, first as written and then
// against a pre-sorted copy. Fresh arrays are built for every argument.
var makeSampleTokens = function () {
    return ['123', '中文', 'hello'];
};

var scoreAsWritten = compare(makeSampleTokens(), makeSampleTokens());
console.log(scoreAsWritten);

var scoreAgainstSorted = compare(makeSampleTokens(), makeSampleTokens().sort());
console.log(scoreAgainstSorted);

VBScript版

' Returns the similarity of str1 and str2 derived from their Levenshtein
' distance: 1 - distance / (length of the longer string), formatted with
' FormatNumber to three decimals with a leading digit.
' Despite the function name, the result is a similarity ratio (1 = identical,
' 0 = nothing in common), not the raw distance.
Function GetLevenshteinDistince(str1, str2)
    Dim x, y, A, B, C, K
    Dim Matrix()

    ' K is the length of the longer string; it normalises the distance.
    If (Len(str1) > Len(str2)) Then
        K = Len(str1)
    Else
        K = Len(str2)
    End If

    ' Two empty strings are identical. Returning here avoids dividing by
    ' zero in the final formula.
    If K = 0 Then
        GetLevenshteinDistince = FormatNumber(1, 3, True)
        Exit Function
    End If

    ' Matrix(x, y) = distance between the first x characters of str2 and
    ' the first y characters of str1.
    ReDim Matrix(Len(str2), Len(str1))

    ' Initialise the first column and the first row
    For x = 0 To UBound(Matrix, 1)
        Matrix(x, 0) = x
    Next
    For y = 0 To UBound(Matrix, 2)
        Matrix(0, y) = y
    Next

    ' Fill the matrix
    For x = 1 To UBound(Matrix, 1)
        For y = 1 To UBound(Matrix, 2)
            ' C: substitution (free when the characters match)
            If (Mid(str1, y, 1) = Mid(str2, x, 1)) Then
                C = Matrix(x - 1, y - 1)
            Else
                C = Matrix(x - 1, y - 1) + 1
            End If
            ' A and B: the two single-character insert/delete moves
            A = Matrix(x - 1, y) + 1
            B = Matrix(x, y - 1) + 1

            ' Keep the cheapest of the three options
            If (A <= B And A <= C) Then
                Matrix(x, y) = A
            ElseIf (B <= C) Then
                Matrix(x, y) = B
            Else
                Matrix(x, y) = C
            End If
        Next
    Next

    ' Convert the distance into a similarity ratio
    GetLevenshteinDistince = FormatNumber(1 - (Matrix(Len(str2), Len(str1)) / K), 3, True)
End Function

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值