匹配字符串相似度算法(各个语言版本)

java 同时被 2 个专栏收录
60 篇文章 0 订阅
5 篇文章 0 订阅

原文链接:https://blog.csdn.net/baidu_23086307/article/details/53020566

 

C++版

#include <iostream>
#include <vector>
#include <string>
using namespace std;

//算法
// Levenshtein (edit) distance between `source` and `target`: the minimum
// number of single-character insertions, deletions and substitutions that
// turn one string into the other. Comparison is per `char` and case-sensitive.
//
// Uses the classic dynamic-programming table with
// (source.length()+1) x (target.length()+1) cells.
int ldistance(const string source,const string target)
{
    const int rows = source.length();
    const int cols = target.length();

    // If either string is empty, the distance is the length of the other one.
    if (cols == 0) { return rows; }
    if (rows == 0) { return cols; }

    // table[r][c] holds the distance between the first r characters of
    // `source` and the first c characters of `target`.
    vector< vector<int> > table(rows + 1, vector<int>(cols + 1, 0));

    // Border cells: turning a prefix into the empty string (or back) costs
    // exactly the prefix length.
    for (int r = 1; r <= rows; ++r) { table[r][0] = r; }
    for (int c = 1; c <= cols; ++c) { table[0][c] = c; }

    for (int r = 1; r <= rows; ++r)
    {
        for (int c = 1; c <= cols; ++c)
        {
            // Substitution is free when the two characters already match.
            const int mismatch = (source[r - 1] == target[c - 1]) ? 0 : 1;

            const int viaDiagonal = table[r - 1][c - 1] + mismatch;
            const int viaLeft     = table[r][c - 1] + 1;
            const int viaAbove    = table[r - 1][c] + 1;

            // Keep the cheapest of the three possible predecessors.
            int cheapest = viaDiagonal;
            if (viaLeft < cheapest)  { cheapest = viaLeft; }
            if (viaAbove < cheapest) { cheapest = viaAbove; }
            table[r][c] = cheapest;
        }
    }

    // Bottom-right cell covers both complete strings.
    return table[rows][cols];
}
// Interactive driver: reads two whitespace-delimited words from standard
// input and prints the edit distance between them as "dist=<n>".
// Returns 0 on success, 1 if either word could not be read.
int main(){
    string s;
    string d;

    cout<<"source=";
    if(!(cin>>s)){
        // Without this check a failed read would silently compare empty strings.
        cerr<<"error: could not read the source string"<<endl;
        return 1;
    }

    // The second value is the target string (the prompt previously said "diag=").
    cout<<"target=";
    if(!(cin>>d)){
        cerr<<"error: could not read the target string"<<endl;
        return 1;
    }

    int dist=ldistance(s,d);
    cout<<"dist="<<dist<<endl;
    return 0;
}

 

Java版

package io;

/**
 * String similarity based on the Levenshtein (edit) distance.
 *
 * <p>The similarity ratio is {@code 1 - distance / max(len(a), len(b))}:
 * 1.0 for identical strings and 0.0 when every position has to be edited.
 * Comparison is per UTF-16 {@code char} and case-sensitive.
 */
public class Test1 {

    /**
     * Computes the Levenshtein distance between two strings.
     *
     * <p>Only the previous and the current row of the dynamic-programming
     * table are kept, so memory use is proportional to {@code target.length()}.
     *
     * @param str    first string (must not be null)
     * @param target second string (must not be null)
     * @return minimum number of single-character insertions, deletions and
     *         substitutions needed to turn {@code str} into {@code target}
     */
    private static int compare(String str, String target) {
        int n = str.length();
        int m = target.length();

        // If either string is empty, the distance is the length of the other.
        if (n == 0) {
            return m;
        }
        if (m == 0) {
            return n;
        }

        int[] previous = new int[m + 1];
        int[] current = new int[m + 1];

        // Row 0: distance from the empty prefix of str to each prefix of target.
        for (int j = 0; j <= m; j++) {
            previous[j] = j;
        }

        for (int i = 1; i <= n; i++) {
            char ch1 = str.charAt(i - 1);
            // Column 0: distance from a prefix of str to the empty string.
            current[0] = i;

            for (int j = 1; j <= m; j++) {
                // Substitution is free when the characters already match.
                int cost = (ch1 == target.charAt(j - 1)) ? 0 : 1;

                int deletion = previous[j] + 1;
                int insertion = current[j - 1] + 1;
                int substitution = previous[j - 1] + cost;
                current[j] = Math.min(Math.min(deletion, insertion), substitution);
            }

            // The row just filled becomes the "previous" row of the next pass.
            int[] swap = previous;
            previous = current;
            current = swap;
        }

        // After the final swap the last completed row lives in `previous`.
        return previous[m];
    }

    /**
     * Returns the similarity of two strings as a ratio.
     *
     * @param str    first string (must not be null)
     * @param target second string (must not be null)
     * @return {@code 1 - distance / max(length)}; 1.0 when both strings are
     *         empty (they are identical, and the formula would otherwise
     *         evaluate 0/0 and yield NaN)
     * @throws NullPointerException if either argument is null
     */
    public float getSimilarityRatio(String str, String target) {
        int longest = Math.max(str.length(), target.length());
        if (longest == 0) {
            return 1.0f;
        }
        return 1 - (float) compare(str, target) / longest;
    }

    /** Prints the similarity of several candidate strings against a reference value. */
    public static void main(String[] args) {
        Test1 t = new Test1();
        String tag = "MySQL Server 5.6"; // the reference (correct) value
        String test1 = "mysql";
        String test2 = "MySQL";
        String test3 = "MySQL Server";
        String test4 = "MySQL 5.6";
        String test5 = "Server 5.6";
        String test6 = "SQL Server 5.6";
        String test7 = "MySQL Server 5.6";
        System.out.println("similarityRatio=" + t.getSimilarityRatio(test1, tag));
        System.out.println("similarityRatio=" + t.getSimilarityRatio(test2, tag));
        System.out.println("similarityRatio=" + t.getSimilarityRatio(test3, tag));
        System.out.println("similarityRatio=" + t.getSimilarityRatio(test4, tag));
        System.out.println("similarityRatio=" + t.getSimilarityRatio(test5, tag));
        System.out.println("similarityRatio=" + t.getSimilarityRatio(test6, tag));
        System.out.println("similarityRatio=" + t.getSimilarityRatio(test7, tag));
    }
}

JavaScript版(注:此实现比较的是两个数组的元素重合度,而非编辑距离)

/**
 * Scores how much two arrays overlap, as a percentage from 0 to 100.
 *
 * The score is 2 * (number of matched elements) / (x.length + y.length) * 100.
 * Elements are matched one-to-one with strict equality (===), so a value
 * that occurs twice in one array needs two occurrences in the other to be
 * fully matched. This is an overlap measure, not an edit distance.
 *
 * The input arrays are not modified.
 *
 * @param {Array} x first list of mutually comparable values (e.g. all strings)
 * @param {Array} y second list of mutually comparable values
 * @returns {number} overlap percentage; 100 when both arrays are empty
 */
function compare(x, y) {
    var total = x.length + y.length;

    // Two empty lists are identical; avoid 0/0 producing NaN.
    if (total === 0) {
        return 100;
    }

    // Sort with the same ordering the merge loop below relies on. The default
    // sort() orders by string form, which disagrees with < and > for numbers.
    var byOrder = function (p, q) {
        if (p < q) {
            return -1;
        }
        if (p > q) {
            return 1;
        }
        return 0;
    };

    // Work on sorted copies so the caller's arrays are left untouched.
    var left = x.slice().sort(byOrder);
    var right = y.slice().sort(byOrder);

    var matches = 0;
    var i = 0;
    var j = 0;

    // Merge-style walk over both sorted copies.
    while (i < left.length && j < right.length) {
        var a = left[i];
        var b = right[j];

        if (a === b) {
            matches++;
            i++;
            j++;
        } else if (a < b) {
            i++;
        } else if (a > b) {
            j++;
        } else {
            // Neither equal nor ordered (e.g. NaN or mixed types): skip both
            // so the loop always makes progress.
            i++;
            j++;
        }
    }

    return matches / total * 200;
}

// Demo: both arguments contain the same three strings; logs the score returned by compare.
console.log(compare(['123', '中文', 'hello'], ['123', '中文', 'hello']))  
// Same elements again, but the second array is sorted by the caller before being passed in.
console.log(compare(['123', '中文', 'hello'], ['123', '中文', 'hello'].sort()))
  •  

VBScript版

' Returns the similarity of str1 and str2 based on the Levenshtein (edit)
' distance. Despite its name, the function does not return the raw distance:
' the result is 1 - distance / (length of the longer string), formatted by
' FormatNumber with three decimals and a leading digit, so the return value
' is a formatted string.
'
' Two empty strings are treated as identical (similarity 1) instead of
' raising a division-by-zero error.
Function GetLevenshteinDistince(str1, str2)
    Dim x, y, A, B, C, K, Best
    Dim Matrix()

    ' K is the length of the longer string, used to normalise the distance
    If (Len(str1) > Len(str2)) Then
        K = Len(str1)
    Else
        K = Len(str2)
    End If

    ' Both strings empty: nothing to compare, and K = 0 would divide by zero
    If K = 0 Then
        GetLevenshteinDistince = FormatNumber(1, 3, True)
        Exit Function
    End If

    ' First dimension indexes the characters of str2, second those of str1
    ReDim Matrix(Len(str2), Len(str1))

    ' Initialise the first column and the first row with the prefix lengths
    For x = 0 To UBound(Matrix, 1)
        Matrix(x, 0) = x
    Next
    For y = 0 To UBound(Matrix, 2)
        Matrix(0, y) = y
    Next

    ' Fill the matrix
    For x = 1 To UBound(Matrix, 1)
        For y = 1 To UBound(Matrix, 2)
            ' C: substitution, free when the two characters already match
            If (Mid(str1, y, 1) = Mid(str2, x, 1)) Then
                C = Matrix(x - 1, y - 1)
            Else
                C = Matrix(x - 1, y - 1) + 1
            End If

            ' A and B: the two single-character insert/delete steps
            A = Matrix(x - 1, y) + 1
            B = Matrix(x, y - 1) + 1

            ' Keep the cheapest of the three candidates
            Best = A
            If B < Best Then Best = B
            If C < Best Then Best = C
            Matrix(x, y) = Best
        Next
    Next

    ' Convert the distance in the bottom-right cell into a similarity ratio
    GetLevenshteinDistince = FormatNumber(1 - (Matrix(Len(str2), Len(str1)) / K), 3, True)
End Function
  • 0
    点赞
  • 0
    评论
  • 9
    收藏
  • 扫一扫,分享海报

©️2022 CSDN 皮肤主题:技术黑板 设计师:CSDN官方博客 返回首页
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、C币套餐、付费专栏及课程。

余额充值