C# 字符串匹配的Jaro-Winkler 相似度算法

想要更精确地匹配输入字符串和候选项,可以使用字符串相似性算法,例如 Levenshtein 距离或 Jaro-Winkler 相似度。这些算法会综合考虑字符串之间的编辑距离、字符顺序等因素,从而更准确地衡量字符串的相似程度。下面给出 Jaro-Winkler 相似度的实现代码,使用方法如下:

  //参数1是待匹配的文本,参数2是样本数组.
  string bestMatch = JaroWinklerDistanceCalculate.Calculate(str, nameArry);
  Debug.Log(bestMatch);
using System;
using System.Collections;
using System.Collections.Generic;

/// <summary>
/// Selects, from a set of candidate strings, the one most similar to an input
/// string, ranking candidates by Jaro-Winkler similarity (1.0 = identical,
/// 0.0 = no matching characters).
/// </summary>
public class JaroWinklerDistanceCalculate
{
    // Winkler's prefix bonus considers at most this many leading characters.
    // Together with a scaling factor <= 0.25 this keeps the score within [0, 1].
    private const int MaxPrefixLength = 4;

    // Weight given to each character of the shared prefix (Winkler's standard p).
    private const double PrefixScalingFactor = 0.1;

    /// <summary>
    /// Returns the candidate with the highest Jaro-Winkler similarity to
    /// <paramref name="compareStr"/>.
    /// </summary>
    /// <param name="compareStr">The text to match.</param>
    /// <param name="compareStrs">The candidate strings; null elements are skipped.</param>
    /// <returns>
    /// The best-scoring candidate (the earliest one on ties), or an empty string
    /// when either argument is null or no candidate scores above zero.
    /// </returns>
    public static string Calculate(string compareStr, string[] compareStrs)
    {
        // Null inputs cannot match anything; report "no match" instead of crashing.
        if (compareStr == null || compareStrs == null)
        {
            return "";
        }

        double bestSimilarity = 0;
        string bestMatch = "";

        foreach (string strItem in compareStrs)
        {
            if (strItem == null)
            {
                continue;
            }

            double similarity = JaroWinklerDistance(compareStr, strItem);
            // Strict comparison: the first candidate wins a tie, and a
            // candidate with zero similarity is never returned.
            if (similarity > bestSimilarity)
            {
                bestSimilarity = similarity;
                bestMatch = strItem;
            }
        }
        return bestMatch;
    }

    /// <summary>
    /// Computes the Jaro-Winkler similarity: the Jaro similarity boosted in
    /// proportion to the length of the common prefix (capped at four characters).
    /// </summary>
    private static double JaroWinklerDistance(string s1, string s2)
    {
        double jaroDistance = JaroDistance(s1, s2);

        // Without the cap, a shared prefix longer than 10 characters would push
        // the result above 1.0 and let a near-match outrank an exact match.
        int prefixLength = Math.Min(MaxPrefixLength, GetCommonPrefixLength(s1, s2));

        return jaroDistance + prefixLength * PrefixScalingFactor * (1 - jaroDistance);
    }

    /// <summary>
    /// Computes the Jaro similarity of two strings from the number of matching
    /// characters and the number of transpositions among them.
    /// </summary>
    private static double JaroDistance(string s1, string s2)
    {
        if (s1 == s2)
        {
            return 1.0;
        }

        // Characters only count as matching when their positions differ by at
        // most this much. Clamped so one-character inputs do not yield -1.
        int maxDistance = Math.Max(0, Math.Max(s1.Length, s2.Length) / 2 - 1);
        bool[] s1Matches = new bool[s1.Length];
        bool[] s2Matches = new bool[s2.Length];
        int matchingCharacters = 0;

        for (int i = 0; i < s1.Length; i++)
        {
            int start = Math.Max(0, i - maxDistance);
            int end = Math.Min(i + maxDistance + 1, s2.Length);
            for (int j = start; j < end; j++)
            {
                // Each character of s2 may be paired with only one character of s1.
                if (s2Matches[j] || s1[i] != s2[j])
                {
                    continue;
                }
                s1Matches[i] = true;
                s2Matches[j] = true;
                matchingCharacters++;
                break;
            }
        }

        if (matchingCharacters == 0)
        {
            return 0.0;
        }

        // Walk the matched characters of both strings in order; every position
        // where they disagree is half of a transposition.
        int transpositions = 0;
        int k = 0;
        for (int i = 0; i < s1.Length; i++)
        {
            if (!s1Matches[i])
            {
                continue;
            }
            // Both arrays hold the same number of matches, so k stays in range.
            while (!s2Matches[k])
            {
                k++;
            }
            if (s1[i] != s2[k])
            {
                transpositions++;
            }
            k++;
        }

        return (matchingCharacters / (double)s1.Length
                + matchingCharacters / (double)s2.Length
                + (matchingCharacters - transpositions / 2.0) / matchingCharacters) / 3.0;
    }

    /// <summary>
    /// Counts how many leading characters the two strings have in common.
    /// </summary>
    private static int GetCommonPrefixLength(string s1, string s2)
    {
        int commonPrefixLength = 0;
        int minLength = Math.Min(s1.Length, s2.Length);
        while (commonPrefixLength < minLength && s1[commonPrefixLength] == s2[commonPrefixLength])
        {
            commonPrefixLength++;
        }
        return commonPrefixLength;
    }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值