Levenshtein Distance + LCS 算法计算两个字符串的相似度

/// <summary>
/// Computes the Levenshtein (minimum edit) distance between two strings
/// using a full dynamic-programming table.
/// </summary>
/// <param name="source">First string; indexes the columns of the table.</param>
/// <param name="target">Second string; indexes the rows of the table.</param>
/// <returns>
/// The number of single-character insertions, deletions and substitutions
/// needed to turn one string into the other.
/// </returns>
public static int LevenshteinDistance(string source, string target)
{
    int sourceLength = source.Length;
    int targetLength = target.Length;

    // When one side is empty the distance is simply the length of the other side.
    if (sourceLength == 0)
    {
        return targetLength;
    }
    if (targetLength == 0)
    {
        return sourceLength;
    }

    // distances[r, c] holds the distance between the first r characters of
    // target and the first c characters of source.
    int[,] distances = new int[targetLength + 1, sourceLength + 1];

    // Border cells: comparing against an empty prefix costs one edit per character.
    for (int c = 0; c <= sourceLength; c++)
    {
        distances[0, c] = c;
    }
    for (int r = 1; r <= targetLength; r++)
    {
        distances[r, 0] = r;
    }

    for (int r = 1; r <= targetLength; r++)
    {
        char targetChar = target[r - 1];
        for (int c = 1; c <= sourceLength; c++)
        {
            int substitutionCost = source[c - 1] == targetChar ? 0 : 1;

            int fromDiagonal = distances[r - 1, c - 1] + substitutionCost;
            int fromLeft = distances[r, c - 1] + 1;
            int fromAbove = distances[r - 1, c] + 1;

            distances[r, c] = Math.Min(Math.Min(fromDiagonal, fromLeft), fromAbove);
        }
    }

    return distances[targetLength, sourceLength];
}


/// <summary>
/// Returns the length of the longest run of consecutive characters that
/// appears in both strings.
/// </summary>
/// <remarks>
/// Despite the method name, the running count is reset to zero on every
/// mismatch, so the result is the longest common <em>substring</em> length,
/// not the longest common subsequence length. The name and the results are
/// kept as they are so existing callers are unaffected.
/// </remarks>
/// <param name="source">First string to compare.</param>
/// <param name="target">Second string to compare.</param>
/// <returns>
/// The length of the longest shared contiguous run, or 0 when either string
/// is empty or no character is shared.
/// </returns>
public static int LongestCommonSubsequence(string source, string target)
{
    if (source.Length == 0 || target.Length == 0)
    {
        return 0;
    }

    // Each cell depends only on the diagonal neighbour in the previous row,
    // so two rolling rows replace the full matrix.
    int[] previousRow = new int[target.Length + 1];
    int[] currentRow = new int[target.Length + 1];
    int longest = 0;

    for (int i = 0; i < source.Length; i++)
    {
        for (int j = 0; j < target.Length; j++)
        {
            if (source[i] == target[j])
            {
                int runLength = previousRow[j] + 1;
                currentRow[j + 1] = runLength;
                if (runLength > longest)
                {
                    longest = runLength;
                }
            }
            else
            {
                currentRow[j + 1] = 0;
            }
        }

        // Swap rows; every cell from index 1 upward is overwritten on the
        // next pass and index 0 is never written, so no clearing is needed.
        int[] swap = previousRow;
        previousRow = currentRow;
        currentRow = swap;
    }

    return longest;
}

/// <summary>
/// Scores the similarity of two strings as <c>lcs / (ld + lcs)</c>, where
/// <c>ld</c> is the result of <c>LevenshteinDistance</c> and <c>lcs</c> is
/// the result of <c>LongestCommonSubsequence</c>. A larger value means the
/// strings are more similar.
/// </summary>
/// <param name="source">First string to compare.</param>
/// <param name="target">Second string to compare.</param>
/// <returns>
/// A value between 0 and 1. Two empty strings score 1 instead of the
/// <c>NaN</c> that the bare formula would produce.
/// </returns>
public static float StringSimilarity(string source, string target)
{
    int ld = LevenshteinDistance(source, target);
    int lcs = LongestCommonSubsequence(source, target);
    int denominator = ld + lcs;

    // Both terms are non-negative, so the sum is zero only when the distance
    // is zero and nothing is shared, i.e. both strings are empty. Treat that
    // as a perfect match rather than dividing 0 by 0.
    if (denominator == 0)
    {
        return 1f;
    }

    return (float)lcs / denominator;
}
/// <summary>
/// Gets the similarity of two strings as the fraction of characters in
/// <paramref name="str"/> that also occur anywhere in
/// <paramref name="sourceString"/> (described by the original author as
/// suitable for Chinese text).
/// </summary>
/// <remarks>
/// Position and order are ignored, and each character of
/// <paramref name="str"/> is counted separately, including repeats.
/// </remarks>
/// <param name="sourceString">The first string; supplies the set of characters to match against.</param>
/// <param name="str">The second string; each of its characters is tested for membership.</param>
/// <returns>
/// A value between 0 and 1. When <paramref name="str"/> is empty the result
/// is 1 if <paramref name="sourceString"/> is also empty and 0 otherwise,
/// instead of the <c>NaN</c> that dividing 0 by 0 would produce.
/// </returns>
public static double SimilarityWith(string sourceString, string str)
{
    int sourceLength = sourceString.Length;
    int candidateLength = str.Length;

    // Nothing to test: avoid 0 / 0 and report identical-empty as a full match.
    if (candidateLength == 0)
    {
        return sourceLength == 0 ? 1.0 : 0.0;
    }

    // A set gives constant-time membership checks instead of a linear scan
    // of the source for every character.
    var sourceChars = new System.Collections.Generic.HashSet<char>(sourceString);

    int hits = 0;
    foreach (char item in str)
    {
        if (sourceChars.Contains(item))
        {
            hits++;
        }
    }

    return (double)hits / candidateLength;
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值