高效比对:基于最短编辑距离(Levenshtein Distance)算法,返回匹配度最高的数据

        #region 高效比对返回匹配度最高的数据
        /// <summary>
        /// For every distinct source key, finds the target entry whose key has the
        /// smallest Levenshtein distance to it and returns one result per source key.
        /// </summary>
        /// <remarks>
        /// Each result carries the source key in <c>key</c>, the <c>value</c> of the
        /// closest target in <c>value</c>, and the edit distance in <c>match</c>
        /// (0 means the keys are identical; smaller is a closer match).
        /// When several targets are equally close, the one that appears first in
        /// <paramref name="targetList"/> wins. Results are ordered by ascending
        /// distance, i.e. closest matches first. Null entries in either list are skipped.
        /// Source entries are processed in parallel; exceptions thrown while comparing
        /// surface wrapped in an <see cref="AggregateException"/>.
        /// </remarks>
        /// <param name="sourceList">Entries whose <c>key</c> should be matched.</param>
        /// <param name="targetList">Candidate entries to match against.</param>
        /// <returns>
        /// One entry per distinct source key; empty when either list has no usable entries.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sourceList"/> or <paramref name="targetList"/> is null.
        /// </exception>
        public static List<MapToData> GetAutoMapData(List<MapToData> sourceList, List<MapToData> targetList)
        {
            if (sourceList == null)
            {
                throw new ArgumentNullException("sourceList");
            }
            if (targetList == null)
            {
                throw new ArgumentNullException("targetList");
            }

            // One slot per source index: every parallel iteration writes only its own
            // slot, so no shared collection is mutated concurrently (List<T>.Add is
            // not thread-safe and must not be called from inside Parallel.For).
            var bestPerSource = new MapToData[sourceList.Count];

            Parallel.For(0, sourceList.Count, i =>
            {
                var sourceItem = sourceList[i];
                if (sourceItem == null)
                {
                    return;
                }

                var sourceValue = sourceItem.key;
                MapToData best = null;
                foreach (var item in targetList)
                {
                    if (item == null)
                    {
                        continue;
                    }

                    int distance = LevenshteinDistance(sourceValue, item.key);

                    // Strict '<' keeps the earliest target on ties, making the result deterministic.
                    if (best == null || distance < best.match)
                    {
                        best = new MapToData { key = sourceValue, value = item.value, match = distance };
                    }
                }

                // Stays null when there was no usable target; filtered out below.
                bestPerSource[i] = best;
            });

            // OrderBy is a stable sort, so equal distances keep source order.
            // Grouping by key collapses duplicate source keys into a single result.
            return bestPerSource
                .Where(p => p != null)
                .OrderBy(p => p.match)
                .GroupBy(p => p.key)
                .Select(g => g.First())
                .ToList();
        }
        #endregion

        #region LD最短编辑距离算法

        /// <summary>
        /// Computes the Levenshtein (minimum edit) distance between two strings: the
        /// smallest number of single-character insertions, deletions and substitutions
        /// needed to turn <paramref name="source"/> into <paramref name="target"/>.
        /// </summary>
        /// <remarks>
        /// Characters are compared as individual <see cref="char"/> values (exact,
        /// case-sensitive). A null argument is treated as the empty string.
        /// </remarks>
        /// <param name="source">The source string; null is treated as empty.</param>
        /// <param name="target">The target string; null is treated as empty.</param>
        /// <returns>
        /// The edit distance; 0 when the strings are equal, and the length of the
        /// other string when one of them is empty.
        /// </returns>
        public static int LevenshteinDistance(string source, string target)
        {
            // Null behaves like "" so a missing key yields a distance instead of a NullReferenceException.
            source = source ?? string.Empty;
            target = target ?? string.Empty;

            int cell = source.Length;
            int row = target.Length;
            if (cell == 0)
            {
                return row;
            }
            if (row == 0)
            {
                return cell;
            }

            // Only the previous row of the DP table is ever read, so two rolling rows
            // replace the full (row + 1) x (cell + 1) matrix.
            int[] previous = new int[cell + 1];
            int[] current = new int[cell + 1];

            // Row 0: turning a prefix of source into the empty string costs its length.
            for (var i = 0; i <= cell; i++)
            {
                previous[i] = i;
            }

            for (var k = 0; k < row; k++)
            {
                // Column 0: building the first k + 1 target characters from nothing.
                current[0] = k + 1;

                for (var l = 0; l < cell; l++)
                {
                    int cost = source[l] == target[k] ? 0 : 1;
                    int substitution = previous[l] + cost;
                    int insertion = current[l] + 1;
                    int deletion = previous[l + 1] + 1;
                    current[l + 1] = Math.Min(Math.Min(substitution, insertion), deletion);
                }

                // The row just filled becomes "previous" for the next target character.
                int[] swap = previous;
                previous = current;
                current = swap;
            }

            // After the final swap the last computed row lives in 'previous'.
            return previous[cell];
        }
        #endregion

    /// <summary>
    /// Carrier for one matching entry: a key string, an associated payload and a
    /// match score.
    /// </summary>
    public class MapToData
    {
        /// <summary>
        /// The string to be matched. Defaults to the empty string.
        /// </summary>
        public string key = string.Empty;

        /// <summary>
        /// The payload associated with the match result. Defaults to a fresh
        /// <see cref="object"/> instance, so it is never null unless assigned null.
        /// </summary>
        public object value = new object();

        /// <summary>
        /// The match score. Defaults to 0.
        /// </summary>
        public int match;
    }

转载于:https://www.cnblogs.com/smartsmile/p/6234064.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值