拼写纠错-C#实现

好久没用C#写项目了,语法都忘了:( 但项目是用C#写的,也只好重新学习咯。
拼写纠错的两个核心要素:数据字典和BK树,并用编辑距离来度量两个词之间的距离。纠错时,在构建好的BK树上查找与给定词的编辑距离不超过d的节点集合,然后输出即可。
这里写图片描述

BK树类:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace PYcheck
{
    /// <summary>
    /// Signature of the distance callback used by <see cref="BKTree{T}"/>: it receives
    /// two items (passed as <see cref="object"/>) and returns an integer distance.
    /// </summary>
    /// <param name="o1">First item to compare.</param>
    /// <param name="o2">Second item to compare.</param>
    /// <returns>The integer distance between the two items.</returns>
    public delegate int DistanceFunction(object o1, object o2);  

    /// <summary>
    /// A single tree node: holds one item plus its child nodes, indexed by an integer key.
    /// </summary>
    /// <typeparam name="T">Type of the stored item.</typeparam>
    public class Node<T>
    {
        /// <summary>The value stored in this node.</summary>
        public T item;

        /// <summary>Child nodes indexed by integer key; empty for a freshly created node.</summary>
        public Dictionary<int, Node<T>> children = new Dictionary<int, Node<T>>();

        /// <summary>
        /// Creates a childless node that stores <paramref name="item"/>.
        /// </summary>
        /// <param name="item">The value to store in the node.</param>
        public Node(T item)
        {
            this.item = item;
        }
    }

    /// <summary>
    /// A BK-tree: every child hangs off its parent under the key equal to the distance
    /// (as reported by the supplied <see cref="DistanceFunction"/>) between the child's
    /// item and the parent's item. <see cref="Search"/> collects all stored items whose
    /// distance to a query item is at most a given radius.
    /// </summary>
    /// <typeparam name="T">Type of the items stored in the tree.</typeparam>
    class BKTree<T>
    {
        private Node<T> rootNode;
        private DistanceFunction distanceFunction;
        private int length;    // number of nodes currently in the tree
        private int modCount;  // number of insertions performed after construction

        /// <summary>
        /// Creates a tree whose root holds <paramref name="t"/>.
        /// </summary>
        /// <param name="t">Item stored in the root node.</param>
        /// <param name="distanceFunction">
        /// Distance callback; despite the <c>null</c> default it is mandatory.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="distanceFunction"/> is <c>null</c>.
        /// </exception>
        public BKTree(T t, DistanceFunction distanceFunction = null)
        {
            if (distanceFunction == null)
            {
                // ArgumentNullException derives from Exception, so existing
                // catch (Exception) handlers keep working.
                throw new ArgumentNullException("distanceFunction", "distanceFunction cannot be null. ");
            }

            rootNode = new Node<T>(t);
            this.distanceFunction = distanceFunction;
            length = 1; // the root node counts, matching what AddNode does when it creates a root
            modCount = 0;
        }

        /// <summary>
        /// Inserts <paramref name="t"/> into the tree.
        /// </summary>
        /// <param name="t">Item to insert; must not be <c>null</c>.</param>
        /// <returns>
        /// <c>true</c> if a new node was created; <c>false</c> if an equal item at
        /// distance 0 was already present.
        /// </returns>
        /// <exception cref="NullReferenceException"><paramref name="t"/> is <c>null</c>.</exception>
        public bool AddNode(T t)
        {
            if (t == null)
            {
                // Exception type kept as-is so callers that already catch it are unaffected.
                throw new NullReferenceException();
            }

            if (rootNode == null)
            {
                rootNode = new Node<T>(t);
                length = 1;
                modCount++; // Modified tree by adding root.
                return true;
            }

            Node<T> parentNode = rootNode;
            while (true)
            {
                int distance = distanceFunction(parentNode.item, t);

                // Only a node that is both at distance 0 and Equals(t) is a duplicate;
                // a distinct item at distance 0 is stored under key 0 like any other child.
                if (distance == 0 && t.Equals(parentNode.item))
                {
                    return false;
                }

                Node<T> childNode;
                if (!parentNode.children.TryGetValue(distance, out childNode))
                {
                    // No child at this distance yet: this is where the new item belongs.
                    parentNode.children.Add(distance, new Node<T>(t));
                    length++;
                    modCount++;
                    return true;
                }

                parentNode = childNode;
            }
        }

        /// <summary>
        /// Finds every stored item whose distance to <paramref name="t"/> is at most
        /// <paramref name="radius"/>.
        /// </summary>
        /// <param name="t">Query item.</param>
        /// <param name="radius">Maximum distance (inclusive) for an item to be reported.</param>
        /// <returns>The set of matching items; empty when nothing matches.</returns>
        public HashSet<T> Search(T t, int radius)
        {
            HashSet<T> res = new HashSet<T>();
            if (rootNode != null)
            {
                Query(rootNode, t, radius, res);
            }
            return res;
        }

        /// <summary>
        /// Recursive helper for <see cref="Search"/>: tests <paramref name="node"/> and
        /// descends into the children whose key lies in
        /// [max(distance - radius, 0), distance + radius].
        /// </summary>
        private void Query(Node<T> node, T t, int radius, HashSet<T> res)
        {
            int distance = distanceFunction(node.item, t);
            if (distance <= radius)
            {
                res.Add(node.item);
            }

            // Bounds are computed in 64-bit so distance + radius cannot overflow.
            long lowest = Math.Max((long)distance - radius, 0L);
            long highest = (long)distance + radius;

            // Walk the children that actually exist instead of probing every integer in
            // the window: the cost is proportional to the number of children rather than
            // to the radius, and no exception is needed to detect a missing edge.
            foreach (KeyValuePair<int, Node<T>> edge in node.children)
            {
                if (edge.Key >= lowest && edge.Key <= highest)
                {
                    Query(edge.Value, t, radius, res);
                }
            }
        }
    }
}

距离函数:

        /// <summary>
        /// Computes the edit (Levenshtein) distance between two strings: the minimum
        /// number of single-character insertions, deletions and substitutions needed to
        /// turn one into the other.
        /// </summary>
        /// <param name="obj1">First string, passed as <see cref="object"/>.</param>
        /// <param name="obj2">Second string, passed as <see cref="object"/>.</param>
        /// <returns>The edit distance; 0 when the strings are identical.</returns>
        /// <exception cref="ArgumentException">
        /// Either argument is <c>null</c> or is not a <see cref="string"/>.
        /// </exception>
        static int LevenshteinDistance(object obj1, object obj2)
        {
            string first = obj1 as string;
            string second = obj2 as string;
            if (first == null || second == null)
            {
                throw new ArgumentException("Both arguments must be non-null strings.");
            }

            // Make 'first' the shorter string so the working rows are as small as possible.
            if (first.Length > second.Length)
            {
                string temp = first;
                first = second;
                second = temp;
            }

            // 'first' is the shorter one, so this also covers the case of two empty strings.
            if (first.Length == 0)
            {
                return second.Length;
            }

            // Only the previous and the current row of the distance table are ever read,
            // so two rows are kept instead of the full table.
            int rowLength = first.Length + 1;
            int[] previousRow = new int[rowLength];
            int[] currentRow = new int[rowLength];

            for (int i = 0; i < rowLength; i++)
            {
                previousRow[i] = i;
            }

            for (int j = 1; j <= second.Length; j++)
            {
                currentRow[0] = j;

                for (int i = 1; i < rowLength; i++)
                {
                    int deletion = previousRow[i] + 1;
                    int insertion = currentRow[i - 1] + 1;
                    int substitution = previousRow[i - 1];
                    if (first[i - 1] != second[j - 1])
                    {
                        substitution += 1;
                    }

                    currentRow[i] = Math.Min(Math.Min(insertion, deletion), substitution);
                }

                // The row just filled becomes the "previous" row for the next character.
                int[] swap = previousRow;
                previousRow = currentRow;
                currentRow = swap;
            }

            return previousRow[rowLength - 1];
        }
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值