之前在接触 Python 的时候用过这个算法，今天无聊，就想用 C# 测试一下。K 最近邻（k-Nearest Neighbor，KNN）分类算法是一个理论上比较成熟的方法，也是最简单的机器学习算法之一。该方法的思路是：在特征空间中，如果一个样本的 k 个最近邻（即特征空间中距离最近的 k 个样本）中的大多数属于某一个类别，则该样本也属于这个类别。
// k-nearest-neighbour pass: every entry of unclassifyList receives the species that
// occurs most often among its k closest entries in sampleList.
int sampleCount = sampleList.Count;
int unclassifyCount = unclassifyList.Count;

for (int target = 0; target < unclassifyCount; target++)
{
    // Pair each current sample's species with its distance to the item being classified.
    // CalculateDistance is defined elsewhere; the original author's note describes it as
    // the Euclidean distance, Sqrt(x^2 + y^2 + m^2 + n^2).
    List<Tuple<string, double>> scored = new List<Tuple<string, double>>(sampleCount);
    for (int s = 0; s < sampleCount; s++)
    {
        double gap = CalculateDistance(sampleList[s], unclassifyList[target]);
        string label = sampleList[s].Species;
        scored.Add(Tuple.Create(label, gap));
    }

    // Keep only the k nearest samples and count how many times each species appears.
    Dictionary<string, int> votes = new Dictionary<string, int>();
    foreach (Tuple<string, double> neighbour in scored.OrderBy(n => n.Item2).Take(k))
    {
        int seen;
        // TryGetValue leaves 'seen' at 0 for a species that has not been counted yet.
        votes.TryGetValue(neighbour.Item1, out seen);
        votes[neighbour.Item1] = seen + 1;
    }

    // Pick the species with the highest count, e.g.
    //   setosa 5, versicolor 4, virginica 3  ==> setosa
    // When no neighbour was selected the item's species is left untouched.
    if (votes.Count > 0)
    {
        unclassifyList[target].Species = votes.OrderByDescending(v => v.Value).First().Key;
    }

    // The newly labelled item joins the sample set, so later iterations also measure against it.
    sampleList.Add(unclassifyList[target]);
    sampleCount++;
}
完整代码+鸢尾花常用数据集
密码:p7a6