1.伪代码
- 计算待测样本与所有训练样本的距离;
- 按距离从小到大排序,找出距离最小的前k个近邻(近邻实际数量可能大于k);
- 基于找到的近邻计算类概率分布,并依此确定待测样本的预测类属性值。
2.代码
package weka.classifiers.xwq;
import weka.classifiers.Classifier;
import weka.core.Instance;
import weka.core.Instances;
public class KNN_xu extends Classifier
{
/**
* Training set; assigned in buildClassifier from a copy of the supplied data.
*/
public Instances m_Train;
/**
* Parameter K (the number of nearest neighbours to look for, per the
* pseudocode above); assigned in buildClassifier.
*/
public int m_K;
/**
 * Prepares the classifier: stores a copy of the training data and makes
 * sure the neighbourhood size K holds a usable value.
 *
 * @param data the training instances; copied via the Instances copy
 *             constructor before being stored in m_Train
 * @throws Exception declared by the overridden Weka method
 */
@Override
public void buildClassifier(Instances data) throws Exception
{
    m_Train = new Instances(data);
    // Respect a K the caller already assigned through the public field;
    // fall back to the historical default of 10 only when K is unset
    // (an int field starts at 0) or not positive.
    if (m_K <= 0)
    {
        m_K = 10;
    }
}
public double[] distributionForInstance(Instance instance) throws Exception
{
//calculate the distance between instance and all train instance
int numInstance = m_Train.numInstances();
double []distance = new double[numInstance];
for (int i = 0; i < numInstance; i++)
{
Instance trainInstance &