Machine Learning: the k-NN Idea and Implementation (in Java)

The k-nearest neighbors method (k-NN) is a basic method for both classification and regression.
Input: an instance's feature vector, corresponding to a point in feature space;
Output: the instance's class.
For classification, a new instance is predicted from the classes of its k nearest training instances, e.g., by majority voting.
For regression, a new instance is predicted by the average of the target values of its k nearest training instances.
The code for the two is largely the same; this post implements k-NN for the classification problem.
Data preparation: download link
Declarations of the relevant member variables:

	/**
	 * Manhattan distance.
	 */
	public static final int MANHATTAN = 0;

	/**
	 * Euclidean distance.
	 */
	public static final int EUCLIDEAN = 1;

	/**
	 * The distance measure.
	 */
	public int distanceMeasure = EUCLIDEAN;

	/**
	 * The random number generator.
	 */
	public static final Random random = new Random();

	/**
	 * The number of neighbors.
	 */
	int numNeighbors = 5;

	/**
	 * The whole data set.
	 */
	Instances dataset;

	/**
	 * The training set. Represented by the indices of the data.
	 */
	int[] trainingSet;

	/**
	 * The testing set. Represented by the indices of the data.
	 */
	int[] testingSet;

	/**
	 * The predictions.
	 */
	int[] predictions;

	/**
	 * The distances from the current testing instance to the training instances.
	 */
	double[] distances;

The class constructor, which reads in the data:

public KnnClassification(String paraFilename) {
		try {
			FileReader fileReader = new FileReader(paraFilename);
			dataset = new Instances(fileReader);
			System.out.println("The number of totall instances is " + dataset.numInstances());
			// The last attribute is the decision class.
			dataset.setClassIndex(dataset.numAttributes() - 1);
			System.out.println("The data set is: " + dataset.toString());
			fileReader.close();
		} catch (Exception ee) {
			System.out.println("Error occurred while trying to read \'" + paraFilename
					+ "\' in KnnClassification constructor.\r\n" + ee);
			System.exit(0);
		} // Of try
	}// Of the first constructor

The printed data:

The data set is: @relation iris

@attribute sepallength numeric
@attribute sepalwidth numeric
@attribute petallength numeric
@attribute petalwidth numeric
@attribute class {Iris-setosa,Iris-versicolor,Iris-virginica}

@data
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3,1.4,0.1,Iris-setosa
4.3,3,1.1,0.1,Iris-setosa
5.8,4,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5,3,1.6,0.2,Iris-setosa
5,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5,3.3,1.4,0.2,Iris-setosa
7,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5,2,3.5,1,Iris-versicolor
5.9,3,4.2,1.5,Iris-versicolor
6,2.2,4,1,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3,5,1.7,Iris-versicolor
6,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6,2.7,5.1,1.6,Iris-versicolor
5.4,3,4.5,1.5,Iris-versicolor
6,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3,4.1,1.3,Iris-versicolor
5.5,2.5,4,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3,4.6,1.4,Iris-versicolor
5.8,2.6,4,1.2,Iris-versicolor
5,2.3,3.3,1,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3,5.8,2.2,Iris-virginica
7.6,3,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3,5.5,2.1,Iris-virginica
5.7,2.5,5,2,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6,2.2,5,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2,Iris-virginica
7.7,2.8,6.7,2,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6,3,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3,5.2,2.3,Iris-virginica
6.3,2.5,5,1.9,Iris-virginica
6.5,3,5.2,2,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3,5.1,1.8,Iris-virginica

From the printout above, we can see that the data are sorted by class. For the training/testing split to be representative, we need to shuffle the data set.
So we first shuffle the indices of the data set:

	/**
	 *********************
	 * Get random indices for data randomization.
	 *
	 * @param paraLength The length of the sequence.
	 * @return An array of indices, e.g., {4, 3, 1, 5, 0, 2} with length 6.
	 *********************
	 */
	public static int[] getRandomIndices(int paraLength) {
		int[] resultIndices = new int[paraLength];

		// Step 1. Initialize.
		for (int i = 0; i < paraLength; i++) {
			resultIndices[i] = i;
		} // Of for i

		// Step 2. Randomly swap.
		int tempFirst, tempSecond, tempValue;
		for (int i = 0; i < paraLength; i++) {
			// Generate two random indices.
			tempFirst = random.nextInt(paraLength);
			tempSecond = random.nextInt(paraLength);

			// Swap.
			tempValue = resultIndices[tempFirst];
			resultIndices[tempFirst] = resultIndices[tempSecond];
			resultIndices[tempSecond] = tempValue;
		} // Of for i
		return resultIndices;
	}// Of getRandomIndices
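
A side note: swapping two random positions paraLength times does not yield a perfectly uniform permutation. A standard Fisher-Yates shuffle is a drop-in alternative; the following is my own sketch, not part of the original code:

	/**
	 *********************
	 * A Fisher-Yates shuffle sketch (my addition, not the original code):
	 * swap each position with a uniformly chosen position at or before it,
	 * which yields a uniformly random permutation.
	 *
	 * @param paraLength The length of the sequence.
	 * @return An array of shuffled indices.
	 *********************
	 */
	public static int[] getFisherYatesIndices(int paraLength) {
		int[] resultIndices = new int[paraLength];
		for (int i = 0; i < paraLength; i++) {
			resultIndices[i] = i;
		} // Of for i

		for (int i = paraLength - 1; i > 0; i--) {
			int tempPosition = random.nextInt(i + 1);
			int tempValue = resultIndices[i];
			resultIndices[i] = resultIndices[tempPosition];
			resultIndices[tempPosition] = tempValue;
		} // Of for i
		return resultIndices;
	}// Of getFisherYatesIndices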

Split the shuffled data set:

	/**
	 *********************
	 * Split the data into training and testing parts.
	 *
	 * @param paraTrainingFraction The fraction of the training set.
	 *********************
	 */
	public void splitTrainingTesting(double paraTrainingFraction) {
		int tempSize = dataset.numInstances();
		int[] tempIndices = getRandomIndices(tempSize);
		int tempTrainingSize = (int) (tempSize * paraTrainingFraction);

		trainingSet = new int[tempTrainingSize];
		testingSet = new int[tempSize - tempTrainingSize];

		for (int i = 0; i < tempTrainingSize; i++) {
			trainingSet[i] = tempIndices[i];
		} // Of for i

		for (int i = 0; i < tempSize - tempTrainingSize; i++) {
			testingSet[i] = tempIndices[i + tempTrainingSize];
		} // Of for i
	}// Of splitTrainingTesting

At this point the simple data processing is done: the whole data set has been split into a training set and a testing set. For the 150-instance iris data, a training fraction of 0.8 gives 120 training indices and 30 testing indices.
Since k-NN has no explicit model-training step, we can skip training and go straight to prediction.
Preparation for prediction:
We need to compute the distances from a testing instance to the training instances.
The code implements two distance measures: the Manhattan distance and the (squared) Euclidean distance.
Manhattan distance:
$L_1(x_i, x_j) = \sum\limits_{l=1}^{n} |x_i^{(l)} - x_j^{(l)}|$
Euclidean distance:
$L_2(x_i, x_j) = \left( \sum\limits_{l=1}^{n} |x_i^{(l)} - x_j^{(l)}|^2 \right)^{\frac{1}{2}}$
As a supplement, the Minkowski distance:
$L_p(x_i, x_j) = \left( \sum\limits_{l=1}^{n} |x_i^{(l)} - x_j^{(l)}|^p \right)^{\frac{1}{p}}$
When $p = \infty$, it is called the Chebyshev distance:
$L_\infty(x_i, x_j) = \max\limits_{l} |x_i^{(l)} - x_j^{(l)}|$
A note on parameters:
Hyperparameter: a parameter that must be chosen before the algorithm runs;
Model parameter: a parameter learned while the algorithm runs.
The k in k-NN is a typical hyperparameter.
Because different values of $p$ affect which instances count as nearest, $p$ can also serve as a hyperparameter of k-NN.
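
To make $p$ tunable, one could add a Minkowski distance next to the two built-in measures. The following is my sketch, not part of the original class; paraP is assumed to be at least 1:

	/**
	 *********************
	 * A Minkowski distance sketch (my addition): p = 1 gives the Manhattan
	 * distance and p = 2 the Euclidean distance.
	 *
	 * @param paraI The index of the first instance.
	 * @param paraJ The index of the second instance.
	 * @param paraP The order of the distance, assumed to be at least 1.
	 * @return The distance.
	 *********************
	 */
	public double minkowskiDistance(int paraI, int paraJ, double paraP) {
		double tempSum = 0;
		double tempDifference;
		for (int i = 0; i < dataset.numAttributes() - 1; i++) {
			tempDifference = Math.abs(dataset.instance(paraI).value(i) - dataset.instance(paraJ).value(i));
			tempSum += Math.pow(tempDifference, paraP);
		} // Of for i
		return Math.pow(tempSum, 1 / paraP);
	}// Of minkowskiDistance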

The distance computation code:

	/**
	 *********************
	 * The distance between two instances. Note that the EUCLIDEAN case
	 * returns the squared Euclidean distance; the square root is omitted
	 * because it does not change the neighbor ordering.
	 *
	 * @param paraI The index of the first instance.
	 * @param paraJ The index of the second instance.
	 * @return The distance.
	 *********************
	 */
	public double distance(int paraI, int paraJ) {
		double resultDistance = 0;
		double tempDifference;
		switch (distanceMeasure) {
		case MANHATTAN:
			for (int i = 0; i < dataset.numAttributes() - 1; i++) {
				tempDifference = dataset.instance(paraI).value(i) - dataset.instance(paraJ).value(i);
				if (tempDifference < 0) {
					resultDistance -= tempDifference;
				} else {
					resultDistance += tempDifference;
				} // Of if
			} // Of for i
			break;

		case EUCLIDEAN:
			for (int i = 0; i < dataset.numAttributes() - 1; i++) {
				tempDifference = dataset.instance(paraI).value(i) - dataset.instance(paraJ).value(i);
				resultDistance += tempDifference * tempDifference;
			} // Of for i
			break;
		default:
			System.out.println("Unsupported distance measure: " + distanceMeasure);
			break;
		}// Of switch
		return resultDistance;
	}// Of distance

A supplement:
Setting the distance measure:

	/**
	 ********************
	 * Set the distance measure.
	 * 
	 * @param paraType The type.
	 *********************
	 */
	public void setDistanceMeasure(int paraType) {
		switch (paraType) {
		case MANHATTAN:
			distanceMeasure = MANHATTAN;
			break;
		case EUCLIDEAN:
			distanceMeasure = EUCLIDEAN;
			break;
		default:
			System.out.println("Unsupported distance measure: " + paraType);
			break;
		} // Of switch
	}// Of setDistanceMeasure

After the distances are computed, we select the k nearest instances:

	/**
	 *********************
	 * Compute the nearest k neighbors.
	 *
	 * @param paraCurrent The index of the current instance.
	 * @return The indices of the nearest instances.
	 *********************
	 */
	public int[] computeNearests(int paraCurrent) {
		int[] resultNearests;

		// Compute all distances once to avoid redundant computation. The
		// member variable distances is reused later by weightedVoting.
		distances = new double[trainingSet.length];
		for (int i = 0; i < trainingSet.length; i++) {
			distances[i] = distance(paraCurrent, trainingSet[i]);
		} // Of for i

		// resultNearests = simpleSelect(distances);
		// System.out.println("The nearest of " + paraCurrent + " are: " +
		// Arrays.toString(resultNearests));

		resultNearests = selectWithHeap(distances);
		System.out.println("The nearest of " + paraCurrent + " are: " + Arrays.toString(resultNearests));

		return resultNearests;
	}// Of computeNearests

The simple selection approach:

	/**
	 *********************
	 * Select the nearest indices with repeated linear scans.
	 *
	 * @param paraDistances The distances to the training instances.
	 * @return An array of the nearest indices.
	 *********************
	 */
	public int[] simpleSelect(double[] paraDistances) {
		int[] resultNearests = new int[numNeighbors];
		boolean[] tempSelected = new boolean[trainingSet.length];
		double tempMinimalDistance;
		int tempMinimalIndex = 0;

		for (int i = 0; i < numNeighbors; i++) {
			tempMinimalDistance = Double.MAX_VALUE;

			for (int j = 0; j < trainingSet.length; j++) {
				if (tempSelected[j])
					continue;

				if (paraDistances[j] < tempMinimalDistance) {
					tempMinimalDistance = paraDistances[j];
					tempMinimalIndex = j;
				} // Of if
			} // Of for j
			resultNearests[i] = trainingSet[tempMinimalIndex];
			tempSelected[tempMinimalIndex] = true;
		} // Of for i

		return resultNearests;
	}// Of simpleSelect

The heap-based selection approach:

	/**
	 *********************
	 * Adjust the heap (sift down in a min-heap).
	 *
	 * @param paraStartIndex The index at which to start adjusting.
	 * @param paraLength     The length of the sequence being adjusted.
	 * @param paraDistances  The array of distances.
	 * @param paraIndexes    The training-set indices paired with the distances.
	 *********************
	 */
	public void adjustHeap(int paraStartIndex, int paraLength, double[] paraDistances, int[] paraIndexes) {
		int tempParentIndex = paraStartIndex;
		double tempDistance = paraDistances[paraStartIndex];
		int tempIndex = paraIndexes[paraStartIndex];

		for (int i = paraStartIndex * 2 + 1; i < paraLength; i = i * 2 + 1) {
			// Select the smaller.
			if (i + 1 < paraLength && paraDistances[i + 1] < paraDistances[i])
				i++;
			if (tempDistance > paraDistances[i]) {
				// Update the index and distance.
				paraIndexes[tempParentIndex] = paraIndexes[i];
				paraDistances[tempParentIndex] = paraDistances[i];
				tempParentIndex = i;
			} else {
				break;
			} // Of if
		} // Of for i

		paraDistances[tempParentIndex] = tempDistance;
		paraIndexes[tempParentIndex] = tempIndex;
	}// Of adjustHeap

	/**
	 *********************
	 * Select the nearest indices with a min-heap.
	 *
	 * @param paraDistances The distances to the training instances. The
	 *                      array is reordered in place.
	 * @return An array of the nearest indices.
	 *********************
	 */
	public int[] selectWithHeap(double[] paraDistances) {
		int[] resultNearests = new int[numNeighbors];

		// Initialize the indexes.
		int[] tempIndexes = new int[trainingSet.length];
		for (int i = 0; i < trainingSet.length; i++) {
			tempIndexes[i] = i;
		} // Of for i

		// Build the min-heap.
		int tempLength = paraDistances.length;
		for (int i = tempLength / 2 - 1; i >= 0; i--) {
			adjustHeap(i, tempLength, paraDistances, tempIndexes);
		} // Of for i

		for (int i = 0; i < numNeighbors; i++) {
			resultNearests[i] = trainingSet[tempIndexes[0]];
			// Swap the root (the current minimum) with the last element of
			// the shrinking heap, so that the extracted minima accumulate at
			// the tail of the array; weightedVoting reads them from there.
			int tempSwappedIndex = tempIndexes[0];
			double tempSwappedDistance = paraDistances[0];
			tempIndexes[0] = tempIndexes[tempLength - i - 1];
			paraDistances[0] = paraDistances[tempLength - i - 1];
			tempIndexes[tempLength - i - 1] = tempSwappedIndex;
			paraDistances[tempLength - i - 1] = tempSwappedDistance;
			adjustHeap(0, tempLength - i - 1, paraDistances, tempIndexes);
		} // Of for i
		return resultNearests;
	}// Of selectWithHeap

Time complexity of the heap-based selection: $O(n + k \log n)$.
Time complexity of the selection-sort-style approach: $O(kn)$.
(Figure: timing comparison of the two selection methods; image not reproduced here.)
In general, the latter costs more time than the former.
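
For readers who want to reproduce the comparison, a hypothetical timing harness (my sketch, not from the original article) could be dropped into main; it clones the distance array because selectWithHeap reorders it in place:

		// Hypothetical timing harness (my sketch). Assumes tempClassifier
		// exists and splitTrainingTesting has already been called.
		double[] tempDistances = new double[tempClassifier.trainingSet.length];
		for (int i = 0; i < tempDistances.length; i++) {
			tempDistances[i] = tempClassifier.distance(tempClassifier.testingSet[0], tempClassifier.trainingSet[i]);
		} // Of for i

		long tempStart = System.nanoTime();
		tempClassifier.simpleSelect(tempDistances.clone());
		long tempSimpleTime = System.nanoTime() - tempStart;

		tempStart = System.nanoTime();
		tempClassifier.selectWithHeap(tempDistances.clone());
		long tempHeapTime = System.nanoTime() - tempStart;

		System.out.println("simpleSelect: " + tempSimpleTime + " ns, selectWithHeap: " + tempHeapTime + " ns");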
After selecting the nearest k instances comes the voting: for each selected index, look up the corresponding class, increment that class's counter, and the class with the most votes wins:

	/**
	 *********************
	 * Voting using the instances.
	 *
	 * @param paraNeihbors The indices of the neighbors.
	 * @return The predicted label.
	 *********************
	 */
	public int simpleVoting(int[] paraNeihbors) {
		int[] tempVotes = new int[dataset.numClasses()];
		for (int i = 0; i < paraNeihbors.length; i++) {
			tempVotes[(int) dataset.instance(paraNeihbors[i]).classValue()]++;
		} // Of for i

		int tempMaximalVoting = 0;
		int tempMaximalVotingIndex = 0;
		for (int i = 0; i < dataset.numClasses(); i++) {
			if (tempVotes[i] > tempMaximalVoting) {
				tempMaximalVoting = tempVotes[i];
				tempMaximalVotingIndex = i;
			} // Of if
		} // Of for i

		return tempMaximalVotingIndex;
	}// Of simpleVoting

This fully embodies the proverb "he who stays near vermilion turns red; he who stays near ink turns black": an instance is judged by the company it keeps.
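
As mentioned at the start, the regression variant differs mainly at this step: instead of voting, it averages the neighbors' target values. A sketch of that counterpart (my addition, not in the original class; it assumes a numeric class attribute, which iris does not have):

	/**
	 *********************
	 * A regression counterpart sketch (my addition): average the neighbors'
	 * numeric class values instead of voting. Assumes a numeric class
	 * attribute.
	 *
	 * @param paraNeighbors The indices of the neighbors.
	 * @return The predicted value.
	 *********************
	 */
	public double averageVoting(int[] paraNeighbors) {
		double tempSum = 0;
		for (int i = 0; i < paraNeighbors.length; i++) {
			tempSum += dataset.instance(paraNeighbors[i]).classValue();
		} // Of for i
		return tempSum / paraNeighbors.length;
	}// Of averageVoting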
A supplement:
Voting weighted by distance:

	/**
	 ********************
	 * Voting using the instances with distance.
	 *
	 * @param paraNeihbors The indices of the neighbors.
	 * @return The predicted label.
	 *********************
	 */
	public int weightedVoting(int[] paraNeihbors) {
		double[] tempVotes = new double[dataset.numClasses()];
		double tempDistance;
		for (int i = 0; i < paraNeihbors.length; i++) {
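			// selectWithHeap left the i-th smallest distance at the tail of
			// the member array distances, so read it back from there.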
			tempDistance = distances[trainingSet.length - i - 1];
			tempVotes[(int) dataset.instance(paraNeihbors[i]).classValue()] += 1 / (tempDistance + 1);
		} // Of for i

		double tempMaximalVoting = 0;
		int tempMaximalVotingIndex = 0;
		for (int i = 0; i < dataset.numClasses(); i++) {
			if (tempVotes[i] > tempMaximalVoting) {
				tempMaximalVoting = tempVotes[i];
				tempMaximalVotingIndex = i;
			} // Of if
		} // Of for i

		return tempMaximalVotingIndex;
	}// Of weightedVoting

(Note: I made the array of distances from the testing instance to the training instances a member variable, so that weightedVoting can read it after computeNearests has filled it.)
Finally, prediction and obtaining the prediction accuracy:

	/**
	 *********************
	 * Predict for the whole testing set. The results are stored in the
	 * member variable predictions.
	 *
	 * @see #predictions
	 *********************
	 */
	public void predict() {
		predictions = new int[testingSet.length];
		for (int i = 0; i < predictions.length; i++) {
			predictions[i] = predict(testingSet[i]);
		} // Of for i
	}// Of predict
	
	/**
	 *********************
	 * Predict for the given instance.
	 *
	 * @param paraIndex The index of the instance to predict.
	 * @return The prediction.
	 *********************
	 */
	public int predict(int paraIndex) {
		int[] tempNeighbors = computeNearests(paraIndex);
		int resultPrediction = simpleVoting(tempNeighbors);

		return resultPrediction;
	}// Of predict
	
	/**
	 *********************
	 * Get the accuracy of the classifier.
	 *
	 * @return The accuracy.
	 *********************
	 */
	public double getAccuracy() {
		// A double divided by an int yields a double.
		double tempCorrect = 0;
		for (int i = 0; i < predictions.length; i++) {
			if (predictions[i] == dataset.instance(testingSet[i]).classValue()) {
				tempCorrect++;
			} // Of if
		} // Of for i

		return tempCorrect / testingSet.length;
	}// Of getAccuracy

Finally, the main method:

/**
	 *********************
	 * The entrance of the program.
	 * 
	 * @param args Not used now.
	 *********************
	 */
	public static void main(String args[]) {
		KnnClassification tempClassifier = new KnnClassification("E:/Weka-3-8-6/data/iris.arff");
		tempClassifier.splitTrainingTesting(0.8);
		tempClassifier.predict();
		System.out.println("The accuracy of the classifier is: " + tempClassifier.getAccuracy());

		// for (int i = 4; i < 9; i++) {
		// tempClassifier.setNumNeighbors(i);
		// System.out.println("------------The test with " + tempClassifier.numNeighbors
		// + " neighors------------");
		// tempClassifier.predict();
		// System.out.println("The accuracy of the classifier is: " +
		// tempClassifier.getAccuracy());
		// } // Of for i

	}// Of main

The commented-out for loop is a simple test over k; the setter it calls, setNumNeighbors, is sketched below.
(Figure: accuracy for several values of k; image not reproduced here.)
But the data set is too small for this to mean much. One reminder nonetheless: if accuracy is still increasing as k grows, the largest k tested is only possibly the best setting; widen the range of k and keep testing.
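
The original setter is not listed in this post; a minimal guess at it would be:

	/**
	 *********************
	 * Set the number of neighbors (a minimal sketch; the original setter
	 * is not shown in the post).
	 *
	 * @param paraNumNeighbors The new number of neighbors.
	 *********************
	 */
	public void setNumNeighbors(int paraNumNeighbors) {
		numNeighbors = paraNumNeighbors;
	}// Of setNumNeighbors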

Summary: this exercise exposed many of my weak spots. For example, I cannot plot with Java yet, and I am not familiar with Instance and Attribute, so my attempt to normalize the data and attach the normalized values as new attributes to the original data failed (a sketch of in-place normalization is given below). I still need to work through the documentation step by step.
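
On the normalization attempt: rescaling each non-class attribute to [0, 1] in place does not require adding new attributes. A min-max sketch (my addition, untested against the original code; it uses only Instances.numAttributes, Instances.numInstances, Instance.value and Instance.setValue, and should be called before splitting):

	/**
	 *********************
	 * A min-max normalization sketch (my addition): rescale each non-class
	 * attribute to [0, 1] in place.
	 *********************
	 */
	public void normalize() {
		for (int j = 0; j < dataset.numAttributes() - 1; j++) {
			// Find the range of attribute j.
			double tempMin = Double.MAX_VALUE;
			double tempMax = -Double.MAX_VALUE;
			for (int i = 0; i < dataset.numInstances(); i++) {
				tempMin = Math.min(tempMin, dataset.instance(i).value(j));
				tempMax = Math.max(tempMax, dataset.instance(i).value(j));
			} // Of for i

			if (tempMax == tempMin) {
				continue; // A constant attribute; nothing to rescale.
			} // Of if

			// Rescale attribute j to [0, 1].
			for (int i = 0; i < dataset.numInstances(); i++) {
				double tempValue = dataset.instance(i).value(j);
				dataset.instance(i).setValue(j, (tempValue - tempMin) / (tempMax - tempMin));
			} // Of for i
		} // Of for j
	}// Of normalize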
