day16--KNN分类器（day51）

最新推荐文章于 2024-04-15 23:35:28 发布

lxl。。

最新推荐文章于 2024-04-15 23:35:28 发布

阅读量140

点赞数

分类专栏： java学习日记文章标签： java KNN

本文链接：https://blog.csdn.net/lxl513513/article/details/124257066

版权

java学习日记专栏收录该内容

22 篇文章 1 订阅

订阅专栏

1.KNN(K-Nearest Neighbor)

KNN：邻近算法，所谓K最近邻，就是K个最近的邻居的意思，说的是每个样本都可以用它最接近的K个邻近值来代表。近邻算法就是将数据集合中每一个记录进行分类的方法

作用：主要用于分类，对未知事物的识别。
核心思想：如果一个样本在特征空间中的K个最相邻的样本中的大多数属于某一个类别，则该样本也属于这个类别，并具有这个类别上样本的特性。

算法流程：

准备数据，对数据进行预处理
计算测试样本点到其他每个样本点的距离
对每个距离进行排序，然后选择出距离最小的K个点
对K个点所属的类别进行比较，根据少数服从多数的原则，将测试样本点归入在K个点中占比最高的那一类

2.KNN分类器代码实现

package machinelearning.knn;

import java.io.FileReader;
import java.util.Arrays;
import java.util.Random;

import weka.core.*;

/**
 * A k-nearest-neighbors (kNN) classifier over a Weka {@code Instances} dataset.
 *
 * <p>
 * Typical usage: construct from an ARFF file, call
 * {@link #splitTrainingTesting(double)} to partition the data, call
 * {@link #predict()} to classify every testing instance, then read
 * {@link #getAccuracy()}.
 *
 * <p>
 * The last attribute of the dataset is treated as the class attribute; all
 * other attributes are read as numeric values when computing distances.
 */
public class KnnClassification {

	/** Distance measure code: sum of absolute attribute differences. */
	public static final int MANHATTAN = 0;

	/** Distance measure code: sum of squared attribute differences. */
	public static final int EUCLIDEAN = 1;

	/** The distance measure in use; one of {@link #MANHATTAN} or {@link #EUCLIDEAN}. */
	public int distanceMeasure = EUCLIDEAN;

	/** Shared random source used to shuffle instance indices. */
	public static final Random random = new Random();

	/** The number of neighbors (k) consulted for each prediction. */
	int numNeighbors = 7;

	/** The whole dataset. */
	Instances dataset;

	/** The training set, represented by indices into {@link #dataset}. */
	int[] trainingSet;

	/** The testing set, represented by indices into {@link #dataset}. */
	int[] testingSet;

	/** Predicted class index for each entry of {@link #testingSet}. */
	int[] predictions;

	/**
	 * Loads the dataset from the given file and marks the last attribute as the
	 * class attribute.
	 *
	 * <p>
	 * On any failure an error message is printed and the JVM exits.
	 * NOTE(review): the exit status is 0 even though this is an error path;
	 * kept as-is for compatibility with existing callers.
	 *
	 * @param paraFilename path of the data file to read.
	 */
	public KnnClassification(String paraFilename) {
		// try-with-resources closes the reader even when parsing fails.
		try (FileReader fileReader = new FileReader(paraFilename)) {
			dataset = new Instances(fileReader);
			// The last attribute is the decision class.
			dataset.setClassIndex(dataset.numAttributes() - 1);
		} catch (Exception ee) {
			System.out.println("Error occurred while trying to read \'" + paraFilename
					+ "\' in KnnClassification constructor.\r\n" + ee);
			System.exit(0);
		} // of try
	}// of the first constructor

	/**
	 * Builds a uniformly random permutation of {@code 0 .. paraLength - 1}.
	 *
	 * @param paraLength the number of indices to generate.
	 * @return a new array holding every index in {@code [0, paraLength)} exactly
	 *         once, in random order.
	 */
	public static int[] getRandomIndices(int paraLength) {
		int[] resultIndices = new int[paraLength];

		// Step 1. Start from the identity permutation.
		for (int i = 0; i < paraLength; i++) {
			resultIndices[i] = i;
		} // of for i

		// Step 2. Fisher-Yates shuffle: every permutation is equally likely,
		// which swapping two arbitrary positions paraLength times does not
		// guarantee.
		for (int i = paraLength - 1; i > 0; i--) {
			int tempOther = random.nextInt(i + 1);
			int tempValue = resultIndices[i];
			resultIndices[i] = resultIndices[tempOther];
			resultIndices[tempOther] = tempValue;
		} // of for i

		return resultIndices;
	}// of getRandomIndices

	/**
	 * Randomly splits the dataset into a training set and a testing set.
	 *
	 * @param paraTrainingFraction the fraction of instances used for training;
	 *                             the training size is this fraction of the
	 *                             dataset size, truncated to an int.
	 * @throws IllegalArgumentException if the resulting training size is negative
	 *                                  or exceeds the dataset size.
	 */
	public void splitTrainingTesting(double paraTrainingFraction) {
		int tempSize = dataset.numInstances();
		int tempTrainingSize = (int) (tempSize * paraTrainingFraction);
		if (tempTrainingSize < 0 || tempTrainingSize > tempSize) {
			throw new IllegalArgumentException("Training fraction " + paraTrainingFraction
					+ " yields an invalid training size " + tempTrainingSize + " for " + tempSize
					+ " instances.");
		} // of if

		int[] tempIndices = getRandomIndices(tempSize);
		// The first part of the shuffled indices trains, the remainder tests.
		trainingSet = Arrays.copyOfRange(tempIndices, 0, tempTrainingSize);
		testingSet = Arrays.copyOfRange(tempIndices, tempTrainingSize, tempSize);
	}// of splitTrainingTesting

	/**
	 * Predicts the class of every testing instance and stores the results in
	 * {@link #predictions}, parallel to {@link #testingSet}.
	 */
	public void predict() {
		predictions = new int[testingSet.length];
		for (int i = 0; i < predictions.length; i++) {
			predictions[i] = predict(testingSet[i]);
		} // of for i
	}// of predict

	/**
	 * Predicts the class of one instance by majority vote among its nearest
	 * training neighbors.
	 *
	 * @param paraIndex index of the instance in the dataset.
	 * @return the predicted class index.
	 */
	public int predict(int paraIndex) {
		int[] tempNeighbors = computeNearests(paraIndex);
		return simpleVoting(tempNeighbors);
	}// of predict

	/**
	 * Computes the distance between two instances over all attributes except the
	 * last one, using {@link #distanceMeasure}.
	 *
	 * <p>
	 * For {@link #EUCLIDEAN} the value returned is the <em>squared</em> Euclidean
	 * distance (no square root is taken). Squaring is monotonic for non-negative
	 * values, so the ordering of neighbors is unaffected.
	 *
	 * @param paraI index of the first instance.
	 * @param paraJ index of the second instance.
	 * @return the distance; 0 after printing a message when the measure is not
	 *         supported.
	 */
	public double distance(int paraI, int paraJ) {
		if (distanceMeasure != MANHATTAN && distanceMeasure != EUCLIDEAN) {
			System.out.println("Unsupported distance measure: " + distanceMeasure);
			return 0;
		} // of if

		// Look the two instances up once instead of once per attribute.
		Instance tempFirst = dataset.instance(paraI);
		Instance tempSecond = dataset.instance(paraJ);
		int tempNumConditions = dataset.numAttributes() - 1;

		double resultDistance = 0;
		for (int i = 0; i < tempNumConditions; i++) {
			double tempDifference = tempFirst.value(i) - tempSecond.value(i);
			if (distanceMeasure == MANHATTAN) {
				resultDistance += Math.abs(tempDifference);
			} else {
				resultDistance += tempDifference * tempDifference;
			} // of if
		} // of for i

		return resultDistance;
	}// of distance

	/**
	 * Computes the fraction of testing instances whose prediction equals the
	 * actual class value. {@link #predict()} must have been called first.
	 *
	 * @return the accuracy; NaN when the testing set is empty.
	 */
	public double getAccuracy() {
		// A double divided by an int gives another double.
		double tempCorrect = 0;
		for (int i = 0; i < predictions.length; i++) {
			if (predictions[i] == dataset.instance(testingSet[i]).classValue()) {
				tempCorrect++;
			} // of if
		} // of for i

		return tempCorrect / testingSet.length;
	}// of getAccuracy

	/**
	 * Finds the nearest training instances of the given instance by repeated
	 * selection of the closest not-yet-selected one.
	 *
	 * <p>
	 * At most {@link #numNeighbors} neighbors are returned; when the training set
	 * is smaller than that, every training instance is returned once instead of
	 * repeating an instance.
	 *
	 * @param paraCurrent index of the instance whose neighbors are wanted.
	 * @return dataset indices of the neighbors, nearest first.
	 */
	public int[] computeNearests(int paraCurrent) {
		int tempNumNearests = Math.min(numNeighbors, trainingSet.length);
		int[] resultNearests = new int[tempNumNearests];
		boolean[] tempSelected = new boolean[trainingSet.length];

		for (int i = 0; i < tempNumNearests; i++) {
			double tempMinimalDistance = Double.MAX_VALUE;
			// -1 means "nothing chosen yet in this round", so a stale index
			// from a previous round can never be reused.
			int tempMinimalIndex = -1;

			for (int j = 0; j < trainingSet.length; j++) {
				if (tempSelected[j]) {
					continue;
				} // of if

				double tempDistance = distance(paraCurrent, trainingSet[j]);
				if (tempMinimalIndex < 0 || tempDistance < tempMinimalDistance) {
					tempMinimalDistance = tempDistance;
					tempMinimalIndex = j;
				} // of if
			} // of for j

			resultNearests[i] = trainingSet[tempMinimalIndex];
			tempSelected[tempMinimalIndex] = true;
		} // of for i

		System.out.println("The nearest of " + paraCurrent + " are: " + Arrays.toString(resultNearests));
		return resultNearests;
	}// of computeNearests

	/**
	 * Majority vote over the classes of the given neighbors.
	 *
	 * @param paraNeighbors dataset indices of the voting instances.
	 * @return the class index with the most votes; on a tie the smallest class
	 *         index wins, and 0 is returned when there are no neighbors.
	 */
	public int simpleVoting(int[] paraNeighbors) {
		int[] tempVotes = new int[dataset.numClasses()];
		for (int i = 0; i < paraNeighbors.length; i++) {
			tempVotes[(int) dataset.instance(paraNeighbors[i]).classValue()]++;
		} // of for i

		int tempMaximalVotingIndex = 0;
		int tempMaximalVoting = 0;
		for (int i = 0; i < tempVotes.length; i++) {
			if (tempVotes[i] > tempMaximalVoting) {
				tempMaximalVoting = tempVotes[i];
				tempMaximalVotingIndex = i;
			} // of if
		} // of for i

		return tempMaximalVotingIndex;
	}// of simpleVoting

	/**
	 * Entry point: trains on 80% of the data, tests on the rest, and prints the
	 * accuracy.
	 *
	 * @param args optional; {@code args[0]} is the path of the ARFF file. When
	 *             absent, the original default iris path is used.
	 */
	public static void main(String[] args) {
		String tempFilename = "E:/Program Files (x86)/data/iris.arff";
		if (args != null && args.length > 0) {
			tempFilename = args[0];
		} // of if

		KnnClassification tempClassifier = new KnnClassification(tempFilename);
		tempClassifier.splitTrainingTesting(0.8);
		tempClassifier.predict();
		System.out.println("The accuracy of the classifier is: " + tempClassifier.getAccuracy());
	}// of main

}// of class KnnClassification

运行结果

在这里插入图片描述

lxl。。

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
day16--KNN分类器（day51）

1.KNN(K-Nearest Neighbor)KNN：邻近算法，所谓K最近邻，就是K个最近的邻居的意思，说的是每个样本都可以用它最接近的K个邻近值来代表。近邻算法就是将数据集合中每一个记录进行分类的方法作用：主要用于分类，对未知事物的识别。核心思想：如果一个样本在特征空间中的K个最相邻的样本中的大多数属于某一个类别，则该样本也属于这个类别，并具有这个类别上样本的特性。算法流程：准备数据，对数据进行预处理计算测试样本点到其他每个样本点的距离对每个距离进行排序，然后选择出距离最小的K个
复制链接

扫一扫

专栏目录