# coding:utf-8
import sklearn.datasets
import sklearn.neighbors
import numpy.random
import matplotlib.pyplot
import matplotlib.colors
if __name__ == "__main__":
# Load iris dataset
iris = sklearn.datasets.load_iris()
# Split the dataset with sampleRatio
sampleRatio = 0.2
n_samples = len(iris.target)
sampleBoundary = int(n_samples * sampleRatio)
# Shuffle the whole data
shuffleIdx = range(n_samples)
numpy.random.shuffle(shuffleIdx)
# Make the training data
train_features = iris.data[shuffleIdx[:sampleBoundary]]
train_targets = iris.target[shuffleIdx[:sampleBoundary]]
# Make the testing data
test_features = iris.data[shuffleIdx[sampleBoundary:]]
test_targets = iris.target[shuffleIdx[sampleBoundary:]]
# Train
n_neighbors = 5 # 选5个最近邻
for weights in ['uniform', 'distance']: # 这个地方采用两种加权方式
kNeighborsClassifier = sklearn.neighbors.KNeighborsClassifier(n_neighbors, weights = weights)
kNeighborsClassifier.fit(train_features, train_targets)
# Test
predict_targets = kNeighborsClassifier.predict(test_features)
# Evaluation
n_test_samples = len(test_targets)
X = range(n_test_samples)
correctNum = 0
for i in X:
if(predict_targets[i] == test_targets[i]):
correctNum += 1
accuracy = correctNum * 1.0 / n_test_samples
print("K Neighbors Classifier (Iris) Accuracy [weight = '%s']:%.2f" %(weights, accuracy))
# Draw
cmap_bold = matplotlib.colors.ListedColormap(['red', 'blue', 'green'])
X_test = test_features[:, 2:4]
X_train = train_features[:, 2:4]
matplotlib.pyplot.scatter(X_train[:, 0], X_train[:, 1], label = 'train samples', marker='o', c = train_targets, cmap=cmap_bold,)
matplotlib.pyplot.scatter(X_test[:,0], X_test[:, 1], label = 'test samples', marker='+', c = predict_targets, cmap=cmap_bold)
legend = matplotlib.pyplot.legend()
matplotlib.pyplot.title("K Neighbors Classifier (Iris) [weight = %s]" %(weights))
matplotlib.pyplot.savefig("K Neighbors Classifier (Iris) [weight = %s].png" %(weights), format='png')
matplotlib.pyplot.show()
KNN
最新推荐文章于 2024-08-16 20:48:37 发布