1. Code
1.1 dataPerpare.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created by wjbKimberly on 17-10-31
import numpy as np
import sys
sys.path.append("../../../")
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
def dataPrepare(cifar10_dir, num_training, num_test):
    # Load the raw CIFAR-10 data.
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # As a sanity check, we print out the size of the training and test data.
    print('Training data shape: ', X_train.shape)
    print('Training labels shape: ', y_train.shape)
    print('Test data shape: ', X_test.shape)
    print('Test labels shape: ', y_test.shape)
    # Output:
    # Training data shape: (50000, 32, 32, 3)
    # Training labels shape: (50000,)
    # Test data shape: (10000, 32, 32, 3)
    # Test labels shape: (10000,)

    # Visualize some examples from the dataset.
    # We show a few examples of training images from each class.
    classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    num_classes = len(classes)
    samples_per_class = 7
    for y, cls in enumerate(classes):
        idxs = np.flatnonzero(y_train == y)
        idxs = np.random.choice(idxs, samples_per_class, replace=False)
        for i, idx in enumerate(idxs):
            # Place the i-th sample of class y in column y, row i of the grid
            plt_idx = i * num_classes + y + 1
            plt.subplot(samples_per_class, num_classes, plt_idx)
            plt.imshow(X_train[idx].astype('uint8'))
            plt.axis('off')
            if i == 0:
                plt.title(cls)
    # plt.show()

    # Subsample the data for more efficient code execution in this exercise:
    # keep only the first num_training examples of the training set (5000 here)
    # and the first num_test examples of the test set (500 here).
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Reshape the image data into rows (each image becomes a 32*32*3 = 3072-dim vector)
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    return X_train, y_train, X_test, y_test
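After this preprocessing, each image is flattened into a single row of 32*32*3 = 3072 values. As a quick sanity check (illustrative only; the path and subsample sizes are the ones used in myKnn.py below), the returned shapes should be:

# X_train, y_train, X_test, y_test = dataPrepare(cifar10_dir, 5000, 500)
# X_train.shape -> (5000, 3072)    y_train.shape -> (5000,)
# X_test.shape  -> (500, 3072)     y_test.shape  -> (500,)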
1.2 myKnn.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created by wjbKimberly on 17-10-21
import numpy as np
from dataPerpare import dataPrepare
import sys
sys.path.append("../../../")
from cs231n.classifiers import KNearestNeighbor
import matplotlib.pyplot as plt
# Create a kNN classifier instance.
# Remember that training a kNN classifier is a no-op:
# the classifier simply remembers the data and does no further processing.
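# (Illustration, not part of this file: in the cs231n starter code, train()
#  just memorizes the data, roughly equivalent to
#      self.X_train = X
#      self.y_train = y
#  so all of the real work happens at prediction time.)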
# load data
cifar10_dir = '../../../cs231n/datasets/cifar-10-batches-py'
num_training = 5000
num_test = 500
X_train, y_train, X_test, y_test = dataPrepare(cifar10_dir, num_training, num_test)
# KNN begin
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.
# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)
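# The print above should report (500, 5000) for this subsample.
#
# Illustrative sketch (my own standalone version, not the graded solution):
# compute_distances_two_loops is expected to fill that matrix with pairwise
# Euclidean (L2) distances using two explicit Python loops.
def two_loop_l2_sketch(X_te, X_tr):
    num_te, num_tr = X_te.shape[0], X_tr.shape[0]
    d = np.zeros((num_te, num_tr))
    for i in range(num_te):
        for j in range(num_tr):
            # L2 distance between one test row and one training row
            d[i, j] = np.sqrt(np.sum((X_te[i] - X_tr[j]) ** 2))
    return d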
# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.imshow(dists, interpolation='none')
plt.show()
# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)
# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
# Output:
# Got 137 / 500 correct => accuracy: 0.274000
# Got 139 / 500 correct => accuracy: 0.278000
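# Illustrative sketch (my own version, not the graded solution):
# predict_labels is expected to take the k smallest distances in each row of
# dists and return the majority label among those k training examples.
def predict_labels_sketch(dists, y_tr, k=1):
    num_te = dists.shape[0]
    y_pred = np.zeros(num_te, dtype=int)
    for i in range(num_te):
        # labels of the k nearest training examples for test example i
        closest_y = y_tr[np.argsort(dists[i])[:k]]
        # majority vote; ties are broken toward the smaller label index
        y_pred[i] = np.argmax(np.bincount(closest_y))
    return y_pred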
# Now let's speed up the distance matrix computation by using partial
# vectorization with one loop. Implement the function
# compute_distances_one_loop and run the code below:
dists_one = classifier.compute_distances_one_loop(X_test)
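# Illustrative sketch (my own version, not the graded solution):
# the one-loop variant drops the inner loop by broadcasting a single test row
# against the entire training matrix at once.
def one_loop_l2_sketch(X_te, X_tr):
    d = np.zeros((X_te.shape[0], X_tr.shape[0]))
    for i in range(X_te.shape[0]):
        # (num_train, D) minus (D,) broadcasts row-wise; sum over the feature axis
        d[i, :] = np.sqrt(np.sum((X_tr - X_te[i]) ** 2, axis=1))
    return d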
# To ensure that our vectorized implementation is correct, we make sure th