Implementing kNN for CS231n

This is a kNN solution I found online while working through CS231n; I'm recording it here. I no longer remember who the original author was.

import numpy as np
class KNearestNeighbor:
	def __init__(self):
		pass
	def train(self,X,y):
		self.x_train = X
		self.y_train = y
	def predict(self,X,k=1,num_loops = 0):
		if num_loops == 0 :
			dists = self.compute_distance_no_loops(X)
		elif num_loops == 1:
			dists = self.compute_distance_one_loop(X)
		elif num_loops == 2:
			dists = self.compute_distance_two_loops(X)
		else :
			raise ValueError('Invalid value %d for num_loops '%num_loops)
		return self.predict_labels(dists,k=k)
	def predict_labels(self,dists,k = 1):
		#number of test samples to predict
		num_test = dists.shape[0]
		#initialize the array of predicted labels
		y_pred = np.zeros(num_test)
		for i in range(num_test):
			closest_y = []
			#np.argsort returns the indices that would sort the i-th row from smallest to largest,
			#so y_indicies holds the training samples ordered by distance to the i-th test sample
			y_indicies = np.argsort(dists[i,:],axis = 0)
			#closest_y holds the labels of the k nearest training samples
			closest_y = self.y_train[y_indicies[:k]]
			#np.bincount counts how many times each label occurs among the k neighbors
			#(i.e. the class distribution over the neighbors), and np.argmax picks the most common label
			y_pred[i] = np.argmax(np.bincount(closest_y))
		return y_pred
	def compute_distance_no_loops(self,X):
		num_test = X.shape[0]
		num_train = self.x_train.shape[0]
		dists = np.zeros((num_test,num_train))
		#axis = 1 sums along each row, axis = 0 sums along each column
		#sum of squared pixel values for each test image
		test_sum = np.sum(np.square(X),axis = 1)
		#sum of squared pixel values for each training image
		train_sum = np.sum(np.square(self.x_train),axis = 1)
		inner_product = np.dot(X,self.x_train.T)
		#test_sum.reshape(-1,1) turns the row vector into a column so it broadcasts against train_sum
		dists = np.sqrt(-2 * inner_product + test_sum.reshape(-1,1) + train_sum)
		return dists
	def compute_distance_one_loop(self,X):
		num_test = X.shape[0]
		num_train = self.x_train.shape[0]
		dists = np.zeros((num_test,num_train))
		for i in range(num_test):
			dists[i,:] = np.sqrt(np.sum(np.square(self.x_train - X[i,:]),axis = 1))
		return dists
	def compute_distance_two_loops(self,X):
		num_test = X.shape[0]
		num_train = self.x_train.shape[0]
		dists = np.zeros((num_test,num_train))
		for i in range(num_test):
			for j in range(num_train):
				dists[i,j] = np.sqrt(np.sum(np.square(X[i,:] - self.x_train[j,:])))
		return dists
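The no-loops version relies on expanding the squared L2 distance, ||x - t||^2 = ||x||^2 + ||t||^2 - 2 x·t, which is why it only needs one matrix product plus two row sums. Here is a minimal sanity check on random data (my own sketch, assuming the class above is saved as knn.py, which is how it is imported later in the post) confirming that all three distance implementations agree:

import numpy as np
from knn import KNearestNeighbor

#small random "images" so the check runs instantly
X_tr = np.random.randn(50, 12)
y_tr = np.random.randint(0, 10, size=50)
X_te = np.random.randn(8, 12)

clf = KNearestNeighbor()
clf.train(X_tr, y_tr)

d_two = clf.compute_distance_two_loops(X_te)
d_one = clf.compute_distance_one_loop(X_te)
d_none = clf.compute_distance_no_loops(X_te)

#all three should agree up to floating point error
print(np.allclose(d_two, d_one), np.allclose(d_two, d_none))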
import pickle
import numpy as np
import os

def load_cifar_batch(filename):
	with open(filename,'rb') as f:
		datadict = pickle.load(f,encoding = 'bytes')
		x = datadict[b'data']
		y = datadict[b'labels']
		#we use the CIFAR-10 dataset, which is split into 5 training batches and 1 test batch;
		#each batch contains 10000 32*32 RGB images drawn from 10 classes
		x = x.reshape(10000,3,32,32).transpose(0,2,3,1).astype('float')
		y = np.array(y)
		return x,y
def load_cifar10(root):
	xs = []
	ys = []
	for b in range(1,6):
		f = os.path.join(root,'data_batch_%d'%(b,))
		x,y = load_cifar_batch(f)
		xs.append(x)
		ys.append(y)
	X_train = np.concatenate(xs)
	Y_train = np.concatenate(ys)
	del x,y
	X_test,Y_test = load_cifar_batch(os.path.join(root,'test_batch'))
	return X_train,Y_train,X_test,Y_test
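Before plugging the loader into the kNN script, it helps to confirm the shapes it returns. A quick check (my own sketch, using the same cifar-10-batches-py directory as the script below):

from dataDownload import load_cifar10

X_train, Y_train, X_test, Y_test = load_cifar10('cifar-10-batches-py')
print(X_train.shape)  # (50000, 32, 32, 3) after stacking the 5 training batches
print(Y_train.shape)  # (50000,)
print(X_test.shape)   # (10000, 32, 32, 3)
print(Y_test.shape)   # (10000,)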
import numpy as np
from dataDownload import load_cifar10
import matplotlib.pyplot as plt
from knn import KNearestNeighbor
x_train,y_train,x_test,y_test = load_cifar10('cifar-10-batches-py')
classes = ['plane','car','bird','cat','deer','dog','frog','horse','ship','truck']
num_classes = len(classes)
samples_per_class = 7
num_training = 5000
mask = range(num_training)
x_train = x_train[mask]
y_train = y_train[mask]
num_test = 500
mask = range(num_test)
x_test = x_test[mask]
y_test = y_test[mask]
x_train = np.reshape(x_train,(x_train.shape[0],-1))
x_test = np.reshape(x_test,(x_test.shape[0],-1))

classifier = KNearestNeighbor()
classifier.train(x_train,y_train)
dists = classifier.compute_distance_two_loops(x_test)

y_test_pred = classifier.predict_labels(dists,k=1)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy : %f' %(num_correct,num_test,accuracy))
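The three distance implementations return identical matrices, so the only reason to keep all of them is speed: the fully vectorized version avoids Python-level loops entirely. A rough timing comparison (my own sketch, reusing the classifier and x_test prepared above; time_function is a hypothetical helper defined here):

import time

def time_function(f, *args):
	#return how long f(*args) takes, in seconds
	tic = time.time()
	f(*args)
	return time.time() - tic

print('two loops : %.2fs' % time_function(classifier.compute_distance_two_loops, x_test))
print('one loop  : %.2fs' % time_function(classifier.compute_distance_one_loop, x_test))
print('no loops  : %.2fs' % time_function(classifier.compute_distance_no_loops, x_test))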
