CS231N Course Assignment 1 -- Softmax

Assignment 1 – Softmax

The assignment handout is available here.
The assignment asks for implementations of kNN, SVM, and Softmax classifiers, plus a two-layer neural-network classifier.
The dataset is CIFAR-10.

How Softmax Works

Softmax turns the scores produced by a linear classifier into probabilities for multi-class classification: where the SVM outputs raw scores, Softmax outputs probabilities.
The softmax function takes a vector of arbitrary real-valued scores and returns a vector whose elements all lie between 0 and 1 and sum to 1.
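
In symbols, for a score vector s = Wx, softmax assigns class k the probability

P(y = k | x) = e^{s_k} / Σ_j e^{s_j}

and the cross-entropy loss of a training example (x_i, y_i) is

L_i = -log( e^{s_{y_i}} / Σ_j e^{s_j} )

Both loss functions below compute the mean of L_i over the batch, plus L2 regularization.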

Building the Softmax Classifier

The overall layout of the program is as follows: a classifiers folder and a datasets folder alongside softmax.py and linear_classifier.py, with linear_softmax.py inside classifiers and data_utils.py inside datasets (this matches the imports below):

softmax.py
linear_classifier.py
classifiers/
    linear_softmax.py
datasets/
    data_utils.py

softmax.py
from linear_classifier import Softmax
import time
import numpy as np  # import numpy
from datasets.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
from classifiers.linear_softmax import *
import math

cifar10_dir = 'E:/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
print('Training data shape: ',X_train.shape)
print('Training labels shape: ',y_train.shape)
print('Test data shape: ',X_test.shape)
print('Test labels shape: ',y_test.shape)

classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7  # number of samples to draw per class
for y,cls in enumerate(classes):    # y is the class index, cls the class name, e.g. (0, 'plane')
    idxs = np.flatnonzero(y_train==y) # indices of all training examples with label y
    idxs = np.random.choice(idxs,samples_per_class,replace=False) # randomly pick samples_per_class of them
    for i,idx in enumerate(idxs): # loop over the chosen samples and their positions in the training set
        plt_idx = i * num_classes + y + 1 # position of this sample in the subplot grid
        plt.subplot(samples_per_class,num_classes,plt_idx) # select that subplot
        plt.imshow(X_train[idx].astype('uint8')) # draw the image
        plt.axis('off')
        if i == 0:
            plt.title(cls) # label the column with the class name
plt.show()

num_training = 49000  # training set; num_dev examples are later drawn from it to cut training time
num_validation = 1000  # validation set; used to pick the learning rate and regularization strength that give the highest accuracy
num_test = 1000  # test set; used to measure the final accuracy once the best hyperparameters are found
num_dev = 500   # small random training set; used for quick stochastic-gradient-descent runs
mask = list(range(num_training, num_training + num_validation)) # carve the validation set out of X_train and y_train
X_val = X_train[mask]
y_val = y_train[mask]
mask = list(range(num_training))  # keep the first num_training examples as the training set
X_train = X_train[mask]
y_train = y_train[mask]
mask = list(range(num_test))  # keep the first num_test examples of X_test and y_test as the test set
X_test = X_test[mask]
y_test = y_test[mask]
mask = np.random.choice(num_training, num_dev, replace=False)  # randomly sample num_dev indices from the training set
X_dev = X_train[mask]
y_dev = y_train[mask]

# Reshape X_train, X_val, X_test and X_dev from n*32*32*3 image stacks into n*3072 matrices: each image is flattened into one row for easier processing
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

# Zero-center X_train, X_val, X_test and X_dev by subtracting the mean image; this puts the features on a common scale, like normalization but without dividing by the standard deviation
mean_image = np.mean(X_train, axis = 0)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
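# Note: appending a column of ones is the "bias trick": the bias vector b is
# folded into W, so scores = x'W' with x' = [x, 1] and W' of shape (3073, 10).
# This is why W below has 3073 rows rather than 3072.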
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)

W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))
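# Why -log(0.1)? With near-zero weights all scores are roughly equal, so
# softmax assigns about 1/10 probability to each of the 10 classes, and the
# expected loss is -log(0.1) ≈ 2.302; the computed loss should be close to it.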

tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)
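
# Optional extra check (a minimal sketch, not part of the original script):
# compare the analytic gradient with a centered finite difference on a few
# randomly chosen entries of W.
h = 1e-5
for _ in range(5):
    ix = tuple(np.random.randint(d) for d in W.shape)
    W[ix] += h
    loss_plus, _ = softmax_loss_naive(W, X_dev, y_dev, 0.000005)
    W[ix] -= 2 * h
    loss_minus, _ = softmax_loss_naive(W, X_dev, y_dev, 0.000005)
    W[ix] += h  # restore W
    grad_numerical = (loss_plus - loss_minus) / (2 * h)
    print('numerical: %f analytic: %f' % (grad_numerical, grad_naive[ix]))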

# Hyperparameter tuning
# two hyperparameters: the learning rate and the regularization strength
results = {}
best_val = -1
best_softmax = None
learning_rates = [1e-7, 3e-7, 5e-7, 9e-7]
regularization_strengths = [2.5e4, 1e4, 3e4, 2e4]
for lr in learning_rates:   # grid search: test every combination of learning rate and regularization strength
    for reg in regularization_strengths:
        softmax = Softmax() # learning_rate: step size; reg: regularization strength; num_iters: number of SGD iterations; verbose: print progress if True
        loss_hist = softmax.train(X_train, y_train, learning_rate=lr, reg=reg,num_iters=1500, verbose=True)
        y_train_pred = softmax.predict(X_train)
        y_val_pred = softmax.predict(X_val)
        y_train_acc = np.mean(y_train_pred==y_train)  # np.mean(): fraction of correct predictions
        y_val_acc = np.mean(y_val_pred==y_val)
        results[(lr,reg)] = [y_train_acc, y_val_acc]
        if y_val_acc > best_val: # keep the model with the best validation accuracy
            best_val = y_val_acc
            best_softmax = softmax  # save the current model
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]  # unpack the stored accuracies
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (lr, reg, train_accuracy, val_accuracy))
    
print('best validation accuracy achieved during cross-validation: %f' % best_val)

x_scatter = [math.log10(x[0]) for x in results]
y_scatter = [math.log10(x[1]) for x in results]
marker_size = 100
colors = [results[x][0] for x in results]
plt.subplot(1, 2, 1)
plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
plt.colorbar()
plt.xlabel('log learning rate')
plt.ylabel('log regularization strength')
plt.title('CIFAR-10 training accuracy')
colors = [results[x][1] for x in results] # default size of markers is 20
plt.subplot(1, 2, 2)
plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
plt.colorbar()
plt.xlabel('log learning rate')
plt.ylabel('log regularization strength')
plt.title('CIFAR-10 validation accuracy')
plt.show()

y_test_pred = best_softmax.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
print('softmax on raw pixels final test set accuracy: %f' % (test_accuracy, ))

# Visualize W at its learned optimum; each class's weights can be rendered as an image, a template showing which pixels push that class's score up
w = best_softmax.W[:-1,:] # strip out the bias
w = w.reshape(32, 32, 3, 10)
w_min, w_max = np.min(w), np.max(w)

classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']   # class names
for i in range(10):
    plt.subplot(2, 5, i + 1)
    # Rescale the weights to be between 0 and 255
    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(classes[i])
plt.show() # the templates the learned W ends up representing

data_utils.py
from __future__ import print_function

from builtins import range
from six.moves import cPickle as pickle
import numpy as np
import os
from imageio import imread
import platform

def load_pickle(f):
    version = platform.python_version_tuple()
    if version[0] == '2':
        return  pickle.load(f)
    elif version[0] == '3':
        return  pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))

def load_CIFAR_batch(filename):
    """ load single batch of cifar """
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)
        X = datadict['data']
        Y = datadict['labels']
        X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
        Y = np.array(Y)
        return X, Y

def load_CIFAR10(ROOT):
    """ load all of cifar """
    xs = []
    ys = []
    for b in range(1,6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for classifiers. These are the same steps as we used for the SVM, but
    condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    # Transpose so that channels come first
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
      'X_train': X_train, 'y_train': y_train,
      'X_val': X_val, 'y_val': y_val,
      'X_test': X_test, 'y_test': y_test,
    }


def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        wnid_to_words = dict(line.split('\t') for line in f)
        for wnid, words in wnid_to_words.items():
            wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = wnid_to_label[wnid] * \
                        np.ones(num_images, dtype=np.int64)
        for j, img_file in enumerate(filenames):
            img_file = os.path.join(path, 'train', wnid, 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                # grayscale file
                img.shape = (64, 64, 1)
            X_train_block[j] = img.transpose(2, 0, 1)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        img_files = []
        val_wnids = []
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            val_wnids.append(wnid)
        num_val = len(img_files)
        y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
        X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
        for i, img_file in enumerate(img_files):
            img_file = os.path.join(path, 'val', 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                img.shape = (64, 64, 1)
            X_val[i] = img.transpose(2, 0, 1)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'test', 'images', img_file)
        img = imread(img_file)
        if img.ndim == 2:
            img.shape = (64, 64, 1)
        X_test[i] = img.transpose(2, 0, 1)

    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        with open(y_test_file, 'r') as f:
            img_file_to_wnid = {}
            for line in f:
                line = line.split('\t')
                img_file_to_wnid[line[0]] = line[1]
        y_test = [wnid_to_label[img_file_to_wnid[img_file]]
                  for img_file in img_files]
        y_test = np.array(y_test)

    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    return {
      'class_names': class_names,
      'X_train': X_train,
      'y_train': y_train,
      'X_val': X_val,
      'y_val': y_val,
      'X_test': X_test,
      'y_test': y_test,
      'mean_image': mean_image,
    }


def load_models(models_dir):
    """
    Load saved models from disk. This will attempt to unpickle all files in a
    directory; any files that give errors on unpickling (such as README.txt)
    will be skipped.

    Inputs:
    - models_dir: String giving the path to a directory containing model files.
      Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    models = {}
    for model_file in os.listdir(models_dir):
        with open(os.path.join(models_dir, model_file), 'rb') as f:
            try:
                models[model_file] = load_pickle(f)['model']
            except pickle.UnpicklingError:
                continue
    return models


def load_imagenet_val(num=None):
    """Load a handful of validation images from ImageNet.

    Inputs:
    - num: Number of images to load (max of 25)

    Returns:
    - X: numpy array with shape [num, 224, 224, 3]
    - y: numpy array of integer image labels, shape [num]
    - class_names: dict mapping integer label to class name
    """
    imagenet_fn = 'cs231n/datasets/imagenet_val_25.npz'
    if not os.path.isfile(imagenet_fn):
        print('file %s not found' % imagenet_fn)
        print('Run the following:')
        print('cd cs231n/datasets')
        print('bash get_imagenet_val.sh')
        assert False, 'Need to download imagenet_val_25.npz'
    f = np.load(imagenet_fn)
    X = f['X']
    y = f['y']
    class_names = f['label_map'].item()
    if num is not None:
        X = X[:num]
        y = y[:num]
    return X, y, class_names
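
Note that softmax.py only uses load_CIFAR10 from this file; the remaining helpers (the TinyImageNet, saved-model, and ImageNet-validation loaders) ship with the course skeleton and are included here unchanged.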

linear_softmax.py
import numpy as np  # import numpy
from random import shuffle

def softmax_loss_naive(W, X, y, reg):
  """
  Compute the softmax loss and gradient dW with explicit loops.

  Inputs:
  - W: numpy array of weights, shape (D, C)
  - X: numpy array of data, shape (N, D)
  - y: numpy array of labels, shape (N,)

  Returns:
  - loss
  - dW
  """
  # initialize the loss and the gradient
  loss = 0.0                # accumulated loss
  dW = np.zeros_like(W)     # array with the same shape and dtype as W, filled with zeros
  num_classes = W.shape[1]  # number of classes
  num_train = X.shape[0]    # number of training samples
  for i in range(num_train):  # accumulate the loss of each training sample
    scores = X[i].dot(W)   # class scores of sample X[i] under the current W
    shift_scores = scores - np.max(scores)  # shift the scores for numerical stability
    loss_i = -shift_scores[y[i]] + np.log(np.sum(np.exp(shift_scores)))  # cross-entropy loss of sample i
    loss += loss_i
    for j in range(num_classes):
      softmax_output = np.exp(shift_scores[j])/np.sum(np.exp(shift_scores))  # predicted probability of class j
      if j == y[i]:
        dW[:,j] += (-1 + softmax_output) * X[i]  # gradient for the correct class
      else:
        dW[:,j] += softmax_output * X[i]         # gradient for the other classes
  loss /= num_train
  loss += 0.5 * reg * np.sum(W*W)  # add L2 regularization
  dW = dW/num_train + reg * W
  return loss, dW    # loss: value of the loss function; dW: gradient with respect to W, same shape as W

def softmax_loss_vectorized(W, X, y, reg):
  """
  Vectorized computation of the loss and dW.
  Inputs/outputs: the same as softmax_loss_naive.
  """
  # initialize parameters
  loss = 0.0
  dW = np.zeros_like(W)
  num_classes = W.shape[1]
  num_train = X.shape[0]
  scores = X.dot(W)
  shift_scores = scores - np.max(scores,axis=1).reshape(-1,1)  # shift each row for numerical stability
  softmax_output = np.exp(shift_scores)/np.sum(np.exp(shift_scores),axis = 1).reshape(-1,1)  # row-wise softmax probabilities
  loss = -np.sum(np.log(softmax_output[range(num_train),list(y)]))  # sum of the correct classes' negative log-probabilities
  loss /= num_train
  loss += 0.5 * reg * np.sum(W*W)
  dS = softmax_output.copy()           # gradient of the loss w.r.t. the scores ...
  dS[range(num_train),list(y)] += -1   # ... is the probabilities, minus 1 at the correct class
  dW = (X.T).dot(dS)
  dW = dW / num_train + reg * W
  return loss, dW
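
The gradient used in both functions follows from differentiating the cross-entropy loss. Writing p_j = e^{s_j} / Σ_k e^{s_k} for the predicted probability of class j, the derivative of L_i with respect to weight column w_j is

∂L_i/∂w_j = (p_j - 1[j = y_i]) · x_i

so the column of the correct class accumulates (p_{y_i} - 1) · x_i and every other column accumulates p_j · x_i, which is exactly what both implementations compute; the extra reg * W term is the derivative of the 0.5 * reg * Σ W² regularizer.
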
linear_classifier.py
from __future__ import print_function

from builtins import range
from builtins import object
import numpy as np
from classifiers.linear_softmax import *

class LinearClassifier(object):

    def __init__(self):
        self.W = None

    # learning_rate: step size; reg: regularization strength; num_iters: number of SGD iterations;
    # batch_size: number of samples used per iteration; verbose: if True, print progress
    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100, batch_size=200, verbose=False):
        num_train, dim = X.shape  # number of training images and their dimensionality
        num_classes = np.max(y) + 1  # assume y takes values 0..K-1 where K is the number of classes
        if self.W is None:
            self.W = 0.001 * np.random.randn(dim, num_classes)  # randomly initialize W
        loss_history = []  # run stochastic gradient descent to optimize W; record the loss of every iteration
        for it in range(num_iters):  # train for num_iters steps
            X_batch = None
            y_batch = None
            batch_inx = np.random.choice(num_train,batch_size)  # sample a minibatch: batch_size random indices from the training set (with replacement, which is faster and fine for SGD) to keep each step cheap
            X_batch = X[batch_inx,:]
            y_batch = y[batch_inx]

            loss, grad = self.loss(X_batch, y_batch, reg)  # evaluate the loss and gradient
            loss_history.append(loss)

            self.W = self.W - learning_rate * grad  # parameter update: step against the gradient, which points in the direction of increasing loss

            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))

        return loss_history

    def predict(self, X):
        y_pred = np.zeros(X.shape[0])

        score = X.dot(self.W) # class scores under the trained W
        y_pred = np.argmax(score,axis=1)  # the highest-scoring class in each row is the predicted label

        return y_pred

    def loss(self, X_batch, y_batch, reg):
        pass

class Softmax(LinearClassifier):
    """ A subclass that uses the Softmax + Cross-entropy loss function """

    def loss(self, X_batch, y_batch, reg):
        return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
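
For reference, a minimal usage sketch (it assumes the preprocessed X_train, y_train, X_val, y_val from softmax.py above; the hyperparameter values are illustrative, not tuned):

softmax = Softmax()
loss_hist = softmax.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4,
                          num_iters=1500, verbose=True)
val_acc = np.mean(softmax.predict(X_val) == y_val)
print('validation accuracy: %f' % val_acc)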

I hope this post is helpful. If anything above is inaccurate, corrections are welcome.

Sharing determines your ceiling; learning sets you apart.
