#-*-coding:utf-8-*-
import numpy as np
import time
import matplotlib.pyplot as plt
from data_utils import load_CIFAR10,get_CIFAR10_data
cifar10_dir = 'datasets/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
# print("training data:", X_train.shape)

# Carve out a small training sample and a validation split.
# NOTE(review): the validation split is drawn from the TEST set here — confirm
# that is intentional (tutorial code usually carves it out of the train set).
sample_num = 250
validation_num = 100
# Advanced (list/range) indexing returns a COPY, not a view. That matters:
# the in-place `-= mean_image` below would otherwise hit the shared buffer
# twice for the first rows of X_train.
X_sample = X_train[range(sample_num)]
# BUG FIX: sample labels must come from y_train; they previously came from
# y_test, so the labels did not correspond to X_sample's images.
y_sample = y_train[range(sample_num)]
X_val = X_test[range(validation_num)]
y_val = y_test[range(validation_num)]

# Zero-center every split using the mean image of the full training set.
mean_image = np.mean(X_train, axis=0)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_sample -= mean_image

# Flatten each image into a single row vector (presumably 32*32*3 = 3072
# features, matching the W initialized below — confirm against load_CIFAR10).
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
X_sample = np.reshape(X_sample, (X_sample.shape[0], -1))
# print(X_val.shape)
# print(y_val.shape)
# print(X_sample.shape)
# print(y_sample.shape)
def softmax_loss_naive(W, X, y, reg):
    """Softmax cross-entropy loss and gradient via explicit Python loops.

    Args:
        W: (D, C) weight matrix.
        X: (N, D) minibatch of flattened samples.
        y: (N,) integer class labels in [0, C).
        reg: L2 regularization strength.

    Returns:
        (loss, dW): scalar averaged loss plus L2 penalty, and the (D, C)
        gradient of that loss with respect to W.
    """
    dW = np.zeros_like(W)
    loss = 0.0
    num_train = X.shape[0]
    num_class = W.shape[1]

    for i in range(num_train):
        raw = X[i].dot(W)
        # Shift by the max score for numerical stability before exponentiating.
        shifted = raw - raw.max()
        exp_scores = np.exp(shifted)
        denom = np.sum(exp_scores)
        # Cross-entropy term for the correct class of sample i.
        loss -= np.log(exp_scores[y[i]] / denom)
        for j in range(num_class):
            # d(loss_i)/dW[:, j] = (p_j - 1{j == y_i}) * x_i
            prob = exp_scores[j] / denom
            if j == y[i]:
                dW[:, j] += (prob - 1.0) * X[i]
            else:
                dW[:, j] += prob * X[i]

    # Average over the batch, then add the L2 regularization contributions.
    loss = loss / num_train + 0.5 * reg * np.sum(W * W)
    dW = dW / num_train + reg * W
    return loss, dW
# Small random initialization: 3072 input features, 10 CIFAR-10 classes.
W = np.random.randn(3072, 10) * 0.0001
loss, grad = softmax_loss_naive(W, X_sample, y_sample, 0.0)
print(loss)
# Sanity check: with near-zero random weights the softmax loss should be
# close to -log(1/10) for 10 balanced classes.
print(-np.log(0.1))
# BUG FIX: the two numbers below were pasted program output left in the file
# as bare expression statements; kept here as a comment for reference.
# 2.375000486697351
# 2.3025850929940455