The batch_size in the code below is set to 32; it may need to be adjusted for your data to get better training results.

import numpy as np
from PIL import Image
import os
def labeldata2(MNIST_labels_path):
    """Read the first 6 labels from an MNIST IDX label file."""
    with open(MNIST_labels_path, 'rb') as f:
        file_labels = f.read()
    train_label = []
    for i in range(6):
        # Each label is a single byte after the 8-byte IDX header.
        train_label.append(file_labels[8 + i])
    return train_label
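# IDX label-file layout (standard MNIST format): bytes 0-3 are a magic number,
# bytes 4-7 the label count, and every byte from offset 8 on is one label in
# [0, 9], which is why reading starts at byte 8.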
def imagedata(image_folder):
    """Load every image in a folder as a flattened, normalized 28x28 grayscale row."""
    image_files = sorted(os.listdir(image_folder))  # sort for a deterministic order that matches the label file
    image_list = []
    for image_file in image_files:
        file_path = os.path.join(image_folder, image_file)
        with Image.open(file_path) as img:
            img_gray = img.convert('L')
            img_resized = img_gray.resize((28, 28))
            image_arry = np.array(img_resized, dtype=np.float32)  # convert to float
            image_arry = image_arry / 255  # normalize to [0, 1]
            image_list.append(image_arry)
    mnist_images = np.stack(image_list)
    train_image = mnist_images.reshape(mnist_images.shape[0], -1)  # flatten each image to 784 values
    return train_image
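# For a folder of N images, imagedata returns a float32 array of shape
# (N, 784) with values in [0, 1], one flattened image per row.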
def labeldata(MNIST_labels_path):
    """Read the first 20 labels from an MNIST IDX label file."""
    with open(MNIST_labels_path, 'rb') as f:
        file_labels = f.read()
    train_label = []
    for i in range(20):
        train_label.append(file_labels[8 + i])
    return train_label
def one_hot_encode(Y, num_classes):
    return np.eye(num_classes)[Y]
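# Example: one_hot_encode(np.array([2, 0]), 3) returns
# [[0., 0., 1.],
#  [1., 0., 0.]]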
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, reg_lambda=0.01):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.reg_lambda = reg_lambda
        # He initialization: scale the weights by sqrt(2 / fan_in)
        self.W1 = np.random.randn(self.input_size, self.hidden_size) * np.sqrt(2.0 / self.input_size)
        self.b1 = np.zeros((1, self.hidden_size))
        self.W2 = np.random.randn(self.hidden_size, self.output_size) * np.sqrt(2.0 / self.hidden_size)
        self.b2 = np.zeros((1, self.output_size))
        # Batch-norm scale (gamma) and shift (beta) parameters for both layers
        self.gamma1 = np.ones((self.hidden_size,))
        self.beta1 = np.zeros((self.hidden_size,))
        self.gamma2 = np.ones((self.output_size,))
        self.beta2 = np.zeros((self.output_size,))
    def relu(self, Z):
        return np.maximum(0, Z)

    def softmax(self, Z):
        # Subtract the row max before exponentiating for numerical stability
        expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        expZ[expZ < 1e-9] = 1e-9
        return expZ / np.sum(expZ, axis=1, keepdims=True)
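    # Softmax is shift-invariant (softmax(Z - c) == softmax(Z)), so subtracting
    # the row max changes nothing mathematically but prevents np.exp overflow.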
    def batchnorm_forward(self, X, gamma, beta, eps=1e-5):
        mu = np.mean(X, axis=0)
        var = np.var(X, axis=0)
        X_norm = (X - mu) / np.sqrt(var + eps)
        # Also return X_norm so backward() can compute the gamma gradients
        return X_norm * gamma + beta, mu, var, X_norm
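    # Note: mean and variance are recomputed from each batch; a full batch-norm
    # implementation would also track running statistics for use at inference.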
    def forward(self, X):
        self.X = X
        self.Z1 = np.dot(X, self.W1) + self.b1
        X1_bn, self.bn_mu1, self.bn_var1, self.X_norm1 = self.batchnorm_forward(self.Z1, self.gamma1, self.beta1)
        self.A1 = self.relu(X1_bn)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        X2_bn, self.bn_mu2, self.bn_var2, self.X_norm2 = self.batchnorm_forward(self.Z2, self.gamma2, self.beta2)
        self.A2 = self.softmax(X2_bn)
        return self.A2
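    # Shape flow for a batch of m samples: X (m, 784) -> Z1 (m, hidden_size)
    # -> A1 (m, hidden_size) -> Z2 (m, output_size) -> A2 (m, output_size),
    # with each row of A2 summing to 1.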
    def backward(self, X, Y, Y_pred):
        # Simplified backward pass: the mean/variance terms of the batch-norm
        # gradient are ignored, so these are approximate gradients.
        m = X.shape[0]
        dZ2 = Y_pred - Y  # gradient of cross-entropy composed with softmax
        dW2 = np.dot(self.A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * (self.A1 > 0)  # ReLU gradient
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m
        # Scale/shift gradients use the normalized activations saved in forward()
        dgamma2, dbeta2 = np.sum(dZ2 * self.X_norm2, axis=0), np.sum(dZ2, axis=0)
        dgamma1, dbeta1 = np.sum(dZ1 * self.X_norm1, axis=0), np.sum(dZ1, axis=0)
        # Approximate gradient w.r.t. the first layer's pre-activations
        # (returned for completeness; the update step does not use it)
        dZ2_bn = dZ2 * self.gamma2
        dZ1_bn = np.dot(dZ2_bn, self.W2.T) * (self.Z1 - self.bn_mu1) * self.gamma1
        dX1 = dZ1_bn / np.sqrt(self.bn_var1 + 1e-5)
        # L2 regularization terms (added, not subtracted, to penalize large weights)
        dW1 += self.reg_lambda * self.W1 / m
        dW2 += self.reg_lambda * self.W2 / m
        return dX1, dW1, db1, dW2, db2, dgamma1, dbeta1, dgamma2, dbeta2
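    # dZ2 = Y_pred - Y is the standard combined gradient of softmax followed
    # by cross-entropy; the weight gradients above remain approximations
    # because the batch-norm normalization step is not fully backpropagated.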
    def update(self, dW1, dW2, db1, db2, dgamma1, dbeta1, dgamma2, dbeta2, learning_rate):
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.gamma1 -= learning_rate * dgamma1
        self.beta1 -= learning_rate * dbeta1
        self.gamma2 -= learning_rate * dgamma2
        self.beta2 -= learning_rate * dbeta2
    def compute_loss(self, Y_pred, Y):
        m = Y.shape[0]
        loss = -np.sum(Y * np.log(Y_pred + 1e-9)) / m
        return loss
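    # Cross-entropy: loss = -(1/m) * sum_i sum_k Y[i,k] * log(Y_pred[i,k]);
    # the 1e-9 term guards against log(0).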
    def train(self, X, Y, learning_rate, num_iters, batch_size):
        num_samples = X.shape[0]
        for i in range(num_iters):
            # Sample a mini-batch (with replacement) each iteration
            indices = np.random.choice(num_samples, batch_size)
            X_batch = X[indices]
            Y_batch = Y[indices]
            Y_batch_one_hot = one_hot_encode(Y_batch, self.output_size)
            Y_pred = self.forward(X_batch)
            loss = self.compute_loss(Y_pred, Y_batch_one_hot)
            dX1, dW1, db1, dW2, db2, dgamma1, dbeta1, dgamma2, dbeta2 = self.backward(X_batch, Y_batch_one_hot, Y_pred)
            self.update(dW1, dW2, db1, db2, dgamma1, dbeta1, dgamma2, dbeta2, learning_rate)
            if i % 100 == 0:
                print(f"Iteration {i}: loss {loss}")
    def predict(self, X):
        Y_pred = self.forward(X)
        return np.argmax(Y_pred, axis=1)

    def evaluate_accuracy(self, X, Y):
        predictions = self.predict(X)
        accuracy = np.mean(predictions == Y)
        return accuracy
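# A minimal smoke test (sketch, using random data rather than real MNIST) to
# confirm the forward/backward shapes line up before training on real images:
#   net = NeuralNetwork(784, 128, 10)
#   Xr = np.random.rand(64, 784).astype(np.float32)
#   yr = np.random.randint(0, 10, size=64)
#   net.train(Xr, yr, learning_rate=0.001, num_iters=10, batch_size=32)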
if __name__ == "__main__":
    # Load the training data
    image_folder = "D:\\MNIST_data\\train1"
    MNIST_labels_path = 'D:\\MNIST_data\\train-labels-idx1-ubyte\\train-labels.idx1-ubyte'
    X_train = imagedata(image_folder)
    y_train = labeldata(MNIST_labels_path)
    y_train = np.array(y_train)
    # Initialize the network and train
    input_size = 784  # 28x28
    hidden_size = 128
    output_size = 10
    nn = NeuralNetwork(input_size, hidden_size, output_size)
    learning_rate = 0.001
    num_iters = 1000
    batch_size = 32
    nn.train(X_train, y_train, learning_rate, num_iters, batch_size)
    # Evaluate accuracy on the training set
    accuracy = nn.evaluate_accuracy(X_train, y_train)
    print(f"Accuracy on the training set: {accuracy * 100:.2f}%")
    # Evaluate on the test set
    test_image_folder = "D:\\MNIST_data\\test"  # this should be the test-set path
    test_MNIST_labels_path = 'D:\\MNIST_data\\train-labels-idx1-ubyte\\train-labels.idx1-ubyte'  # test-set label path; make sure it points at the test labels
    X_test = imagedata(test_image_folder)
    y_test = labeldata2(test_MNIST_labels_path)  # make sure these are the test-set labels
    y_test = np.array(y_test)
    y_test = y_test.astype(int)
    accuracy = nn.evaluate_accuracy(X_test, y_test)
    print(f"Accuracy on the test set: {accuracy * 100:.2f}%")