[Artificial Intelligence] Implementing digit recognition on the MNIST dataset by hand with numpy

import numpy as np
from PIL import Image
import os

# Helper functions for loading the data (imagedata and labeldata)
def imagedata(image_folder):
    image_files = os.listdir(image_folder)
    image_list = []
    for image_file in image_files:
        file_path = os.path.join(image_folder, image_file)
        with Image.open(file_path) as img:
            img_gray = img.convert('L')  # convert to grayscale
            img_resized = img_gray.resize((28, 28))
            image_arry = np.array(img_resized, dtype=np.float32)  # convert to float
            image_arry = image_arry / 255  # normalize pixel values to [0, 1]
            image_list.append(image_arry)
    mnist_images = np.stack(image_list)
    # flatten each 28x28 image into a 784-dimensional row vector
    train_image = mnist_images.reshape(mnist_images.shape[0], -1)
    # train_image -= np.mean(train_image)
    return train_image


def labeldata(MNIST_labels_path):
    with open(MNIST_labels_path, 'rb') as f:
        file_labels = f.read()
    # idx1 format: an 8-byte header, then one byte per label
    train_label = []
    for i in range(20):
        train_label.append(file_labels[8 + i])
    return train_label


def labeldata2(MNIST_labels_path):
    # same as labeldata, but reads only the first 6 labels
    with open(MNIST_labels_path, 'rb') as f:
        file_labels = f.read()
    train_label = []
    for i in range(6):
        train_label.append(file_labels[8 + i])
    return train_label
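
As an aside, because the idx1 label file is nothing more than an 8-byte header followed by one byte per label, the per-byte loops above can be collapsed into a single `np.frombuffer` call. A minimal sketch, assuming the same file layout (the helper name `labeldata_frombuffer` is just for illustration):

```python
import numpy as np

def labeldata_frombuffer(labels_path, count):
    """Read the first `count` labels from an idx1 file (8-byte header, 1 byte per label)."""
    with open(labels_path, 'rb') as f:
        raw = f.read()
    # offset=8 skips the magic number and item count in the header
    return np.frombuffer(raw, dtype=np.uint8, count=count, offset=8).astype(int)
```

The same idea is used for the whole dataset in the gzip-based loader near the end of this post.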


class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, reg_lambda=0.01):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.reg_lambda = reg_lambda
        # He initialization: scale the Gaussian weights by sqrt(2 / fan_in)
        self.W1 = np.random.randn(self.input_size, self.hidden_size) * np.sqrt(2.0 / self.input_size)
        self.b1 = np.zeros((1, self.hidden_size))
        self.W2 = np.random.randn(self.hidden_size, self.output_size) * np.sqrt(2.0 / self.hidden_size)
        self.b2 = np.zeros((1, self.output_size))


    def relu(self, Z):
        return np.maximum(0, Z)

    # Clamp the exponentials so softmax never outputs probabilities below 1e-9,
    # which keeps the loss from producing pathological values
    def softmax(self, Z):
        expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))  # subtract the row max for numerical stability
        expZ[expZ < 1e-9] = 1e-9  # make sure no value drops below 1e-9
        return expZ / np.sum(expZ, axis=1, keepdims=True)

    def forward(self, X):
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.relu(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.softmax(self.Z2)
        return self.A2

    def compute_loss(self, Y_pred, y_batch_one_hot):
        m = y_batch_one_hot.shape[0]
        # cross-entropy: the one-hot labels keep only log p of the true class,
        # and the 1e-9 avoids taking log(0)
        loss = -np.sum(y_batch_one_hot * np.log(Y_pred + 1e-9)) / m
        # add the L2 regularization term
        reg_loss = self.reg_lambda * (np.sum(self.W1 ** 2) + np.sum(self.W2 ** 2)) / (2 * m)
        loss += reg_loss
        return loss

    def train(self, X, y, learning_rate, num_iters, batch_size):
        num_samples = X.shape[0]
        for i in range(num_iters):
            # sample a random mini-batch
            indices = np.random.choice(num_samples, batch_size)
            X_batch = X[indices]
            y_batch = y[indices]

            # convert y_batch to one-hot encoding
            y_batch_one_hot = np.eye(self.output_size)[y_batch]

            # forward pass: compute the model's predictions
            Y_pred = self.forward(X_batch)

            # compute the loss
            loss = self.compute_loss(Y_pred, y_batch_one_hot)

            # backward pass: compute the gradients
            # dZ2, then dW2 and db2
            dZ2 = Y_pred - y_batch_one_hot
            dW2 = np.dot(self.A1.T, dZ2) / batch_size
            db2 = np.sum(dZ2, axis=0, keepdims=True) / batch_size

            # backpropagate through the ReLU layer
            dA1 = np.dot(dZ2, self.W2.T)
            dZ1 = dA1 * (self.A1 > 0)  # ReLU derivative

            # dW1 and db1
            dW1 = np.dot(X_batch.T, dZ1) / batch_size
            db1 = np.sum(dZ1, axis=0, keepdims=True) / batch_size

            # L2 gradient: d/dW of lambda/(2m) * ||W||^2 is +lambda * W / m
            dW1 += self.reg_lambda * self.W1 / batch_size
            dW2 += self.reg_lambda * self.W2 / batch_size
            # optional max-norm constraint (requires a self.max_norm attribute):
            # self.W1 = np.clip(self.W1, -self.max_norm, self.max_norm)
            # self.W2 = np.clip(self.W2, -self.max_norm, self.max_norm)

            # update the weights and biases
            self.W1 -= learning_rate * dW1
            self.b1 -= learning_rate * db1
            self.W2 -= learning_rate * dW2
            self.b2 -= learning_rate * db2

            # report the loss periodically
            if i % 100 == 0:
                print(f"Iteration {i}: loss {loss}")

    def predict(self, X):
        Y_pred = self.forward(X)
        return np.argmax(Y_pred, axis=1)

    def evaluate_accuracy(self, X, y):
        predictions = self.predict(X)
        print(predictions)  # debug: predicted labels
        print(y)            # debug: true labels
        accuracy = np.mean(predictions == y)
        return accuracy
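
Before trusting a hand-written backward pass like the one in `train`, it is worth comparing one analytic gradient entry against a centered finite-difference estimate. Below is a minimal sketch that ignores the regularization term for the check; the helper `gradient_check` and the step size `eps` are illustrative additions, not part of the original code:

```python
def gradient_check(nn, X, y, eps=1e-5):
    """Compare analytic dW2[0, 0] with a centered finite-difference estimate."""
    y_one_hot = np.eye(nn.output_size)[y]
    m = X.shape[0]
    # analytic gradient of the unregularized cross-entropy w.r.t. W2
    Y_pred = nn.forward(X)
    dZ2 = Y_pred - y_one_hot
    dW2 = np.dot(nn.A1.T, dZ2) / m
    # numeric gradient: nudge a single weight up and down
    old = nn.W2[0, 0]
    nn.W2[0, 0] = old + eps
    loss_plus = -np.sum(y_one_hot * np.log(nn.forward(X) + 1e-9)) / m
    nn.W2[0, 0] = old - eps
    loss_minus = -np.sum(y_one_hot * np.log(nn.forward(X) + 1e-9)) / m
    nn.W2[0, 0] = old  # restore the weight
    numeric = (loss_plus - loss_minus) / (2 * eps)
    print(f"analytic {dW2[0, 0]:.6f} vs numeric {numeric:.6f}")
```

Calling something like `gradient_check(nn, X_train[:5], y_train[:5])` right after constructing the network should print two values that agree to several decimal places; a large gap usually points to a sign or transpose error in the backward pass.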

if __name__ == "__main__":
    # load the training data
    image_folder = "D:\\MNIST_data\\train1"
    MNIST_labels_path = 'D:\\MNIST_data\\train-labels-idx1-ubyte\\train-labels.idx1-ubyte'
    X_train = imagedata(image_folder)
    y_train = np.array(labeldata(MNIST_labels_path)).astype(int)

    # preprocessing
    # X_train = X_train / 255.0  # not needed: pixels are already normalized inside imagedata

    # initialize the network and train it
    input_size = 784  # 28x28
    hidden_size = 128
    output_size = 10
    nn = NeuralNetwork(input_size, hidden_size, output_size, reg_lambda=0.000001)
    learning_rate = 0.001
    num_iters = 1000
    batch_size = 5
    nn.train(X_train, y_train, learning_rate, num_iters, batch_size)

    test_image_folder = "D:\\MNIST_data\\test"  # path to the test images
    test_MNIST_labels_path = 'D:\\MNIST_data\\train-labels-idx1-ubyte\\train-labels.idx1-ubyte'  # test-set label path (points at the training label file here)
    X_test = imagedata(test_image_folder)
    y_test = labeldata2(test_MNIST_labels_path)  # make sure these are the test-set labels
    y_test = np.array(y_test).astype(int)

    accuracy = nn.evaluate_accuracy(X_train, y_train)
    print(f"Accuracy on the training set: {accuracy * 100:.2f}%")



    # evaluate accuracy on the test set
    accuracy_test = nn.evaluate_accuracy(X_test, y_test)
    print(f"Accuracy on the test set: {accuracy_test * 100:.2f}%")

This post uses only the most basic neural-network techniques: a two-layer fully connected network trained with mini-batch gradient descent and L2 regularization.
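
Concretely, the objective the network minimizes is the cross-entropy plus an L2 penalty. This is exactly what `compute_loss` computes, and differentiating the penalty gives the `reg_lambda * W / batch_size` terms added to the gradients in `train`:

$$
L = -\frac{1}{m}\sum_{i=1}^{m}\sum_{k} y_{ik}\,\log \hat{y}_{ik}
\;+\; \frac{\lambda}{2m}\left(\lVert W_1\rVert_2^2 + \lVert W_2\rVert_2^2\right),
\qquad
\frac{\partial L}{\partial W_l} = \frac{\partial L_{\mathrm{CE}}}{\partial W_l} + \frac{\lambda}{m}\,W_l .
$$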

Below is another example of MNIST handwritten digit recognition implemented with Numpy, this time reading the raw gzip-compressed idx files directly:

```python
import numpy as np
import gzip

# Load the MNIST dataset from the gzip-compressed idx files
def load_data():
    with gzip.open('train-images-idx3-ubyte.gz', 'rb') as f:
        train_data = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 28 * 28)
    with gzip.open('train-labels-idx1-ubyte.gz', 'rb') as f:
        train_label = np.frombuffer(f.read(), np.uint8, offset=8)
    with gzip.open('t10k-images-idx3-ubyte.gz', 'rb') as f:
        test_data = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 28 * 28)
    with gzip.open('t10k-labels-idx1-ubyte.gz', 'rb') as f:
        test_label = np.frombuffer(f.read(), np.uint8, offset=8)
    return train_data, train_label, test_data, test_label

# Convert integer labels to one-hot vectors
def to_one_hot(labels, num_classes):
    num_labels = labels.shape[0]
    one_hot = np.zeros((num_labels, num_classes))
    one_hot[np.arange(num_labels), labels] = 1
    return one_hot

# Softmax function (shifted by the row max for numerical stability)
def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

# Cross-entropy loss (the 1e-9 avoids log(0))
def cross_entropy_loss(y_pred, y_true):
    num_samples = y_pred.shape[0]
    loss = -np.sum(y_true * np.log(y_pred + 1e-9)) / num_samples
    return loss

# Define the model
class Model:
    def __init__(self, input_dim, hidden_dim, output_dim):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.W1 = np.random.randn(input_dim, hidden_dim)
        self.b1 = np.zeros((1, hidden_dim))
        self.W2 = np.random.randn(hidden_dim, output_dim)
        self.b2 = np.zeros((1, output_dim))

    # Forward pass
    def forward(self, x):
        self.z1 = np.dot(x, self.W1) + self.b1
        self.a1 = np.tanh(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = softmax(self.z2)
        return self.a2

    # Backward pass
    def backward(self, x, y_true, y_pred):
        delta2 = y_pred - y_true
        delta1 = np.dot(delta2, self.W2.T) * (1 - np.power(self.a1, 2))  # tanh derivative
        dW2 = np.dot(self.a1.T, delta2)
        db2 = np.sum(delta2, axis=0, keepdims=True)
        dW1 = np.dot(x.T, delta1)
        db1 = np.sum(delta1, axis=0)
        return dW1, db1, dW2, db2

    # Train the model with plain per-sample SGD
    def train(self, x_train, y_train, epochs, learning_rate):
        num_samples, input_dim = x_train.shape
        for epoch in range(epochs):
            for i in range(num_samples):
                x = x_train[i:i+1]
                y_true = y_train[i:i+1]
                y_pred = self.forward(x)
                dW1, db1, dW2, db2 = self.backward(x, y_true, y_pred)
                self.W1 -= learning_rate * dW1
                self.b1 -= learning_rate * db1
                self.W2 -= learning_rate * dW2
                self.b2 -= learning_rate * db2
            y_pred_train = self.forward(x_train)
            train_loss = cross_entropy_loss(y_pred_train, y_train)
            print('Epoch %d, loss: %f' % (epoch, train_loss))

    # Evaluate on the test set
    def test(self, x_test, y_test):
        y_pred = self.forward(x_test)
        test_loss = cross_entropy_loss(y_pred, y_test)
        accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1))
        print('Test loss: %f, accuracy: %f' % (test_loss, accuracy))

# Load the dataset and scale pixels to [0, 1]
x_train, y_train, x_test, y_test = load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0
# Convert the labels to one-hot vectors
y_train = to_one_hot(y_train, 10)
y_test = to_one_hot(y_test, 10)
# Define the model
model = Model(input_dim=784, hidden_dim=128, output_dim=10)
# Train the model
model.train(x_train, y_train, epochs=10, learning_rate=0.01)
# Test the model
model.test(x_test, y_test)
```

This code implements a neural network with a single hidden layer for classifying MNIST handwritten digits. The softmax function turns the network's raw outputs into a probability distribution, cross_entropy_loss computes the loss, and the Model class defines the network structure and the training procedure. During training, backpropagation is used to update the weights and biases until the model converges; finally, the model's performance is evaluated on the test set.
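
As a quick sanity check of `to_one_hot` and `cross_entropy_loss`, here is a tiny worked example with made-up numbers:

```python
import numpy as np

# two samples, three classes; the model is fairly confident about both
y_pred = np.array([[0.7, 0.2, 0.1],
                   [0.1, 0.3, 0.6]])
y_true = np.array([[1, 0, 0],    # sample 0 is class 0
                   [0, 0, 1]])   # sample 1 is class 2
# cross-entropy = -(log 0.7 + log 0.6) / 2 ≈ (0.357 + 0.511) / 2
loss = -np.sum(y_true * np.log(y_pred)) / y_pred.shape[0]
print(loss)  # ≈ 0.4338
```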