手写识别 1（Handwritten-digit recognition, version 1）：单层 softmax 线性分类器。
import numpy as np
import struct
import os
import matplotlib.pyplot as plt
def load_labels(file):
    """Load an MNIST idx1-ubyte label file.

    The first 8 bytes are the header (magic number + item count); every
    byte after that is one label in 0-9.  Returns an int32 vector, matching
    the original implementation's dtype.
    """
    with open(file, "rb") as f:
        data = f.read()
    # frombuffer reads the bytes directly instead of copying through an
    # intermediate bytearray; astype makes the result writable int32.
    return np.frombuffer(data, dtype=np.uint8, offset=8).astype(np.int32)
def load_images(file):
    """Load an MNIST idx3-ubyte image file.

    The 16-byte header holds four big-endian int32 values: magic number,
    image count, rows, cols.  Returns a (num_items, rows*cols) uint8 array,
    one flattened image per row.
    """
    with open(file, "rb") as f:
        data = f.read()
    magic_number, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    # frombuffer avoids the bytearray copy; .copy() keeps the result
    # writable like the original (frombuffer views are read-only).
    pixels = np.frombuffer(data, dtype=np.uint8, offset=16)
    return pixels.reshape(num_items, rows * cols).copy()
class Dataset:
    """Pairs data/label arrays with the batching configuration used to iterate them."""

    def __init__(self, x, y, batch_size, shuffle=True):
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        return len(self.x)

    def __iter__(self):
        # A fresh loader per iteration so every epoch restarts (and reshuffles).
        return DataLoader(self)
class DataLoader:
    """Stateful iterator yielding (x, y) mini-batches from a Dataset.

    Indices are shuffled once at construction when the dataset asks for it;
    the final batch may be smaller than batch_size.
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.cursor = 0
        self.indexs = np.arange(len(dataset))
        if dataset.shuffle:
            np.random.shuffle(self.indexs)

    def __next__(self):
        if self.cursor >= len(self.dataset):
            raise StopIteration
        step = self.dataset.batch_size
        chosen = self.indexs[self.cursor : self.cursor + step]
        self.cursor += step
        return self.dataset.x[chosen], self.dataset.y[chosen]
def label_to_onehot(labels, class_num=10):
    """Convert an integer label vector to a (batch, class_num) one-hot matrix."""
    batch_num = labels.shape[0]
    result = np.zeros((batch_num, class_num))
    # Vectorised scatter: one fancy-indexing assignment instead of a
    # Python-level loop over the batch.
    result[np.arange(batch_num), labels] = 1
    return result
def softmax(x):
    """Row-wise softmax of a (batch, classes) score matrix.

    Subtracts the per-row maximum before exponentiating so large logits do
    not overflow np.exp; the subtraction cancels in the ratio, leaving the
    result mathematically unchanged.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    ex = np.exp(shifted)
    # keepdims=True keeps the sum as (batch, 1) so it broadcasts over the
    # class axis even when the batch has more than one row.
    return ex / np.sum(ex, axis=1, keepdims=True)
if __name__ == "__main__":
    # Pixel values are 0-255; dividing by 255 normalises them to [0, 1].
    train_data = load_images(os.path.join("..","data","train-images.idx3-ubyte")) / 255
    train_label = label_to_onehot(load_labels(os.path.join("..","data","train-labels.idx1-ubyte")))
    dev_data = load_images(os.path.join("..", "data", "t10k-images.idx3-ubyte")) / 255
    dev_label = load_labels(os.path.join("..", "data", "t10k-labels.idx1-ubyte"))
    # Single linear layer: 784 pixels -> 10 class scores, scalar bias.
    W = np.random.normal(0,1,size=(784,10))
    b = 0
    lr = 0.01
    epoch = 1000
    batch_size = 20
    dataset = Dataset(train_data,train_label,batch_size,shuffle=False)
    for e in range(epoch):
        print(f"{e}_{'*'*100}")
        for bi,(batch_data,batch_label) in enumerate(dataset):
            pre = batch_data @ W + b  # (batch, 784) @ (784, 10) = (batch, 10)
            soft_pre = softmax(pre)
            # Mean multi-class cross-entropy over the batch.
            loss = -np.sum(batch_label*np.log(soft_pre)) / batch_data.shape[0]
            # For softmax followed by cross-entropy the chain rule collapses:
            # d(loss)/d(pre) = softmax(pre) - onehot_label (averaged over batch).
            G = (soft_pre - batch_label ) / batch_data.shape[0]
            delta_W = batch_data.T @ G
            # NOTE(review): b is a single scalar shared by all 10 classes, so
            # its gradient is folded with mean(); a per-class bias vector
            # would use np.sum(G, axis=0) instead — confirm intent.
            delta_b = np.mean(G)
            W -= lr * delta_W
            b -= lr * delta_b
            # if bi % 50 == 0 :
            #     print(loss)
        # Dev-set evaluation: argmax of raw scores is enough — softmax is
        # monotonic and would not change which class wins.
        right_num = 0
        p = dev_data @ W + b
        p = np.argmax(p,axis=1)
        for pl,tl in zip(p,dev_label):
            if pl == tl:
                right_num+=1
        acc = right_num/len(dev_label) * 100
        print(f"acc:{acc}%")
手写识别 2（version 2）：在网络中加入一个隐藏层，即在原有 W 之外再增加一个权重矩阵 W2，形成两层网络。
import numpy as np
import struct
import os
import matplotlib.pyplot as plt
def load_labels(file):
    """Load an MNIST idx1-ubyte label file.

    The first 8 bytes are the header (magic number + item count); every
    byte after that is one label in 0-9.  Returns an int32 vector, matching
    the original implementation's dtype.
    """
    with open(file, "rb") as f:
        data = f.read()
    # frombuffer reads the bytes directly instead of copying through an
    # intermediate bytearray; astype makes the result writable int32.
    return np.frombuffer(data, dtype=np.uint8, offset=8).astype(np.int32)
def load_images(file):
    """Load an MNIST idx3-ubyte image file.

    The 16-byte header holds four big-endian int32 values: magic number,
    image count, rows, cols.  Returns a (num_items, rows*cols) uint8 array,
    one flattened image per row.
    """
    with open(file, "rb") as f:
        data = f.read()
    magic_number, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    # frombuffer avoids the bytearray copy; .copy() keeps the result
    # writable like the original (frombuffer views are read-only).
    pixels = np.frombuffer(data, dtype=np.uint8, offset=16)
    return pixels.reshape(num_items, rows * cols).copy()
class Dataset:
    """Pairs data/label arrays with the batching configuration used to iterate them."""

    def __init__(self, x, y, batch_size, shuffle=True):
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        return len(self.x)

    def __iter__(self):
        # A fresh loader per iteration so every epoch restarts (and reshuffles).
        return DataLoader(self)
class DataLoader:
    """Stateful iterator yielding (x, y) mini-batches from a Dataset.

    Indices are shuffled once at construction when the dataset asks for it;
    the final batch may be smaller than batch_size.
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.cursor = 0
        self.indexs = np.arange(len(dataset))
        if dataset.shuffle:
            np.random.shuffle(self.indexs)

    def __next__(self):
        if self.cursor >= len(self.dataset):
            raise StopIteration
        step = self.dataset.batch_size
        chosen = self.indexs[self.cursor : self.cursor + step]
        self.cursor += step
        return self.dataset.x[chosen], self.dataset.y[chosen]
def label_to_onehot(labels, class_num=10):
    """Convert an integer label vector to a (batch, class_num) one-hot matrix."""
    batch_num = labels.shape[0]
    result = np.zeros((batch_num, class_num))
    # Vectorised scatter: one fancy-indexing assignment instead of a
    # Python-level loop over the batch.
    result[np.arange(batch_num), labels] = 1
    return result
def softmax(x):
    """Row-wise softmax of a (batch, classes) score matrix.

    Subtracts the per-row maximum before exponentiating so large logits do
    not overflow np.exp; the subtraction cancels in the ratio, leaving the
    result mathematically unchanged.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    ex = np.exp(shifted)
    # keepdims=True keeps the sum as (batch, 1) so it broadcasts over the
    # class axis even when the batch has more than one row.
    return ex / np.sum(ex, axis=1, keepdims=True)
def sigmoid(x):
    """Numerically stable element-wise logistic function 1 / (1 + exp(-x)).

    The naive form overflows in np.exp(-x) for large negative x (the
    commented-out clip in the original hinted at this).  Using
    z = exp(-|x|) keeps the exponent argument non-positive; the two
    branches below are algebraically identical to the naive formula.
    """
    z = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
def tanh(x):
    """Element-wise hyperbolic tangent.

    np.tanh is mathematically identical to the original 2*sigmoid(2x) - 1
    but more accurate, immune to the sigmoid overflow issue, and free of
    the dependency on the sibling sigmoid helper.
    """
    return np.tanh(x)
if __name__ == "__main__":
    # Pixel values are 0-255; dividing by 255 normalises them to [0, 1].
    train_data = load_images(os.path.join("..","data","train-images.idx3-ubyte")) / 255
    train_label = label_to_onehot(load_labels(os.path.join("..","data","train-labels.idx1-ubyte")))
    dev_data = load_images(os.path.join("..", "data", "t10k-images.idx3-ubyte")) / 255
    dev_label = load_labels(os.path.join("..", "data", "t10k-labels.idx1-ubyte"))
    lr = 0.1
    epoch = 1000
    batch_size = 100
    hidden_size = 256
    # Two-layer network: 784 -> hidden_size -> 10.
    # NOTE(review): the original comment called this "Kaiming init", but
    # N(0, 1) is plain standard-normal initialisation, not Kaiming.
    W1 = np.random.normal(0,1,size=(784,hidden_size))
    W2 = np.random.normal(0,1,size=(hidden_size,10))
    b1 = np.zeros((1,hidden_size))
    b2 = np.zeros((1,10))
    dataset = Dataset(train_data,train_label,batch_size,shuffle=True)
    for e in range(epoch):
        print(f"{e}_{'*'*100}")
        for bi,(batch_data,batch_label) in enumerate(dataset):
            hidden = batch_data @ W1 + b1  # (batch, 784) @ (784, hidden)
            # NOTE(review): despite the name, this applies sigmoid, not tanh.
            tanh_hid = sigmoid(hidden)
            pre = tanh_hid @ W2 + b2
            soft_pre = softmax(pre)
            # Mean multi-class cross-entropy over the batch.
            loss = -np.sum(batch_label*np.log(soft_pre)) / batch_data.shape[0]
            # softmax + cross-entropy gradient w.r.t. pre collapses to
            # (softmax(pre) - onehot_label), averaged over the batch.
            G1 = (soft_pre - batch_label ) / batch_data.shape[0]
            delta_W2 = tanh_hid.T @ G1
            delta_th = G1 @ W2.T
            # Chain rule through the sigmoid activation: s' = s * (1 - s).
            G2 = delta_th * tanh_hid * (1-tanh_hid)
            delta_W1 = batch_data.T @ G2
            delta_b1 = np.sum(G2,axis=0,keepdims=True)
            delta_b2 = np.sum(G1,axis=0,keepdims=True)
            W1 -= lr * delta_W1
            W2 -= lr * delta_W2
            b1 -= lr * delta_b1
            b2 -= lr * delta_b2
            # if bi % 50 == 0 :
            #     print(loss)
        # Dev-set evaluation: argmax of raw scores is enough — softmax is
        # monotonic and would not change which class wins.
        right_num = 0
        h = dev_data @ W1 + b1
        h = sigmoid(h)
        p = h @ W2 + b2
        p = np.argmax(p,axis=1)
        for pl,tl in zip(p,dev_label):
            if pl == tl:
                right_num+=1
        acc = right_num/len(dev_label) * 100
        print(f"acc:{acc}%")
手写识别 3（version 3）：网络结构与版本 2 相同，仅将准确率计算改为向量化写法（对 p == dev_label 的布尔数组求和）。
import numpy as np
import struct
import os
import matplotlib.pyplot as plt
def load_labels(file):
    """Load an MNIST idx1-ubyte label file.

    The first 8 bytes are the header (magic number + item count); every
    byte after that is one label in 0-9.  Returns an int32 vector, matching
    the original implementation's dtype.
    """
    with open(file, "rb") as f:
        data = f.read()
    # frombuffer reads the bytes directly instead of copying through an
    # intermediate bytearray; astype makes the result writable int32.
    return np.frombuffer(data, dtype=np.uint8, offset=8).astype(np.int32)
def load_images(file):
    """Load an MNIST idx3-ubyte image file.

    The 16-byte header holds four big-endian int32 values: magic number,
    image count, rows, cols.  Returns a (num_items, rows*cols) uint8 array,
    one flattened image per row.
    """
    with open(file, "rb") as f:
        data = f.read()
    magic_number, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    # frombuffer avoids the bytearray copy; .copy() keeps the result
    # writable like the original (frombuffer views are read-only).
    pixels = np.frombuffer(data, dtype=np.uint8, offset=16)
    return pixels.reshape(num_items, rows * cols).copy()
class Dataset:
    """Pairs data/label arrays with the batching configuration used to iterate them."""

    def __init__(self, x, y, batch_size, shuffle=True):
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        return len(self.x)

    def __iter__(self):
        # A fresh loader per iteration so every epoch restarts (and reshuffles).
        return DataLoader(self)
class DataLoader:
    """Stateful iterator yielding (x, y) mini-batches from a Dataset.

    Indices are shuffled once at construction when the dataset asks for it;
    the final batch may be smaller than batch_size.
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.cursor = 0
        self.indexs = np.arange(len(dataset))
        if dataset.shuffle:
            np.random.shuffle(self.indexs)

    def __next__(self):
        if self.cursor >= len(self.dataset):
            raise StopIteration
        step = self.dataset.batch_size
        chosen = self.indexs[self.cursor : self.cursor + step]
        self.cursor += step
        return self.dataset.x[chosen], self.dataset.y[chosen]
def label_to_onehot(labels, class_num=10):
    """Convert an integer label vector to a (batch, class_num) one-hot matrix."""
    batch_num = labels.shape[0]
    result = np.zeros((batch_num, class_num))
    # Vectorised scatter: one fancy-indexing assignment instead of a
    # Python-level loop over the batch.
    result[np.arange(batch_num), labels] = 1
    return result
def softmax(x):
    """Row-wise softmax of a (batch, classes) score matrix.

    Subtracts the per-row maximum before exponentiating so large logits do
    not overflow np.exp; the subtraction cancels in the ratio, leaving the
    result mathematically unchanged.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    ex = np.exp(shifted)
    # keepdims=True keeps the sum as (batch, 1) so it broadcasts over the
    # class axis even when the batch has more than one row.
    return ex / np.sum(ex, axis=1, keepdims=True)
def sigmoid(x):
    """Numerically stable element-wise logistic function 1 / (1 + exp(-x)).

    The naive form overflows in np.exp(-x) for large negative x (the
    commented-out clip in the original hinted at this).  Using
    z = exp(-|x|) keeps the exponent argument non-positive; the two
    branches below are algebraically identical to the naive formula.
    """
    z = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
def tanh(x):
    """Element-wise hyperbolic tangent.

    np.tanh is mathematically identical to the original 2*sigmoid(2x) - 1
    but more accurate, immune to the sigmoid overflow issue, and free of
    the dependency on the sibling sigmoid helper.
    """
    return np.tanh(x)
if __name__ == "__main__":
    # Pixel values are 0-255; dividing by 255 normalises them to [0, 1].
    train_data = load_images(os.path.join("..","data","train-images.idx3-ubyte")) / 255
    train_label = label_to_onehot(load_labels(os.path.join("..","data","train-labels.idx1-ubyte")))
    dev_data = load_images(os.path.join("..", "data", "t10k-images.idx3-ubyte")) / 255
    dev_label = load_labels(os.path.join("..", "data", "t10k-labels.idx1-ubyte"))
    lr = 0.1
    epoch = 1000
    batch_size = 100
    hidden_size = 256
    # Two-layer network: 784 -> hidden_size -> 10.
    # NOTE(review): the original comment called this "Kaiming init", but
    # N(0, 1) is plain standard-normal initialisation, not Kaiming.
    W1 = np.random.normal(0,1,size=(784,hidden_size))
    W2 = np.random.normal(0,1,size=(hidden_size,10))
    b1 = np.zeros((1,hidden_size))
    b2 = np.zeros((1,10))
    dataset = Dataset(train_data,train_label,batch_size,shuffle=True)
    for e in range(epoch):
        print(f"{e}_{'*'*100}")
        for bi,(batch_data,batch_label) in enumerate(dataset):
            hidden = batch_data @ W1 + b1  # (batch, 784) @ (784, hidden)
            # NOTE(review): despite the name, this applies sigmoid, not tanh.
            tanh_hid = sigmoid(hidden)
            pre = tanh_hid @ W2 + b2
            soft_pre = softmax(pre)
            # Mean multi-class cross-entropy over the batch.
            loss = -np.sum(batch_label*np.log(soft_pre)) / batch_data.shape[0]
            # softmax + cross-entropy gradient w.r.t. pre collapses to
            # (softmax(pre) - onehot_label), averaged over the batch.
            G2 = (soft_pre - batch_label ) / batch_data.shape[0]
            delta_W2 = tanh_hid.T @ G2
            delta_th = G2 @ W2.T
            # Chain rule through the sigmoid activation: s' = s * (1 - s).
            G1 = delta_th * tanh_hid * (1-tanh_hid)
            delta_W1 = batch_data.T @ G1
            delta_b1 = np.sum(G1,axis=0,keepdims=True)
            delta_b2 = np.sum(G2,axis=0,keepdims=True)
            W1 -= lr * delta_W1
            W2 -= lr * delta_W2
            b1 -= lr * delta_b1
            b2 -= lr * delta_b2
            # if bi % 50 == 0 :
            #     print(loss)
        # Dev-set evaluation; argmax of raw scores is enough — softmax is
        # monotonic and would not change which class wins.
        right_num = 0
        h = dev_data @ W1 + b1
        h = sigmoid(h)
        p = h @ W2 + b2
        p = np.argmax(p,axis=1)
        # p == dev_label is a boolean vector; summing counts the True
        # entries (correct predictions), so this is the vectorised accuracy.
        acc = sum(p == dev_label) / len(dev_label) * 100
        print(f"acc:{acc}%")
手写识别 4（version 4）：封装了 Linear 层，在层内部完成前向求值、反向求导以及参数更新的操作。
import numpy as np
import struct
import os
import matplotlib.pyplot as plt
def load_labels(file):
    """Load an MNIST idx1-ubyte label file.

    The first 8 bytes are the header (magic number + item count); every
    byte after that is one label in 0-9.  Returns an int32 vector, matching
    the original implementation's dtype.
    """
    with open(file, "rb") as f:
        data = f.read()
    # frombuffer reads the bytes directly instead of copying through an
    # intermediate bytearray; astype makes the result writable int32.
    return np.frombuffer(data, dtype=np.uint8, offset=8).astype(np.int32)
def load_images(file):
    """Load an MNIST idx3-ubyte image file.

    The 16-byte header holds four big-endian int32 values: magic number,
    image count, rows, cols.  Returns a (num_items, rows*cols) uint8 array,
    one flattened image per row.
    """
    with open(file, "rb") as f:
        data = f.read()
    magic_number, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    # frombuffer avoids the bytearray copy; .copy() keeps the result
    # writable like the original (frombuffer views are read-only).
    pixels = np.frombuffer(data, dtype=np.uint8, offset=16)
    return pixels.reshape(num_items, rows * cols).copy()
class Dataset:
    """Pairs data/label arrays with the batching configuration used to iterate them."""

    def __init__(self, x, y, batch_size, shuffle=True):
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        return len(self.x)

    def __iter__(self):
        # A fresh loader per iteration so every epoch restarts (and reshuffles).
        return DataLoader(self)
class DataLoader:
    """Stateful iterator yielding (x, y) mini-batches from a Dataset.

    Indices are shuffled once at construction when the dataset asks for it;
    the final batch may be smaller than batch_size.
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.cursor = 0
        self.indexs = np.arange(len(dataset))
        if dataset.shuffle:
            np.random.shuffle(self.indexs)

    def __next__(self):
        if self.cursor >= len(self.dataset):
            raise StopIteration
        step = self.dataset.batch_size
        chosen = self.indexs[self.cursor : self.cursor + step]
        self.cursor += step
        return self.dataset.x[chosen], self.dataset.y[chosen]
def label_to_onehot(labels, class_num=10):
    """Convert an integer label vector to a (batch, class_num) one-hot matrix."""
    batch_num = labels.shape[0]
    result = np.zeros((batch_num, class_num))
    # Vectorised scatter: one fancy-indexing assignment instead of a
    # Python-level loop over the batch.
    result[np.arange(batch_num), labels] = 1
    return result
def softmax(x):
    """Row-wise softmax of a (batch, classes) score matrix.

    Subtracts the per-row maximum before exponentiating so large logits do
    not overflow np.exp; the subtraction cancels in the ratio, leaving the
    result mathematically unchanged.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    ex = np.exp(shifted)
    # keepdims=True keeps the sum as (batch, 1) so it broadcasts over the
    # class axis even when the batch has more than one row.
    return ex / np.sum(ex, axis=1, keepdims=True)
def sigmoid(x):
    """Numerically stable element-wise logistic function 1 / (1 + exp(-x)).

    The naive form overflows in np.exp(-x) for large negative x (the
    commented-out clip in the original hinted at this).  Using
    z = exp(-|x|) keeps the exponent argument non-positive; the two
    branches below are algebraically identical to the naive formula.
    """
    z = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
def tanh(x):
    """Element-wise hyperbolic tangent.

    np.tanh is mathematically identical to the original 2*sigmoid(2x) - 1
    but more accurate, immune to the sigmoid overflow issue, and free of
    the dependency on the sibling sigmoid helper.
    """
    return np.tanh(x)
class NN_layer:
    """Marker base class for network layers."""
    pass
class Linear(NN_layer):
    """Fully-connected layer that owns its parameters and updates them inside backward()."""

    def __init__(self, in_feature, out_feature):
        # Standard-normal weight init; bias starts at zero.
        self.W = np.random.normal(0, 1, size=(in_feature, out_feature))
        self.b = np.zeros((1, out_feature))

    def forward(self, x):
        """Affine map x @ W + b; caches x for the backward pass."""
        self.A = x
        return x @ self.W + self.b

    def backward(self, G, lr=None):
        """SGD-update the parameters from upstream gradient G and return
        the gradient w.r.t. this layer's input.

        Bug fix vs. the original: the input gradient G @ W.T must be
        computed with the weights used in the forward pass, i.e. BEFORE
        the in-place update — the original updated W first and therefore
        propagated a gradient through the already-modified weights.
        """
        if lr is None:
            # Backward-compatible fallback: the original read the module
            # global `lr` defined in the training script below.
            lr = globals()["lr"]
        grad_input = G @ self.W.T
        self.W -= lr * (self.A.T @ G)
        self.b -= lr * np.sum(G, axis=0, keepdims=True)
        return grad_input
if __name__ == "__main__":
    # Pixel values are 0-255; dividing by 255 normalises them to [0, 1].
    train_data = load_images(os.path.join("..","data","train-images.idx3-ubyte")) / 255
    train_label = label_to_onehot(load_labels(os.path.join("..","data","train-labels.idx1-ubyte")))
    dev_data = load_images(os.path.join("..", "data", "t10k-images.idx3-ubyte")) / 255
    dev_label = load_labels(os.path.join("..", "data", "t10k-labels.idx1-ubyte"))
    # NOTE(review): Linear.backward reads this module-level lr, so changing
    # it after construction changes the step size of both layers.
    lr = 0.1
    epoch = 1000
    batch_size = 100
    hidden_size = 256
    # Two-layer network expressed with the Linear layer abstraction.
    linear1 = Linear(784,hidden_size)
    linear2 = Linear(hidden_size,10)
    dataset = Dataset(train_data,train_label,batch_size,shuffle=True)
    for e in range(epoch):
        print(f"{e}_{'*'*100}")
        for bi,(batch_data,batch_label) in enumerate(dataset):
            hidden = linear1.forward(batch_data)
            # NOTE(review): despite the name, this applies sigmoid, not tanh.
            tanh_hid = sigmoid(hidden)
            pre = linear2.forward(tanh_hid)
            soft_pre = softmax(pre)
            # Mean multi-class cross-entropy over the batch.
            loss = -np.sum(batch_label*np.log(soft_pre)) / batch_data.shape[0]
            # softmax + cross-entropy gradient w.r.t. pre collapses to
            # (softmax(pre) - onehot_label), averaged over the batch.
            G2 = (soft_pre - batch_label ) / batch_data.shape[0]
            # Linear.backward both updates the layer's own parameters and
            # returns the gradient to pass to the layer below.
            delta_th = linear2.backward(G2)
            # Chain rule through the sigmoid activation: s' = s * (1 - s).
            G1 = delta_th * tanh_hid * (1-tanh_hid)
            linear1.backward(G1)
        right_num = 0  # NOTE(review): unused — accuracy is vectorised below.
        h = linear1.forward(dev_data)
        h = sigmoid(h)
        p = linear2.forward(h)
        p = np.argmax(p,axis=1)
        # Summing the boolean vector counts correct predictions.
        acc = sum(p == dev_label) / len(dev_label) * 100
        print(f"acc:{acc}%")
手写识别 5（version 5）：在版本 4 的基础上添加了 Sigmoid 层，同样在层内部完成前向求值与反向求导的操作。
import numpy as np
import struct
import os
import matplotlib.pyplot as plt
def load_labels(file): # 加载数据
with open(file, "rb") as f:
data = f.read()
return np.asanyarray(bytearray(data[8:]),