基于全连接网络的MNIST识别

Galerkin码农选手

已于 2022-05-06 17:09:10 修改

阅读量483

点赞数

分类专栏： Deep learning 文章标签：深度学习

于 2021-05-08 10:41:22 首次发布

本文链接：https://blog.csdn.net/forrestguang/article/details/116519826

版权

Deep learning 专栏收录该内容

42 篇文章 70 订阅 ¥239.90 ¥399.90

订阅专栏

这篇博客先用PyTorch全连接网络分别对MNIST数据集和covertype数据集做分类，再给出手动实现的SGD、动量SGD、Nesterov和Adagrad算法，并用带L1正则的逻辑回归在这两个数据集上做二分类。MNIST数据集是常用的图像识别训练集；covertype数据集包含7类，标签取值为1到7，训练前需要先把标签减1。文章提供相关代码展示。

摘要由CSDN通过智能技术生成

MNIST数据

这里大家都知道MNIST数据集是啥，所以不做过多说明，只放代码：

import numpy as np
import matplotlib.pyplot as plt
import torch
from struct import unpack
import gzip

import torch.nn as nn
import time
np.random.seed(1234)
def __read_image(path):
    """Read a gzip-compressed IDX image file into a 2-D uint8 array.

    The first 16 bytes are unpacked as four big-endian unsigned 32-bit
    integers (magic, num, rows, cols); everything after them is pixel data.

    Args:
        path: Path of the gzip file to read.

    Returns:
        ``np.uint8`` array of shape ``(num, rows * cols)``, one flattened
        image per row.
    """
    with gzip.open(path, 'rb') as f:
        magic, num, rows, cols = unpack('>4I', f.read(16))
        # Take the image size from the header instead of assuming 28x28, so
        # files with a different resolution are reshaped correctly.
        img = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows * cols)
    return img

def __read_label(path):
    """Read a gzip-compressed IDX label file into a 1-D uint8 array.

    The first 8 bytes are unpacked as two big-endian unsigned 32-bit
    integers; the remaining bytes are returned as one label per byte.
    """
    with gzip.open(path, 'rb') as stream:
        header = stream.read(8)
        payload = stream.read()
    # The header values are not used, but unpacking still rejects a file that
    # is too short to contain them.
    _magic, _count = unpack('>2I', header)
    return np.frombuffer(payload, dtype=np.uint8)
    
def __normalize_image(image):
    """Return ``image`` converted to float32 and divided by 255."""
    scaled = image.astype(np.float32)
    scaled /= 255.0
    return scaled
def __one_hot_label(label):
    """Return a ``(len(label), 10)`` float array with a 1 at each label index."""
    encoded = np.zeros((label.shape[0], 10))
    for position, cls in enumerate(label):
        encoded[position, cls] = 1
    return encoded

def load_mnist(x_train_path, y_train_path, x_test_path, y_test_path, normalize=True, one_hot=False):
    """Load the four gzip files of an MNIST-style dataset.

    Args:
        x_train_path: Path of the training image file.
        y_train_path: Path of the training label file.
        x_test_path: Path of the test image file.
        y_test_path: Path of the test label file.
        normalize: If true, images are passed through ``__normalize_image``.
        one_hot: If true, labels are passed through ``__one_hot_label``.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
    """
    # Files are read in the same order as before: both image files first,
    # then both label files.
    train_x = __read_image(x_train_path)
    test_x = __read_image(x_test_path)
    train_y = __read_label(y_train_path)
    test_y = __read_label(y_test_path)

    if normalize:
        train_x = __normalize_image(train_x)
        test_x = __normalize_image(test_x)

    if one_hot:
        train_y = __one_hot_label(train_y)
        test_y = __one_hot_label(test_y)

    return (train_x, train_y), (test_x, test_y)
class Net(nn.Module):
    """Fully connected 784-500-300-10 classifier that returns raw logits."""

    def __init__(self):
        super().__init__()
        self.module = nn.Sequential(
            nn.Linear(784, 500),
            nn.ReLU(),
            nn.Linear(500, 300),
            nn.ReLU(),
            # No activation after the last layer: the output is fed to
            # nn.CrossEntropyLoss, which expects unnormalized logits. A
            # trailing ReLU would clamp every logit to be non-negative and
            # zero the gradient of any class whose pre-activation is negative.
            nn.Linear(300, 10),
        )

    def forward(self, x):
        """Map a ``(N, 784)`` float tensor to ``(N, 10)`` class logits."""
        return self.module(x)
def pred(net, images):
    """Return, for each row of ``images``, the index of the largest net output."""
    scores = net.forward(images)
    return scores.argmax(dim=1)
def SOmax(X, y):
    """Return the softmax probability assigned to the true class of each row.

    Args:
        X: ``(N, C)`` tensor of logits.
        y: Length-``N`` integer class labels (array or tensor).

    Returns:
        ``(N, 1)`` float64 tensor whose row ``i`` is ``softmax(X[i])[y[i]]``.
    """
    labels = torch.as_tensor(y, device=X.device).long().reshape(-1, 1)
    # The original multiplied by a float64 one-hot mask, so its result was
    # float64; computing in double keeps that dtype.
    logits = X.double()
    # Subtracting the row maximum leaves the softmax unchanged but keeps exp()
    # from overflowing to inf (which previously produced inf/inf = NaN).
    shifted = logits - logits.max(dim=1, keepdim=True).values
    weights = torch.exp(shifted)
    probs = weights / weights.sum(1, keepdim=True)
    return probs.gather(1, labels)
'''
def Loss(net,images,labels):
    
    
    logits = net.forward(images)
    
    loss = -torch.log(SOmax(logits,labels)).mean()
    return loss
'''
def Loss(net, images, labels):
    """Return the cross-entropy loss of ``net`` on one batch.

    Args:
        net: Module mapping ``images`` to per-class logits.
        images: Input batch passed to ``net``.
        labels: Integer class indices, one per row of ``images``.

    Returns:
        Scalar tensor produced by ``nn.CrossEntropyLoss`` with its default
        settings.
    """
    criteon = nn.CrossEntropyLoss()
    # The argmax the original computed here was never used, so it is dropped.
    logits = net(images)
    return criteon(logits, labels)

def pred_acc(net, images, labels):
    """Count how many rows of ``images`` the net classifies as ``labels``.

    Args:
        net: Module mapping ``images`` to per-class logits.
        images: Input batch passed to ``net``.
        labels: Integer class indices, one per row of ``images``.

    Returns:
        0-d integer tensor holding the number of correct predictions.
    """
    # This is evaluation only, so the forward pass does not need to record an
    # autograd graph (it is called on the whole training set by Train).
    with torch.no_grad():
        pred_label = net(images).argmax(dim=1)
    return pred_label.eq(labels).sum()
def Train(net, train_images, train_labels, batch, epoch, optim):
    """Train ``net`` with consecutive mini-batches and print progress.

    Args:
        net: Model to train.
        train_images: Training inputs, sliced along the first dimension.
        train_labels: Training targets aligned with ``train_images``.
        batch: Number of samples per step.
        epoch: Number of passes over the data; the loop runs
            ``int(epoch * N / batch)`` steps.
        optim: Optimizer that updates the parameters of ``net``.
    """
    num_samples = train_images.shape[0]
    iter_num = int(epoch * num_samples / batch)
    m = 0  # number of completed epochs reported so far
    for i in range(iter_num):
        # Batches are consecutive slices that wrap around at the end of the data.
        x = i * batch % num_samples
        y = x + batch

        loss = Loss(net, train_images[x:y], train_labels[x:y])
        optim.zero_grad()
        loss.backward()
        optim.step()

        if i % 20 == 0:
            # NOTE(review): the printed counter is i + 20, not i + 1 — kept
            # exactly as in the original; confirm the intent.
            print('the iteration:%d,the batch_loss:%.3e,the trained photo:%d'%(i + 20,loss.item(),y))
        # Independent `if` (was `elif`): the end-of-epoch report must not be
        # skipped when it falls on a step that also prints the batch loss.
        if (i + 1) * batch % num_samples == 0:
            m = m + 1
            acc = pred_acc(net, train_images, train_labels).item() / num_samples
            print('the epoch:%d,the acc:%.2f'%(m,acc))
def test_acc(net, images, labels):
    """Return the fraction of rows of ``images`` classified as ``labels``."""
    predicted = net.forward(images).argmax(dim=1)
    hits = predicted.eq(labels).sum().item()
    return hits / images.shape[0]

# Script: train the fully connected Net on MNIST and report the test accuracy.
# Hard-coded locations of the four gzip files.
x_train_path = 'D:\\python\\train-images-idx3-ubyte.gz'
y_train_path = 'D:\\python\\train-labels-idx1-ubyte.gz'
x_test_path = 'D:\\python\\t10k-images-idx3-ubyte.gz'
y_test_path = 'D:\\python\\t10k-labels-idx1-ubyte.gz'
(x_train,y_train),(x_test,y_test)=load_mnist(x_train_path, y_train_path, x_test_path, y_test_path)
# The timer starts after the files are loaded, so the reported time covers
# tensor conversion and training only.
tic = time.time()
train_images = torch.tensor(x_train).float()
train_labels = torch.tensor(y_train).long()
test_images = torch.tensor(x_test).float()
test_labels = torch.tensor(y_test).long()

net = Net()

# Alternative optimizer, left disabled:
#optim = torch.optim.SGD(net.parameters(),lr = 1e-3,momentum = 0.78)
optim = torch.optim.Adam(net.parameters(),lr = 1e-3,betas=(0.9,0.999))
batch = 1000
epoch = 8
Train(net,train_images,train_labels,batch,epoch,optim)
# `ela` is taken before test_acc runs, so evaluation time is not included.
ela = time.time() - tic
print('the time:%.2f,the test_acc:%.2f'%(ela,test_acc(net,test_images,test_labels)))

在这里插入图片描述

covertype

这个数据集一共有7类，标签取值为1到7；而nn.CrossEntropyLoss要求类别下标从0开始，因此训练之前记得先把标签减1，变成0到6：

import numpy as np
import matplotlib.pyplot as plt
import torch
from struct import unpack
import gzip
import os
import torch.nn as nn
import time
class Net(nn.Module):
    """54-16-16-7 fully connected network for the covertype features.

    Each hidden Linear layer is followed by Dropout(0.5) and then an in-place
    LeakyReLU; the 7-way output layer is also followed by a LeakyReLU.
    """

    def __init__(self):
        super().__init__()
        layers = [
            nn.Linear(54, 16),
            nn.Dropout(0.5),
            nn.LeakyReLU(inplace=True),
            nn.Linear(16, 16),
            nn.Dropout(0.5),
            nn.LeakyReLU(inplace=True),
            nn.Linear(16, 7),
            nn.LeakyReLU(inplace=True),
        ]
        # Unpacking into Sequential keeps the same positional sub-module names.
        self.module = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the 7 outputs per row."""
        return self.module(x)
def pred(net, images):
    """Return the arg-max class index of the net output for every input row."""
    outputs = net.forward(images)
    winners = outputs.argmax(dim=1)
    return winners
def Loss(net, images, labels):
    """Return the cross-entropy loss of ``net`` on one batch.

    Args:
        net: Module mapping ``images`` to per-class scores.
        images: Input batch passed to ``net``.
        labels: Integer class indices, one per row of ``images``.

    Returns:
        Scalar tensor produced by ``nn.CrossEntropyLoss`` with its default
        settings.
    """
    criteon = nn.CrossEntropyLoss()
    # The unused argmax of the original is removed; only the loss is needed.
    return criteon(net(images), labels)
def pred_acc(net, images, labels):
    """Return, as a 0-d tensor, the number of rows whose arg-max equals the label."""
    scores = net.forward(images)
    matches = scores.argmax(dim=1).eq(labels)
    return matches.sum()
def Train(net, train_images, train_labels, batch, epoch, optim):
    """Train ``net`` with consecutive mini-batches and print progress.

    The number of correct predictions is accumulated batch by batch (measured
    before each parameter update) and reported as the epoch accuracy.

    Args:
        net: Model to train.
        train_images: Training inputs, sliced along the first dimension.
        train_labels: Training targets aligned with ``train_images``.
        batch: Number of samples per step.
        epoch: Number of passes over the data; the loop runs
            ``int(epoch * N / batch)`` steps.
        optim: Optimizer that updates the parameters of ``net``.
    """
    num_samples = train_images.shape[0]
    iter_num = int(epoch * num_samples / batch)
    m = 0  # number of completed epochs reported so far
    train_acc = 0  # running count of correct predictions in the current epoch
    for i in range(iter_num):
        # Batches are consecutive slices that wrap around at the end of the data.
        x = i * batch % num_samples
        y = x + batch
        loss = Loss(net, train_images[x:y], train_labels[x:y])
        # NOTE(review): this is a second forward pass on the same batch, made
        # in whatever mode the net is in (dropout stays active in train mode).
        train_acc += pred_acc(net, train_images[x:y], train_labels[x:y])
        optim.zero_grad()
        loss.backward()
        optim.step()
        if i % 20 == 0:
            # NOTE(review): the printed counter is i + 20, not i + 1 — kept
            # exactly as in the original; confirm the intent.
            print('the iteration:%d,the batch_loss:%.3e,the trained photo:%d'%(i + 20,loss.item(),y))
        # Independent `if` (was `elif`): otherwise an epoch boundary that falls
        # on a loss-printing step is skipped and the running count is never
        # reset, inflating the next reported accuracy.
        if (i + 1) * batch % num_samples == 0:
            m = m + 1
            acc = train_acc.item() / num_samples
            print('the epoch:%d,the acc:%.2f'%(m,acc))
            train_acc = 0
def test_acc(net, images, labels):
    """Return the fraction of rows of ``images`` classified as ``labels``.

    The net is switched to evaluation mode for the forward pass, so layers
    such as Dropout are disabled, and is put back into its previous mode
    afterwards. No autograd graph is recorded.

    Args:
        net: ``nn.Module`` mapping ``images`` to per-class scores.
        images: Input batch passed to ``net``.
        labels: Integer class indices, one per row of ``images``.

    Returns:
        Accuracy as a Python float in ``[0, 1]``.
    """
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            pred_label = net(images).argmax(dim=1)
    finally:
        # Restore the caller's mode even if the forward pass raises.
        net.train(was_training)
    return pred_label.eq(labels).sum().item() / images.shape[0]
# Script: train the covertype Net and report the test accuracy.
cover_path = 'C:\\Users\\Desktop\\covertype\\cover.data'
# Load through cover_path instead of repeating the literal path.
data = np.loadtxt(cover_path, delimiter=',')
tic = time.time()
# The first train_num rows are used for training, the remainder for testing.
train_num = 500000
# The last column holds the class label (1..7); subtracting 1 gives the
# 0-based indices nn.CrossEntropyLoss expects.
train_images = torch.tensor(data[:train_num, :-1]).float()
train_labels = torch.tensor(data[:train_num, -1] - 1).long()
test_images = torch.tensor(data[train_num:, :-1]).float()
# Fixed: the test labels were taken from column -10 (a feature column) rather
# than the label column -1 used for the training split.
test_labels = torch.tensor(data[train_num:, -1] - 1).long()

net = Net()

optim = torch.optim.SGD(net.parameters(), lr=5e-3, momentum=0.78)
batch = 10000
epoch = 8
Train(net, train_images, train_labels, batch, epoch, optim)
# `ela` is taken before test_acc runs, so evaluation time is not included.
ela = time.time() - tic
print('the time:%.2f,the test_acc:%.2f'%(ela,test_acc(net,test_images,test_labels)))

下面是手动实现的SGD及其变体（动量SGD、Nesterov、Adagrad）的代码。模型是带L1正则的逻辑回归：先把MNIST标签按奇偶转成±1（偶数为-1，奇数为1），再做二分类：

import numpy as np
from struct import unpack
import gzip
import random
import matplotlib.pyplot as plt
import time
np.random.seed(1234)
def __read_image(path):
    """Read a gzip-compressed IDX image file into a 2-D uint8 array.

    The first 16 bytes are unpacked as four big-endian unsigned 32-bit
    integers (magic, num, rows, cols); everything after them is pixel data.

    Args:
        path: Path of the gzip file to read.

    Returns:
        ``np.uint8`` array of shape ``(num, rows * cols)``, one flattened
        image per row.
    """
    with gzip.open(path, 'rb') as f:
        magic, num, rows, cols = unpack('>4I', f.read(16))
        # Reshape with the size stored in the header rather than a fixed
        # 28*28, so other image resolutions are handled as well.
        img = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows * cols)
    return img

def __read_label(path):
    """Read a gzip-compressed IDX label file and return its labels as uint8.

    Eight header bytes (two big-endian unsigned 32-bit integers) precede the
    label bytes.
    """
    with gzip.open(path, 'rb') as stream:
        # Unpacking validates that the 8-byte header is present; the two
        # values themselves are not needed.
        unpack('>2I', stream.read(8))
        raw = stream.read()
    return np.frombuffer(raw, dtype=np.uint8)
    
def __normalize_image(image):
    """Convert ``image`` to float32 and scale it by 1/255."""
    as_float = image.astype(np.float32)
    return np.divide(as_float, 255.0)
def newlabel(label):
    """Map labels to a ``(label.size, 1)`` float column by parity.

    Labels with remainder 0 modulo 2 become -1; every other label becomes its
    remainder modulo 2 (1 for odd integers).
    """
    parity = (label % 2).astype(np.float64)
    signed = np.where(parity == 0, -1.0, parity)
    return signed.reshape(label.size, 1)

def load_mnist(x_train_path, y_train_path, x_test_path, y_test_path, normalize=True, one_hot=True):
    """Load the four gzip files of an MNIST-style dataset.

    Args:
        x_train_path: Path of the training image file.
        y_train_path: Path of the training label file.
        x_test_path: Path of the test image file.
        y_test_path: Path of the test label file.
        normalize: If true, images are passed through ``__normalize_image``.
        one_hot: If true, labels are passed through ``newlabel``, which turns
            them into a +/-1 parity column (not a one-hot matrix, despite the
            parameter name).

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
    """
    # Files are read in the same order as before: both image files first,
    # then both label files.
    train_x = __read_image(x_train_path)
    test_x = __read_image(x_test_path)
    train_y = __read_label(y_train_path)
    test_y = __read_label(y_test_path)

    if normalize:
        train_x = __normalize_image(train_x)
        test_x = __normalize_image(test_x)

    if one_hot:
        train_y = newlabel(train_y)
        test_y = newlabel(test_y)

    return (train_x, train_y), (test_x, test_y)
def accuracy(x, y, w):
    """Return a float32 0/1 array marking where ``sign(x @ w)`` equals ``y``.

    A score greater than 0 is predicted as +1 and a score less than or equal
    to 0 as -1; callers sum the result to count correct predictions.
    """
    scores = x @ w
    positive = (scores > 0).astype('float32')
    non_positive = (scores <= 0).astype('float32')
    predicted = positive - non_positive
    return (predicted == y).astype('float32')
def loss(x, y, w, lam):
    """Return the L1-regularized mean logistic loss.

    Computes ``mean(log(1 + exp(-y * (x @ w)))) + lam * ||w||_1``.

    Args:
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` column of +/-1 labels.
        w: ``(d, 1)`` weight column.
        lam: L1 regularization weight.
    """
    # logaddexp(0, z) equals log(1 + exp(z)) but does not overflow when z is
    # large, where the direct formula returned inf.
    margins = -y * x @ w
    return np.logaddexp(0, margins).mean() + lam * np.linalg.norm(w, 1)
def grad_loss(x, y, w, lam):
    """Return the (sub)gradient of the L1-regularized logistic loss w.r.t. ``w``.

    Args:
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` column of +/-1 labels.
        w: ``(d, 1)`` weight column.
        lam: L1 regularization weight.

    Returns:
        ``(d, 1)`` array: the sample mean of ``-y * x * sigmoid(-y * (x @ w))``
        plus ``lam * sign(w)``.
    """
    margins = -y * x @ w
    # sigmoid(z) written as exp(-logaddexp(0, -z)); the original
    # exp(z) / (1 + exp(z)) became inf/inf = NaN once exp(z) overflowed.
    weights = np.exp(-np.logaddexp(0, -margins))
    return (-y * x * weights).mean(0).reshape(-1, 1) + lam * np.sign(w)
def grad_f(x, y, w):
    """Return the gradient of the unregularized mean logistic loss w.r.t. ``w``.

    Args:
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` column of +/-1 labels.
        w: ``(d, 1)`` weight column.

    Returns:
        ``(d, 1)`` array: the sample mean of ``-y * x * sigmoid(-y * (x @ w))``.
    """
    margins = -y * x @ w
    # Overflow-safe sigmoid; exp(z) / (1 + exp(z)) evaluates to NaN for large z.
    weights = np.exp(-np.logaddexp(0, -margins))
    return (-y * x * weights).mean(0).reshape(-1, 1)
def SGD(w0, x, y, lam, epoch):
    """Run single-sample stochastic gradient descent on the regularized loss.

    Args:
        w0: Initial ``(d, 1)`` weight column (not modified).
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` label column.
        lam: L1 regularization weight passed to ``grad_loss`` and ``loss``.
        epoch: Maximum number of update steps.

    Returns:
        The weight column after the last step.
    """
    tol = 1e-8
    n_samples = x.shape[0]
    w = w0.copy()
    print('the init norm:%.3e'%(np.linalg.norm(grad_loss(x,y,w0,lam))))
    step_size = 1e-1
    m = 0
    while m < epoch:
        # One randomly chosen sample per step; the slices keep both arrays 2-D.
        pick = random.choice(range(0, n_samples))
        sample_x = x[pick:pick + 1, :]
        sample_y = y[pick:pick + 1, :]
        descent = -grad_loss(sample_x, sample_y, w0, lam)
        w = w0 + step_size * descent
        rho = np.linalg.norm(w - w0)
        # Stop as soon as a step no longer moves the weights.
        if rho < tol:
            break
        w0 = w
        m += 1
        if (m + 1) % 20 == 0:
            # Shrink the step size, then report on the full data set.
            step_size = step_size * 0.618
            w_grad = np.linalg.norm(grad_loss(x, y, w, lam))
            print('the iteration:%d,the err:%.3e,the grad:%.3e,the value:%.3e'%(m + 1,rho,w_grad,loss(x,y,w,lam)))
    return w
def SGDmom(w0, x, y, lam, epoch):
    """Run single-sample SGD with a momentum (velocity) term.

    The velocity is ``v1 = (m + 2) / (m + 5) * v0 + alpha * direction`` and
    the weights move by ``v1`` each step.

    Args:
        w0: Initial ``(d, 1)`` weight column (not modified).
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` label column.
        lam: L1 regularization weight passed to ``grad_loss`` and ``loss``.
        epoch: Maximum number of update steps.

    Returns:
        The weight column after the last step.
    """
    eps = 1e-8
    dim = x.shape[0]
    # Start from the initial point (was all zeros), so that a run with no
    # steps returns w0, matching SGD and Adagrad.
    w = np.array(w0)
    print('the init norm:%.3e'%(np.linalg.norm(grad_loss(x,y,w0,lam))))
    v0 = 0
    m = 0
    alpha = 1e-1
    while m < epoch:
        # One randomly chosen sample per step; the slices keep both arrays 2-D.
        sk = random.choice(range(0, dim))
        diraction = -grad_loss(x[sk:sk + 1, :], y[sk:sk + 1, :], w0, lam)
        v1 = (m + 2) * v0 / (m + 5) + alpha * diraction
        w = w0 + v1

        rho = np.linalg.norm(w - w0)
        if rho < eps:
            break
        w0 = w
        # Carry the velocity into the next step. The original never updated
        # v0, so the momentum term was always zero and this was plain SGD.
        v0 = v1
        m = m + 1
        if (m + 1) % 40 == 0:
            # Shrink the step size, then report on the full data set.
            alpha = alpha * 0.618
            w_grad = np.linalg.norm(grad_loss(x, y, w, lam))
            print('the iteration:%d,the err:%.3e,the grad:%.3e,the value:%.3e'%(m + 1,rho,w_grad,loss(x,y,w,lam)))
    return w
def Nesterov(w0, x, y, lam, epoch):
    """Run single-sample SGD with Nesterov-style look-ahead momentum.

    The gradient is evaluated at ``w0 + (m + 2) / (m + 5) * v0`` and the
    velocity becomes that carried term minus ``alpha`` times the gradient.

    Args:
        w0: Initial ``(d, 1)`` weight column (not modified).
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` label column.
        lam: L1 regularization weight passed to ``grad_loss`` and ``loss``.
        epoch: Maximum number of update steps.

    Returns:
        ``(w, step, err)``: the final weights, the iteration numbers at which
        the training error was recorded, and those error values.
    """
    err = []
    step = []
    eps = 1e-5
    dim = x.shape[0]
    # Start from the initial point (was all zeros), so that a run with no
    # steps returns w0, matching SGD and Adagrad.
    w = np.array(w0)
    v0 = 0
    m = 0
    alpha = 1e-1
    while m < epoch:
        # One randomly chosen sample per step; the slices keep both arrays 2-D.
        sk = random.choice(range(0, dim))
        carried = (m + 2) * v0 / (m + 5)
        diraction = grad_loss(x[sk:sk + 1, :], y[sk:sk + 1, :], w0 + carried, lam)
        v1 = carried - alpha * diraction
        w = w0 + v1

        rho = np.linalg.norm(w - w0)
        if rho < eps:
            break
        w0 = w
        # Carry the velocity into the next step. The original never updated
        # v0, so the look-ahead and momentum terms were always zero.
        v0 = v1
        m = m + 1
        if (m + 1) % 50 == 0:
            # Halve the step size and record the training error.
            alpha = alpha * 0.5
            acc = accuracy(x, y, w).sum() / x.shape[0]
            step.append(m + 1)
            err.append(1 - acc)
            print('the iteration:%d,the err:%.3e,the train acc:%.3e,the value:%.3e'%(m + 1,rho,acc,loss(x,y,w,lam)))
    return w, step, err
def Adagrad(w0, x, y, lam, epoch):
    """Run single-sample Adagrad on the regularized logistic loss.

    Each step divides the descent direction element-wise by the square root
    of the accumulated squared directions (plus ``eps``).

    Args:
        w0: Initial ``(d, 1)`` weight column (not modified).
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` label column.
        lam: L1 regularization weight passed to ``grad_loss`` and ``loss``.
        epoch: Maximum number of update steps.

    Returns:
        ``(w, step, err)``: the final weights, the iteration numbers at which
        the training error was recorded, and those error values.
    """
    err = []
    step = []
    eps = 1e-8  # used both as stopping tolerance and inside the square root
    n_samples = x.shape[0]
    w = w0.copy()
    rate = 1e-1

    # The first direction is sampled before the loop starts.
    pick = random.choice(range(0, n_samples))
    direction = -grad_loss(x[pick:pick + 1, :], y[pick:pick + 1, :], w0, lam)
    squared_sum = direction * direction

    m = 0
    while m < epoch:
        w = w0 + rate * direction / np.sqrt(squared_sum + eps)
        # Sample the direction for the next step at the new point.
        pick = random.choice(range(0, n_samples))
        upcoming = -grad_loss(x[pick:pick + 1, :], y[pick:pick + 1, :], w, lam)
        rho = np.linalg.norm(w - w0)
        if rho < eps:
            break
        w0 = w
        direction = upcoming
        squared_sum = squared_sum + upcoming * upcoming
        m += 1
        if (m + 1) % 50 == 0:
            # Halve the base rate and record the training error.
            rate = rate * 0.5
            acc = accuracy(x, y, w).sum() / x.shape[0]
            step.append(m + 1)
            err.append(1 - acc)
            print('the iteration:%d,the err:%.3e,the train acc:%.3e,the value:%.3e'%(m + 1,rho,acc,loss(x,y,w,lam)))
    return w, step, err
# Script: run Adagrad on MNIST for several regularization weights and plot the
# training error of each run.
# Hard-coded locations of the four gzip files.
x_train_path = 'C:\\Users\\Desktop\\MNIST_data\\train-images-idx3-ubyte.gz'
y_train_path = 'C:\\Users\\Desktop\\MNIST_data\\train-labels-idx1-ubyte.gz'
x_test_path = 'C:\\Users\\Desktop\\MNIST_data\\t10k-images-idx3-ubyte.gz'
y_test_path = 'C:\\Users\\Desktop\\MNIST_data\\t10k-labels-idx1-ubyte.gz'
(x_train,y_train),(x_test,y_test)=load_mnist(x_train_path, y_train_path, x_test_path, y_test_path)

# Random initial weights, scaled so the largest absolute entry is 1.
wx = np.random.randn(784,1)
w0 = wx/max(abs(wx[:,0]))
x = x_train
y = y_train
# Passed to the optimizer as its maximum number of update steps.
epoch = 300
# Line style and marker style for each lambda, indexed by m.
col = ['r','b','k','g']
dot = ['r*','bo','k.','g']
m = 0
for lam in [10,1,0.1,0.001]:
    
    st = time.time()
    # Alternative optimizers, left disabled. SGD returns only w, so using it
    # would also require changing the plotting below.
    #w,step,err = Nesterov(w0,x,y,lam,epoch)
    w,step,err = Adagrad(w0,x,y,lam,epoch)
    #w = SGD(w0,x,y,lam,epoch)
    acc = accuracy(x_test,y_test,w).sum()/x_test.shape[0]
    ela = time.time() - st
    # Each run is drawn twice: once as a labelled line, once as markers.
    plt.plot(step,err,col[m],label = 'lambda:%.2e'%(lam))
    plt.plot(step,err,dot[m])
    m = m + 1
    plt.xlabel('iteration')
    plt.ylabel('train error')
    plt.title('mnist:Adagrad')
    plt.legend(loc = 'upper right')
    print('now,the lambda:%.2e'%(lam))
    print('the iteration:%d,the test acc:%.3e,the train time:%.2f'%(epoch,acc,ela))
    
    print('---------------------------------')
    
# All four runs share one figure, saved once after the loop.
plt.savefig('mnistA.jpg')

def _label(data):
    """Build a ``(rows, 1)`` column of +/-1 labels from the last column of ``data``.

    Rows whose last entry is divisible by 2 get -1; all other rows get 1.
    """
    is_even = data[:, -1] % 2 == 0
    return np.where(is_even, -1.0, 1.0).reshape(-1, 1)
np.random.seed(1234)

def loss(x, y, w, lam):
    """Return the L1-regularized mean logistic loss.

    Computes ``mean(log(1 + exp(-y * (x @ w)))) + lam * ||w||_1``.

    Args:
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` column of +/-1 labels.
        w: ``(d, 1)`` weight column.
        lam: L1 regularization weight.
    """
    # np.logaddexp(0, z) is log(1 + exp(z)) evaluated without overflow; the
    # direct formula returned inf for large z.
    return np.logaddexp(0, -y * x @ w).mean() + lam * np.linalg.norm(w, 1)
def grad_loss(x, y, w, lam):
    """Return the (sub)gradient of the L1-regularized logistic loss w.r.t. ``w``.

    Args:
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` column of +/-1 labels.
        w: ``(d, 1)`` weight column.
        lam: L1 regularization weight.

    Returns:
        ``(d, 1)`` array: the sample mean of ``-y * x * sigmoid(-y * (x @ w))``
        plus ``lam * sign(w)``.
    """
    z = -y * x @ w
    # Overflow-safe sigmoid(z); exp(z) / (1 + exp(z)) gave NaN once exp(z)
    # overflowed to inf.
    sig = np.exp(-np.logaddexp(0, -z))
    return (-y * x * sig).mean(0).reshape(-1, 1) + lam * np.sign(w)
def grad_f(x, y, w):
    """Return the gradient of the unregularized mean logistic loss w.r.t. ``w``.

    Args:
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` column of +/-1 labels.
        w: ``(d, 1)`` weight column.

    Returns:
        ``(d, 1)`` array: the sample mean of ``-y * x * sigmoid(-y * (x @ w))``.
    """
    z = -y * x @ w
    # Overflow-safe sigmoid(z), replacing exp(z) / (1 + exp(z)).
    sig = np.exp(-np.logaddexp(0, -z))
    return (-y * x * sig).mean(0).reshape(-1, 1)
def accuracy(x, y, w):
    """Mark, per row, whether the sign of ``x @ w`` matches the label in ``y``.

    Scores above 0 count as +1 and scores at or below 0 as -1. The result is a
    float32 array of ones (match) and zeros (mismatch).
    """
    scores = x @ w
    above = (scores > 0).astype('float32')
    at_or_below = (scores <= 0).astype('float32')
    decided = above - at_or_below
    hits = decided == y
    return hits.astype('float32')
def Nesterov(w0, x, y, lam, epoch):
    """Run single-sample SGD with Nesterov-style look-ahead momentum.

    The gradient is evaluated at ``w0 + (m + 2) / (m + 5) * v0`` and the
    velocity becomes that carried term minus ``alpha`` times the gradient.

    Args:
        w0: Initial ``(d, 1)`` weight column (not modified).
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` label column.
        lam: L1 regularization weight passed to ``grad_loss`` and ``loss``.
        epoch: Maximum number of update steps.

    Returns:
        ``(w, step, err)``: the final weights, the iteration numbers at which
        the training error was recorded, and those error values.
    """
    err = []
    step = []
    eps = 1e-5
    dim = x.shape[0]
    # Start from the initial point (was all zeros), so that a run with no
    # steps returns w0, matching Adagrad.
    w = np.array(w0)
    v0 = 0
    m = 0
    alpha = 1e-1
    while m < epoch:
        # One randomly chosen sample per step; the slices keep both arrays 2-D.
        sk = random.choice(range(0, dim))
        carried = (m + 2) * v0 / (m + 5)
        diraction = grad_loss(x[sk:sk + 1, :], y[sk:sk + 1, :], w0 + carried, lam)
        v1 = carried - alpha * diraction
        w = w0 + v1

        rho = np.linalg.norm(w - w0)
        if rho < eps:
            break
        w0 = w
        # Carry the velocity into the next step. The original never updated
        # v0, so the look-ahead and momentum terms were always zero.
        v0 = v1
        m = m + 1
        if (m + 1) % 50 == 0:
            # Halve the step size and record the training error.
            alpha = alpha * 0.5
            acc = accuracy(x, y, w).sum() / x.shape[0]
            step.append(m + 1)
            err.append(1 - acc)
            print('the iteration:%d,the err:%.3e,the train acc:%.3e,the value:%.3e'%(m + 1,rho,acc,loss(x,y,w,lam)))
    return w, step, err
def Adagrad(w0, x, y, lam, epoch):
    """Run single-sample Adagrad on the regularized logistic loss.

    Each step divides the descent direction element-wise by the square root
    of the accumulated squared directions (plus ``eps``).

    Args:
        w0: Initial ``(d, 1)`` weight column (not modified).
        x: ``(N, d)`` feature matrix.
        y: ``(N, 1)`` label column.
        lam: L1 regularization weight passed to ``grad_loss`` and ``loss``.
        epoch: Maximum number of update steps.

    Returns:
        ``(w, step, err)``: the final weights, the iteration numbers at which
        the training error was recorded, and those error values.
    """
    err = []
    step = []
    eps = 1e-8  # used both as stopping tolerance and inside the square root
    n_samples = x.shape[0]
    w = w0.copy()
    rate = 1e-1

    # The first direction is sampled before the loop starts.
    pick = random.choice(range(0, n_samples))
    direction = -grad_loss(x[pick:pick + 1, :], y[pick:pick + 1, :], w0, lam)
    squared_sum = direction * direction

    m = 0
    while m < epoch:
        w = w0 + rate * direction / np.sqrt(squared_sum + eps)
        # Sample the direction for the next step at the new point.
        pick = random.choice(range(0, n_samples))
        upcoming = -grad_loss(x[pick:pick + 1, :], y[pick:pick + 1, :], w, lam)
        rho = np.linalg.norm(w - w0)
        if rho < eps:
            break
        w0 = w
        direction = upcoming
        squared_sum = squared_sum + upcoming * upcoming
        m += 1
        if (m + 1) % 50 == 0:
            # Halve the base rate and record the training error.
            rate = rate * 0.5
            acc = accuracy(x, y, w).sum() / x.shape[0]
            step.append(m + 1)
            err.append(1 - acc)
            print('the iteration:%d,the err:%.3e,the train acc:%.3e,the value:%.3e'%(m + 1,rho,acc,loss(x,y,w,lam)))
    return w, step, err
# Script: run Nesterov on the covertype data for several regularization
# weights and plot the training error of each run.
# NOTE(review): `data` is not defined in this part of the file; presumably it
# is the array loaded from cover.data earlier — confirm before running alone.
train_num = 500000
# Build the label column and the feature matrix once, then split both at
# train_num (the labels were previously recomputed for each split).
cover_labels = _label(data)
cover_features = data[:, :-1]
y_train = cover_labels[:train_num, :]
x_train = cover_features[:train_num, :]
y_test = cover_labels[train_num:, :]
x_test = cover_features[train_num:, :]
# Random initial weights, scaled so the largest absolute entry is 1.
wx = np.random.randn(54, 1)
w0 = wx / max(abs(wx[:, 0]))
# Training features are scaled down by 1e4. The test features are left
# unscaled; the predicted sign of x @ w does not change under a positive
# rescaling of x.
x = x_train / 1e4
y = y_train
# Passed to the optimizer as its maximum number of update steps.
epoch = 300
# Line style and marker style for each lambda, indexed by m.
col = ['r','b','k','g']
dot = ['r*','bo','k.','g']
m = 0
# Start a fresh figure so curves from an earlier experiment in the same
# session do not end up in coverN.jpg.
plt.figure()
for lam in [10, 1, 0.1, 0.001]:

    st = time.time()
    w, step, err = Nesterov(w0, x, y, lam, epoch)
    # Alternative optimizers, left disabled. SGD returns only w, so using it
    # would also require changing the plotting below.
    #w,step,err = Adagrad(w0,x,y,lam,epoch)
    #w = SGD(w0,x,y,lam,epoch)
    acc = accuracy(x_test, y_test, w).sum() / x_test.shape[0]
    ela = time.time() - st
    # Each run is drawn twice: once as a labelled line, once as markers.
    plt.plot(step, err, col[m], label='lambda:%.2e'%(lam))
    plt.plot(step, err, dot[m])
    m = m + 1
    plt.xlabel('iteration')
    plt.ylabel('train error')
    plt.title('covertype:Nesterov')
    plt.legend(loc='upper right')
    print('now,the lambda:%.2e'%(lam))
    print('the iteration:%d,the test acc:%.3e,the train time:%.2f'%(epoch,acc,ela))

    print('---------------------------------')

plt.savefig('coverN.jpg')