CNN实现MNIST数据集的图片分类

42 篇文章 68 订阅 ¥239.90 ¥99.00

FNN

先给出用全连接神经网络(FNN)预测MNIST的代码。为了方便后面修改为CNN网络,这里把网络的输入维度作为参数input_dim=784传入;hid_layers是一个列表,依次给出各隐藏层以及输出层的宽度(例如[64,10]表示一个64维的隐藏层和一个10维的输出层)。
在这里插入图片描述
fnn.py

import numpy as np
import matplotlib.pyplot as plt
import torch
from struct import unpack
import gzip

import torch.nn as nn
import time
np.random.seed(1234)
def __read_image(path):
    """Read a gzip-compressed IDX3 image file into a 2-D uint8 array.

    The 16-byte big-endian header holds four unsigned ints: a magic number,
    the image count, and the row and column count of each image. The pixel
    bytes follow the header.

    Args:
        path: Path of the gzip-compressed image file.

    Returns:
        A ``(num, rows * cols)`` uint8 array with one flattened image per row.
    """
    with gzip.open(path, 'rb') as f:
        _magic, num, rows, cols = unpack('>4I', f.read(16))
        # Reshape with the dimensions stored in the header rather than a
        # hard-coded 28*28, so files with other image sizes load correctly.
        img = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows * cols)
    return img

def __read_label(path):
    """Read a gzip-compressed IDX1 label file into a 1-D uint8 array.

    Args:
        path: Path of the gzip-compressed label file.

    Returns:
        A uint8 array holding every byte that follows the 8-byte header.
    """
    with gzip.open(path, 'rb') as stream:
        header = stream.read(8)
        payload = stream.read()
    # The header (magic number, item count) is parsed only so that a
    # truncated file still raises; its values are not used.
    unpack('>2I', header)
    return np.frombuffer(payload, dtype=np.uint8)
    
def __normalize_image(image):
    """Return ``image`` converted to float32 and divided by 255.0."""
    as_float = image.astype(np.float32)
    return np.true_divide(as_float, 255.0)
def __one_hot_label(label):
    """Return a ``(len(label), 10)`` float64 one-hot encoding of ``label``.

    Row ``i`` is all zeros except for a 1 in column ``label[i]``.
    """
    encoded = np.zeros((label.shape[0], 10))
    # Rows of ``encoded`` are views, so writing through them fills the matrix.
    for target_row, cls in zip(encoded, label):
        target_row[cls] = 1
    return encoded

def load_mnist(x_train_path, y_train_path, x_test_path, y_test_path, normalize=True, one_hot=False):
    """Load the train and test images and labels from four gzip IDX files.

    Args:
        x_train_path: Path of the training image file.
        y_train_path: Path of the training label file.
        x_test_path: Path of the test image file.
        y_test_path: Path of the test label file.
        normalize: If true, images are converted to float32 and divided by 255.
        one_hot: If true, labels are converted to 10-column one-hot rows.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
    """
    # Files are read in the same order as before: both image files first,
    # then both label files.
    train_x = __read_image(x_train_path)
    test_x = __read_image(x_test_path)
    train_y = __read_label(y_train_path)
    test_y = __read_label(y_test_path)

    if normalize:
        train_x = __normalize_image(train_x)
        test_x = __normalize_image(test_x)

    if one_hot:
        train_y = __one_hot_label(train_y)
        test_y = __one_hot_label(test_y)

    return (train_x, train_y), (test_x, test_y)
class Net(nn.Module):
    """Fully connected classifier built from a stack of ``nn.Linear`` layers.

    Args:
        input_dim: Number of input features per sample.
        hid_layers: Output widths of the successive linear layers; the last
            entry is the width of the network output.
        k: Not used by this class.
        dtype: Torch dtype the layer weights and biases are cast to.
    """

    def __init__(self,input_dim,hid_layers,k,dtype):
        super(Net,self).__init__()
        self.layers = [input_dim] + hid_layers
        self.layers_hid_num = len(self.layers)-2
        fc = []
        for i in range(self.layers_hid_num+1):
            fc.append(torch.nn.Linear(self.layers[i],self.layers[i+1]))
        self.fc = torch.nn.Sequential(*fc)
        for layer in self.fc:
            layer.weight.data = layer.weight.data.type(dtype)
            layer.bias.data = layer.bias.data.type(dtype)

    def Dense(self,x):
        """Apply every linear layer; all but the last are followed by ReLU and a skip term."""
        for i in range(self.layers_hid_num):
            x = torch.relu(self.fc[i](x))
            # Build the identity with x's own dtype and device. A default
            # (float32, CPU) eye made the matmul fail for any other dtype or
            # device. fc[i] outputs layers[i+1] features, so the eye is square.
            temp = torch.eye(x.shape[-1],self.layers[i+1],dtype = x.dtype,device = x.device)
            x = x + x@temp
        return self.fc[-1](x)

    def forward(self,x):
        """Return the output of ``Dense`` for the input batch ``x``."""
        return self.Dense(x)

    def total_para(self):
        """Return the total number of elements over all parameters."""
        return sum([x.numel() for x in self.parameters()])
def pred(net,images):
    """Return, for each row of ``net.forward(images)``, the index of its largest entry."""
    scores = net.forward(images)
    return torch.argmax(scores, dim=1)
def SOmax(X,y):
    """Return the softmax probability each row of ``X`` assigns to its true class.

    Args:
        X: 2-D tensor of scores, one row per sample.
        y: Integer class labels, one per row of ``X``.

    Returns:
        A column tensor with one probability per row of ``X``.
    """
    y = torch.tensor(__one_hot_label(y))
    # Subtract each row's maximum before exponentiating. Softmax is unchanged
    # by a per-row shift, and this keeps exp() from overflowing to inf (which
    # previously produced inf/inf = nan for large scores).
    shifted = X - X.max(dim = 1,keepdim = True).values
    exp_x = torch.exp(shifted)
    return (exp_x*y).sum(1,keepdims = True)/exp_x.sum(1,keepdims = True)
'''
def Loss(net,images,labels):
    
    
    logits = net.forward(images)
    
    loss = -torch.log(SOmax(logits,labels)).mean()
    return loss
'''
def Loss(net,images,labels):
    """Return the cross-entropy loss of ``net.forward(images)`` against ``labels``.

    Uses ``nn.CrossEntropyLoss`` with its default settings. The argmax that
    was previously computed here and never used has been removed.
    """
    criteon = nn.CrossEntropyLoss()
    logits = net.forward(images)
    return criteon(logits,labels)

def pred_acc(net,images,labels):
    """Return the number of samples whose argmax prediction equals the label.

    Returns:
        A 0-dim tensor holding the count of correct predictions.
    """
    # Evaluation only: run the forward pass without autograd so no graph is
    # recorded for what may be the whole data set.
    with torch.no_grad():
        logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum()
def Train(net,train_images,train_labels,batch,epoch,optim):
    """Train ``net`` on consecutive slices of the training set.

    Each iteration takes the slice starting at ``i*batch % len(train_images)``,
    computes ``Loss`` on it and performs one optimizer step. The batch loss is
    printed every 20 iterations, and the training accuracy is printed whenever
    ``(i + 1)*batch`` is a multiple of the data-set size.

    Args:
        net: Model passed to ``Loss`` and ``pred_acc``.
        train_images: Training inputs, indexed along the first axis.
        train_labels: Training labels aligned with ``train_images``.
        batch: Number of samples per slice.
        epoch: Number of passes; sets the total iteration count.
        optim: Optimizer exposing ``zero_grad`` and ``step``.
    """
    m = 0  # number of epoch reports printed so far
    n_samples = len(train_images)
    iter_num = int(epoch*train_images.shape[0]/batch)
    for i in range(iter_num):
        x = i*batch%n_samples
        y = x + batch

        loss = Loss(net,train_images[x:y],train_labels[x:y])
        optim.zero_grad()
        loss.backward()
        optim.step()

        if i%20 == 0:
            print('the iteration:%d,the batch_loss:%.3e,the trained photo:%d'%(i + 20,loss.item(),y))
        # Independent ``if`` (previously ``elif``): when an epoch ended on an
        # iteration that also logged the batch loss, the epoch report and the
        # epoch counter were silently skipped.
        if (i + 1)*batch%n_samples == 0:
            train_acc = pred_acc(net,train_images,train_labels)
            m = m + 1
            acc = train_acc.item()/train_images.shape[0]
            print('the epoch:%d,the acc:%.2f'%(m,acc))
def test_acc(net,images,labels):
    logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum().item()/images.shape[0]

# Hard-coded locations of the four gzip-compressed MNIST files.
x_train_path = 'C:\\Users\\Desktop\\icbc\\deepfm\\mnist_dataset\\train-images-idx3-ubyte.gz'
y_train_path = 'C:\\Users\\Desktop\\icbc\\deepfm\\mnist_dataset\\train-labels-idx1-ubyte.gz'
x_test_path = 'C:\\Users\\Desktop\\icbc\\deepfm\\mnist_dataset\\t10k-images-idx3-ubyte.gz'
y_test_path = 'C:\\Users\\Desktop\\icbc\\deepfm\\mnist_dataset\\t10k-labels-idx1-ubyte.gz'
(x_train,y_train),(x_test,y_test)=load_mnist(x_train_path, y_train_path, x_test_path, y_test_path)
# The timer starts after the files are loaded and stops right after Train(),
# so the reported time excludes data loading and the final test evaluation.
tic = time.time()
# Images become float tensors; labels become int64 tensors for CrossEntropyLoss.
train_images = torch.tensor(x_train).float()
train_labels = torch.tensor(y_train).long()
test_images = torch.tensor(x_test).float()
test_labels = torch.tensor(y_test).long()
# Each image is loaded as a flat vector of 28*28 = 784 values.
input_dim = 784
# Widths of the layers after the input: one 64-wide hidden layer, 10 outputs.
hid_layers = [64,10]
k = 10
dtype = torch.float32
net = Net(input_dim,hid_layers,k,dtype)

#optim = torch.optim.SGD(net.parameters(),lr = 1e-3,momentum = 0.78)
optim = torch.optim.Adam(net.parameters(),lr = 1e-3,betas=(0.9,0.999))
batch = 1000
epoch = 8
Train(net,train_images,train_labels,batch,epoch,optim)
ela = time.time() - tic
print('the time:%.2f,the test_acc:%.2f'%(ela,test_acc(net,test_images,test_labels)))



FNN+FM

这里引入因子分解机(FM)技术:在全连接网络之外增加一个额外的线性层和一个二阶特征交叉项,并把它们的输出与全连接网络的输出相加。不过效果没有明显提升。
在这里插入图片描述
fnnfm.py

import numpy as np
import matplotlib.pyplot as plt
import torch
from struct import unpack
import gzip

import torch.nn as nn
import time
np.random.seed(1234)
def __read_image(path):
    """Read a gzip-compressed IDX3 image file into a 2-D uint8 array.

    The 16-byte big-endian header holds four unsigned ints: a magic number,
    the image count, and the row and column count of each image. The pixel
    bytes follow the header.

    Args:
        path: Path of the gzip-compressed image file.

    Returns:
        A ``(num, rows * cols)`` uint8 array with one flattened image per row.
    """
    with gzip.open(path, 'rb') as f:
        _magic, num, rows, cols = unpack('>4I', f.read(16))
        # Reshape with the dimensions stored in the header rather than a
        # hard-coded 28*28, so files with other image sizes load correctly.
        img = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows * cols)
    return img

def __read_label(path):
    """Read a gzip-compressed IDX1 label file into a 1-D uint8 array.

    Args:
        path: Path of the gzip-compressed label file.

    Returns:
        A uint8 array holding every byte that follows the 8-byte header.
    """
    with gzip.open(path, 'rb') as stream:
        header = stream.read(8)
        payload = stream.read()
    # The header (magic number, item count) is parsed only so that a
    # truncated file still raises; its values are not used.
    unpack('>2I', header)
    return np.frombuffer(payload, dtype=np.uint8)
    
def __normalize_image(image):
    """Return ``image`` converted to float32 and divided by 255.0."""
    as_float = image.astype(np.float32)
    return np.true_divide(as_float, 255.0)
def __one_hot_label(label):
    """Return a ``(len(label), 10)`` float64 one-hot encoding of ``label``.

    Row ``i`` is all zeros except for a 1 in column ``label[i]``.
    """
    encoded = np.zeros((label.shape[0], 10))
    # Rows of ``encoded`` are views, so writing through them fills the matrix.
    for target_row, cls in zip(encoded, label):
        target_row[cls] = 1
    return encoded

def load_mnist(x_train_path, y_train_path, x_test_path, y_test_path, normalize=True, one_hot=False):
    """Load the train and test images and labels from four gzip IDX files.

    Args:
        x_train_path: Path of the training image file.
        y_train_path: Path of the training label file.
        x_test_path: Path of the test image file.
        y_test_path: Path of the test label file.
        normalize: If true, images are converted to float32 and divided by 255.
        one_hot: If true, labels are converted to 10-column one-hot rows.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
    """
    # Files are read in the same order as before: both image files first,
    # then both label files.
    train_x = __read_image(x_train_path)
    test_x = __read_image(x_test_path)
    train_y = __read_label(y_train_path)
    test_y = __read_label(y_test_path)

    if normalize:
        train_x = __normalize_image(train_x)
        test_x = __normalize_image(test_x)

    if one_hot:
        train_y = __one_hot_label(train_y)
        test_y = __one_hot_label(test_y)

    return (train_x, train_y), (test_x, test_y)
class Net(nn.Module):
    """Fully connected classifier whose output is summed with a factorization-machine term.

    Args:
        input_dim: Number of input features per sample.
        hid_layers: Output widths of the successive linear layers; the last
            entry is the width of the network output.
        k: Number of columns of the factor matrix ``v``.
        dtype: Torch dtype all parameters are cast to.
    """

    def __init__(self,input_dim,hid_layers,k,dtype):
        super(Net,self).__init__()
        self.layers = [input_dim] + hid_layers
        self.layers_hid_num = len(self.layers)-2
        fc = []
        for i in range(self.layers_hid_num+1):
            fc.append(torch.nn.Linear(self.layers[i],self.layers[i+1]))
        self.fc = torch.nn.Sequential(*fc)
        for layer in self.fc:
            layer.weight.data = layer.weight.data.type(dtype)
            layer.bias.data = layer.bias.data.type(dtype)

        # FM branch: a linear map from the raw input to the output width,
        # plus the (input_dim, k) factor matrix.
        self.w = torch.nn.Linear(input_dim,hid_layers[-1])
        self.fm = torch.nn.Sequential(self.w)
        self.fm[0].weight.data = self.fm[0].weight.data.type(dtype)
        self.fm[0].bias.data = self.fm[0].bias.data.type(dtype)
        # Cast the random init BEFORE wrapping it in a Parameter. Casting the
        # registered Parameter afterwards yields a plain tensor, and assigning
        # that back to ``self.v`` raises TypeError for any dtype but float32.
        self.v = torch.nn.Parameter(torch.rand(input_dim,k).type(dtype), requires_grad=True)

    def FM(self,x):
        """Return the linear term plus the pairwise-interaction scalar repeated over the outputs."""
        linear_part = self.fm[0](x)
        inner_part1 = torch.pow(x,2)@torch.pow(self.v,2)
        inner_part2 = torch.pow((x@self.v),2)
        # One interaction value per sample, summed over the k factor columns.
        inner_s = 0.5*(inner_part2 - inner_part1).sum(axis = 1,keepdims = True)
        inner = inner_s.repeat(1,self.layers[-1])

        return linear_part + inner

    def Dense(self,x):
        """Apply every linear layer; all but the last are followed by ReLU and a skip term."""
        for i in range(self.layers_hid_num):
            x = torch.relu(self.fc[i](x))
            # Build the identity with x's own dtype and device. A default
            # (float32, CPU) eye made the matmul fail for any other dtype or
            # device. fc[i] outputs layers[i+1] features, so the eye is square.
            temp = torch.eye(x.shape[-1],self.layers[i+1],dtype = x.dtype,device = x.device)
            x = x + x@temp
        return self.fc[-1](x)

    def forward(self,x):
        """Return the sum of the dense output and the FM output for ``x``."""
        fm_output = self.FM(x)
        dense_output = self.Dense(x)
        return dense_output + fm_output

    def total_para(self):
        """Return the total number of elements over all parameters."""
        return sum([x.numel() for x in self.parameters()])
def pred(net,images):
    """Return, for each row of ``net.forward(images)``, the index of its largest entry."""
    scores = net.forward(images)
    return torch.argmax(scores, dim=1)
def SOmax(X,y):
    """Return the softmax probability each row of ``X`` assigns to its true class.

    Args:
        X: 2-D tensor of scores, one row per sample.
        y: Integer class labels, one per row of ``X``.

    Returns:
        A column tensor with one probability per row of ``X``.
    """
    y = torch.tensor(__one_hot_label(y))
    # Subtract each row's maximum before exponentiating. Softmax is unchanged
    # by a per-row shift, and this keeps exp() from overflowing to inf (which
    # previously produced inf/inf = nan for large scores).
    shifted = X - X.max(dim = 1,keepdim = True).values
    exp_x = torch.exp(shifted)
    return (exp_x*y).sum(1,keepdims = True)/exp_x.sum(1,keepdims = True)
'''
def Loss(net,images,labels):
    
    
    logits = net.forward(images)
    
    loss = -torch.log(SOmax(logits,labels)).mean()
    return loss
'''
def Loss(net,images,labels):
    """Return the cross-entropy loss of ``net.forward(images)`` against ``labels``.

    Uses ``nn.CrossEntropyLoss`` with its default settings. The argmax that
    was previously computed here and never used has been removed.
    """
    criteon = nn.CrossEntropyLoss()
    logits = net.forward(images)
    return criteon(logits,labels)

def pred_acc(net,images,labels):
    """Return the number of samples whose argmax prediction equals the label.

    Returns:
        A 0-dim tensor holding the count of correct predictions.
    """
    # Evaluation only: run the forward pass without autograd so no graph is
    # recorded for what may be the whole data set.
    with torch.no_grad():
        logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum()
def Train(net,train_images,train_labels,batch,epoch,optim):
    """Train ``net`` on consecutive slices of the training set.

    Each iteration takes the slice starting at ``i*batch % len(train_images)``,
    computes ``Loss`` on it and performs one optimizer step. The batch loss is
    printed every 20 iterations, and the training accuracy is printed whenever
    ``(i + 1)*batch`` is a multiple of the data-set size.

    Args:
        net: Model passed to ``Loss`` and ``pred_acc``.
        train_images: Training inputs, indexed along the first axis.
        train_labels: Training labels aligned with ``train_images``.
        batch: Number of samples per slice.
        epoch: Number of passes; sets the total iteration count.
        optim: Optimizer exposing ``zero_grad`` and ``step``.
    """
    m = 0  # number of epoch reports printed so far
    n_samples = len(train_images)
    iter_num = int(epoch*train_images.shape[0]/batch)
    for i in range(iter_num):
        x = i*batch%n_samples
        y = x + batch

        loss = Loss(net,train_images[x:y],train_labels[x:y])
        optim.zero_grad()
        loss.backward()
        optim.step()

        if i%20 == 0:
            print('the iteration:%d,the batch_loss:%.3e,the trained photo:%d'%(i + 20,loss.item(),y))
        # Independent ``if`` (previously ``elif``): when an epoch ended on an
        # iteration that also logged the batch loss, the epoch report and the
        # epoch counter were silently skipped.
        if (i + 1)*batch%n_samples == 0:
            train_acc = pred_acc(net,train_images,train_labels)
            m = m + 1
            acc = train_acc.item()/train_images.shape[0]
            print('the epoch:%d,the acc:%.2f'%(m,acc))
def test_acc(net,images,labels):
    logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum().item()/images.shape[0]

# Hard-coded locations of the four gzip-compressed MNIST files.
x_train_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\train-images-idx3-ubyte.gz'
y_train_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\train-labels-idx1-ubyte.gz'
x_test_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\t10k-images-idx3-ubyte.gz'
y_test_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\t10k-labels-idx1-ubyte.gz'
(x_train,y_train),(x_test,y_test)=load_mnist(x_train_path, y_train_path, x_test_path, y_test_path)
# The timer starts after the files are loaded and stops right after Train(),
# so the reported time excludes data loading and the final test evaluation.
tic = time.time()
# Images become float tensors; labels become int64 tensors for CrossEntropyLoss.
train_images = torch.tensor(x_train).float()
train_labels = torch.tensor(y_train).long()
test_images = torch.tensor(x_test).float()
test_labels = torch.tensor(y_test).long()
# Each image is loaded as a flat vector of 28*28 = 784 values.
input_dim = 784
# Widths of the layers after the input: one 64-wide hidden layer, 10 outputs.
hid_layers = [64,10]
# Number of columns of the FM factor matrix.
k = 10
dtype = torch.float32
net = Net(input_dim,hid_layers,k,dtype)

#optim = torch.optim.SGD(net.parameters(),lr = 1e-3,momentum = 0.78)
optim = torch.optim.Adam(net.parameters(),lr = 1e-3,betas=(0.9,0.999))
batch = 1000
epoch = 8
Train(net,train_images,train_labels,batch,epoch,optim)
ela = time.time() - tic
print('the time:%.2f,the test_acc:%.2f'%(ela,test_acc(net,test_images,test_labels)))



1维卷积的引入

1维卷积的引入可以降低参数量,但是发现引入卷积运算以后,代码训练速度明显慢了很多,这里值得考虑。
在这里插入图片描述

import numpy as np
import matplotlib.pyplot as plt
import torch
from struct import unpack
import gzip

import torch.nn as nn
import time
np.random.seed(1234)
def __read_image(path):
    """Read a gzip-compressed IDX3 image file into a 2-D uint8 array.

    The 16-byte big-endian header holds four unsigned ints: a magic number,
    the image count, and the row and column count of each image. The pixel
    bytes follow the header.

    Args:
        path: Path of the gzip-compressed image file.

    Returns:
        A ``(num, rows * cols)`` uint8 array with one flattened image per row.
    """
    with gzip.open(path, 'rb') as f:
        _magic, num, rows, cols = unpack('>4I', f.read(16))
        # Reshape with the dimensions stored in the header rather than a
        # hard-coded 28*28, so files with other image sizes load correctly.
        img = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows * cols)
    return img

def __read_label(path):
    """Read a gzip-compressed IDX1 label file into a 1-D uint8 array.

    Args:
        path: Path of the gzip-compressed label file.

    Returns:
        A uint8 array holding every byte that follows the 8-byte header.
    """
    with gzip.open(path, 'rb') as stream:
        header = stream.read(8)
        payload = stream.read()
    # The header (magic number, item count) is parsed only so that a
    # truncated file still raises; its values are not used.
    unpack('>2I', header)
    return np.frombuffer(payload, dtype=np.uint8)
    
def __normalize_image(image):
    """Return ``image`` converted to float32 and divided by 255.0."""
    as_float = image.astype(np.float32)
    return np.true_divide(as_float, 255.0)
def __one_hot_label(label):
    """Return a ``(len(label), 10)`` float64 one-hot encoding of ``label``.

    Row ``i`` is all zeros except for a 1 in column ``label[i]``.
    """
    encoded = np.zeros((label.shape[0], 10))
    # Rows of ``encoded`` are views, so writing through them fills the matrix.
    for target_row, cls in zip(encoded, label):
        target_row[cls] = 1
    return encoded

def load_mnist(x_train_path, y_train_path, x_test_path, y_test_path, normalize=True, one_hot=False):
    """Load the train and test images and labels from four gzip IDX files.

    Args:
        x_train_path: Path of the training image file.
        y_train_path: Path of the training label file.
        x_test_path: Path of the test image file.
        y_test_path: Path of the test label file.
        normalize: If true, images are converted to float32 and divided by 255.
        one_hot: If true, labels are converted to 10-column one-hot rows.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
    """
    # Files are read in the same order as before: both image files first,
    # then both label files.
    train_x = __read_image(x_train_path)
    test_x = __read_image(x_test_path)
    train_y = __read_label(y_train_path)
    test_y = __read_label(y_test_path)

    if normalize:
        train_x = __normalize_image(train_x)
        test_x = __normalize_image(test_x)

    if one_hot:
        train_y = __one_hot_label(train_y)
        test_y = __one_hot_label(test_y)

    return (train_x, train_y), (test_x, test_y)
class Net(nn.Module):
    """Three 1-D convolutions followed by a stack of linear layers.

    Args:
        input_dim: Length of each input vector; used to derive the length of
            the convolution output that feeds the first linear layer.
        hid_layers: Output widths of the successive linear layers; the last
            entry is the width of the network output.
        k: Not used by this class.
        dtype: Torch dtype all weights and biases are cast to.
    """

    def __init__(self,input_dim,hid_layers,k,dtype):
        super(Net,self).__init__()
        self.kernel_size = [10,10,6]
        self.stride = [1,1,2]
        cov = []
        out_dim = [3,3,1]
        in_dim = 1
        for i in range(len(self.kernel_size)):
            cov.append(nn.Conv1d(in_channels = in_dim,out_channels = out_dim[i],kernel_size = self.kernel_size[i],stride = self.stride[i]))
            # Output length of an unpadded convolution.
            input_dim = (input_dim - self.kernel_size[i])//self.stride[i] + 1
            in_dim = out_dim[i]
        self.cov = torch.nn.Sequential(*cov)
        # Cast the convolutions too. Previously only the linear layers were
        # cast, so any dtype other than float32 failed in the first conv.
        for layer in self.cov:
            layer.weight.data = layer.weight.data.type(dtype)
            layer.bias.data = layer.bias.data.type(dtype)

        self.layers = [input_dim] + hid_layers
        self.layers_hid_num = len(self.layers)-2
        fc = []
        for i in range(self.layers_hid_num+1):
            fc.append(torch.nn.Linear(self.layers[i],self.layers[i+1]))
        self.fc = torch.nn.Sequential(*fc)
        for layer in self.fc:
            layer.weight.data = layer.weight.data.type(dtype)
            layer.bias.data = layer.bias.data.type(dtype)

    def CNN(self,x):
        """Insert a channel axis, apply each convolution + ReLU, then drop the channel axis."""
        h = x.reshape(x.shape[0],1,x.shape[1])
        for i in range(len(self.cov)):
            h = self.cov[i](h)
            h = torch.relu(h)
        # The last convolution has one output channel, so dim 1 has size 1.
        return h.squeeze(dim = 1)

    def Dense(self,x):
        """Apply every linear layer; all but the last are followed by ReLU and a skip term."""
        for i in range(self.layers_hid_num):
            x = torch.relu(self.fc[i](x))
            # Build the identity with x's own dtype and device. A default
            # (float32, CPU) eye made the matmul fail for any other dtype or
            # device. fc[i] outputs layers[i+1] features, so the eye is square.
            temp = torch.eye(x.shape[-1],self.layers[i+1],dtype = x.dtype,device = x.device)
            x = x + x@temp
        return self.fc[-1](x)

    def forward(self,x):
        """Run the convolution stack and then the linear stack on ``x``."""
        x = self.CNN(x)
        return self.Dense(x)

    def total_para(self):
        """Return the total number of elements over all parameters."""
        return sum([x.numel() for x in self.parameters()])
def pred(net,images):
    """Return, for each row of ``net.forward(images)``, the index of its largest entry."""
    scores = net.forward(images)
    return torch.argmax(scores, dim=1)
def SOmax(X,y):
    """Return the softmax probability each row of ``X`` assigns to its true class.

    Args:
        X: 2-D tensor of scores, one row per sample.
        y: Integer class labels, one per row of ``X``.

    Returns:
        A column tensor with one probability per row of ``X``.
    """
    y = torch.tensor(__one_hot_label(y))
    # Subtract each row's maximum before exponentiating. Softmax is unchanged
    # by a per-row shift, and this keeps exp() from overflowing to inf (which
    # previously produced inf/inf = nan for large scores).
    shifted = X - X.max(dim = 1,keepdim = True).values
    exp_x = torch.exp(shifted)
    return (exp_x*y).sum(1,keepdims = True)/exp_x.sum(1,keepdims = True)
'''
def Loss(net,images,labels):
    
    
    logits = net.forward(images)
    
    loss = -torch.log(SOmax(logits,labels)).mean()
    return loss
'''
def Loss(net,images,labels):
    """Return the cross-entropy loss of ``net.forward(images)`` against ``labels``.

    Uses ``nn.CrossEntropyLoss`` with its default settings. The argmax that
    was previously computed here and never used has been removed.
    """
    criteon = nn.CrossEntropyLoss()
    logits = net.forward(images)
    return criteon(logits,labels)

def pred_acc(net,images,labels):
    """Return the number of samples whose argmax prediction equals the label.

    Returns:
        A 0-dim tensor holding the count of correct predictions.
    """
    # Evaluation only: run the forward pass without autograd so no graph is
    # recorded for what may be the whole data set.
    with torch.no_grad():
        logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum()
def Train(net,train_images,train_labels,batch,epoch,optim):
    """Train ``net`` on consecutive slices of the training set.

    Each iteration takes the slice starting at ``i*batch % len(train_images)``,
    computes ``Loss`` on it and performs one optimizer step. The batch loss is
    printed every 20 iterations, and the training accuracy is printed whenever
    ``(i + 1)*batch`` is a multiple of the data-set size.

    Args:
        net: Model passed to ``Loss`` and ``pred_acc``.
        train_images: Training inputs, indexed along the first axis.
        train_labels: Training labels aligned with ``train_images``.
        batch: Number of samples per slice.
        epoch: Number of passes; sets the total iteration count.
        optim: Optimizer exposing ``zero_grad`` and ``step``.
    """
    m = 0  # number of epoch reports printed so far
    n_samples = len(train_images)
    iter_num = int(epoch*train_images.shape[0]/batch)
    for i in range(iter_num):
        x = i*batch%n_samples
        y = x + batch

        loss = Loss(net,train_images[x:y],train_labels[x:y])
        optim.zero_grad()
        loss.backward()
        optim.step()

        if i%20 == 0:
            print('the iteration:%d,the batch_loss:%.3e,the trained photo:%d'%(i + 20,loss.item(),y))
        # Independent ``if`` (previously ``elif``): when an epoch ended on an
        # iteration that also logged the batch loss, the epoch report and the
        # epoch counter were silently skipped.
        if (i + 1)*batch%n_samples == 0:
            train_acc = pred_acc(net,train_images,train_labels)
            m = m + 1
            acc = train_acc.item()/train_images.shape[0]
            print('the epoch:%d,the acc:%.2f'%(m,acc))
def test_acc(net,images,labels):
    logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum().item()/images.shape[0]

# Hard-coded locations of the four gzip-compressed MNIST files.
x_train_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\train-images-idx3-ubyte.gz'
y_train_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\train-labels-idx1-ubyte.gz'
x_test_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\t10k-images-idx3-ubyte.gz'
y_test_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\t10k-labels-idx1-ubyte.gz'
(x_train,y_train),(x_test,y_test)=load_mnist(x_train_path, y_train_path, x_test_path, y_test_path)
# The timer starts after the files are loaded and stops right after Train(),
# so the reported time excludes data loading and the final test evaluation.
tic = time.time()
# Images become float tensors; labels become int64 tensors for CrossEntropyLoss.
train_images = torch.tensor(x_train).float()
train_labels = torch.tensor(y_train).long()
test_images = torch.tensor(x_test).float()
test_labels = torch.tensor(y_test).long()
# Each image is loaded as a flat vector of 28*28 = 784 values.
input_dim = 784
# Widths of the linear layers after the convolutions: 64 hidden, 10 outputs.
hid_layers = [64,10]
k = 10
dtype = torch.float32
net = Net(input_dim,hid_layers,k,dtype)

#optim = torch.optim.SGD(net.parameters(),lr = 1e-3,momentum = 0.78)
optim = torch.optim.Adam(net.parameters(),lr = 1e-3,betas=(0.9,0.999))
batch = 1000
epoch = 8
Train(net,train_images,train_labels,batch,epoch,optim)
ela = time.time() - tic
print('the time:%.2f,the test_acc:%.2f'%(ela,test_acc(net,test_images,test_labels)))



二维卷积的引入

MNIST数据集是图片,因此可以把数据reshape成形状为[N,1,28,28]的张量:其中N是样本数(batch大小),1是输入通道数(如果是RGB图像,通道数就是3),28*28是图片的高和宽。这里的网络还没有使用batchnorm和pooling。
在这里插入图片描述

import numpy as np
import matplotlib.pyplot as plt
import torch
from struct import unpack
import gzip

import torch.nn as nn
import time
np.random.seed(1234)
def __read_image(path):
    """Read a gzip-compressed IDX3 image file into a 2-D uint8 array.

    The 16-byte big-endian header holds four unsigned ints: a magic number,
    the image count, and the row and column count of each image. The pixel
    bytes follow the header.

    Args:
        path: Path of the gzip-compressed image file.

    Returns:
        A ``(num, rows * cols)`` uint8 array with one flattened image per row.
    """
    with gzip.open(path, 'rb') as f:
        _magic, num, rows, cols = unpack('>4I', f.read(16))
        # Reshape with the dimensions stored in the header rather than a
        # hard-coded 28*28, so files with other image sizes load correctly.
        img = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows * cols)
    return img

def __read_label(path):
    """Read a gzip-compressed IDX1 label file into a 1-D uint8 array.

    Args:
        path: Path of the gzip-compressed label file.

    Returns:
        A uint8 array holding every byte that follows the 8-byte header.
    """
    with gzip.open(path, 'rb') as stream:
        header = stream.read(8)
        payload = stream.read()
    # The header (magic number, item count) is parsed only so that a
    # truncated file still raises; its values are not used.
    unpack('>2I', header)
    return np.frombuffer(payload, dtype=np.uint8)
    
def __normalize_image(image):
    """Return ``image`` converted to float32 and divided by 255.0."""
    as_float = image.astype(np.float32)
    return np.true_divide(as_float, 255.0)
def __one_hot_label(label):
    """Return a ``(len(label), 10)`` float64 one-hot encoding of ``label``.

    Row ``i`` is all zeros except for a 1 in column ``label[i]``.
    """
    encoded = np.zeros((label.shape[0], 10))
    # Rows of ``encoded`` are views, so writing through them fills the matrix.
    for target_row, cls in zip(encoded, label):
        target_row[cls] = 1
    return encoded

def load_mnist(x_train_path, y_train_path, x_test_path, y_test_path, normalize=True, one_hot=False):
    """Load the train and test images and labels from four gzip IDX files.

    Args:
        x_train_path: Path of the training image file.
        y_train_path: Path of the training label file.
        x_test_path: Path of the test image file.
        y_test_path: Path of the test label file.
        normalize: If true, images are converted to float32 and divided by 255.
        one_hot: If true, labels are converted to 10-column one-hot rows.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
    """
    # Files are read in the same order as before: both image files first,
    # then both label files.
    train_x = __read_image(x_train_path)
    test_x = __read_image(x_test_path)
    train_y = __read_label(y_train_path)
    test_y = __read_label(y_test_path)

    if normalize:
        train_x = __normalize_image(train_x)
        test_x = __normalize_image(test_x)

    if one_hot:
        train_y = __one_hot_label(train_y)
        test_y = __one_hot_label(test_y)

    return (train_x, train_y), (test_x, test_y)
class Net(nn.Module):
    """Three 2-D convolutions followed by a stack of linear layers.

    Args:
        weight_dim: Input size along the axis that uses the first component
            of each stride pair (tensor dim 2 in ``nn.Conv2d``).
        high_dim: Input size along the axis that uses the second component
            of each stride pair (tensor dim 3 in ``nn.Conv2d``).
        hid_layers: Output widths of the successive linear layers; the last
            entry is the width of the network output.
        dtype: Torch dtype all weights and biases are cast to.
    """

    def __init__(self,weight_dim,high_dim,hid_layers,dtype):
        super(Net,self).__init__()
        self.kernel_size = [3,3,3]
        self.stride = [[2,1],[1,2],[1,1]]
        cov = []
        out_dim = [3,3,1]
        in_dim = 1
        for i in range(len(self.kernel_size)):
            cov.append(nn.Conv2d(in_channels = in_dim,out_channels = out_dim[i],kernel_size = self.kernel_size[i],stride = self.stride[i]))
            # Output size of an unpadded convolution, tracked per axis.
            weight_dim = (weight_dim - self.kernel_size[i])//self.stride[i][0] + 1
            high_dim = (high_dim - self.kernel_size[i])//self.stride[i][1] + 1
            in_dim = out_dim[i]
        self.cov = torch.nn.Sequential(*cov)
        # Cast the convolutions too. Previously only the linear layers were
        # cast, so any dtype other than float32 failed in the first conv.
        for layer in self.cov:
            layer.weight.data = layer.weight.data.type(dtype)
            layer.bias.data = layer.bias.data.type(dtype)

        # The last convolution has one channel, so the flattened feature
        # count is the product of the two spatial sizes.
        input_dim = weight_dim*high_dim
        self.layers = [input_dim] + hid_layers
        self.layers_hid_num = len(self.layers)-2
        fc = []
        for i in range(self.layers_hid_num+1):
            fc.append(torch.nn.Linear(self.layers[i],self.layers[i+1]))
        self.fc = torch.nn.Sequential(*fc)
        for layer in self.fc:
            layer.weight.data = layer.weight.data.type(dtype)
            layer.bias.data = layer.bias.data.type(dtype)

    def CNN(self,h):
        """Apply each convolution + ReLU, then flatten every sample to one row."""
        for i in range(len(self.cov)):
            h = self.cov[i](h)
            h = torch.relu(h)
        return (h.squeeze(dim = 1)).reshape(h.shape[0],-1)

    def Dense(self,x):
        """Apply every linear layer; all but the last are followed by ReLU and a skip term."""
        for i in range(self.layers_hid_num):
            x = torch.relu(self.fc[i](x))
            # Build the identity with x's own dtype and device. A default
            # (float32, CPU) eye made the matmul fail for any other dtype or
            # device. fc[i] outputs layers[i+1] features, so the eye is square.
            temp = torch.eye(x.shape[-1],self.layers[i+1],dtype = x.dtype,device = x.device)
            x = x + x@temp
        return self.fc[-1](x)

    def forward(self,x):
        """Run the convolution stack and then the linear stack on ``x``."""
        x = self.CNN(x)
        return self.Dense(x)

    def total_para(self):
        """Return the total number of elements over all parameters."""
        return sum([x.numel() for x in self.parameters()])
def pred(net,images):
    """Return, for each row of ``net.forward(images)``, the index of its largest entry."""
    scores = net.forward(images)
    return torch.argmax(scores, dim=1)
def SOmax(X,y):
    """Return the softmax probability each row of ``X`` assigns to its true class.

    Args:
        X: 2-D tensor of scores, one row per sample.
        y: Integer class labels, one per row of ``X``.

    Returns:
        A column tensor with one probability per row of ``X``.
    """
    y = torch.tensor(__one_hot_label(y))
    # Subtract each row's maximum before exponentiating. Softmax is unchanged
    # by a per-row shift, and this keeps exp() from overflowing to inf (which
    # previously produced inf/inf = nan for large scores).
    shifted = X - X.max(dim = 1,keepdim = True).values
    exp_x = torch.exp(shifted)
    return (exp_x*y).sum(1,keepdims = True)/exp_x.sum(1,keepdims = True)
'''
def Loss(net,images,labels):
    
    
    logits = net.forward(images)
    
    loss = -torch.log(SOmax(logits,labels)).mean()
    return loss
'''
def Loss(net,images,labels):
    """Return the cross-entropy loss of ``net.forward(images)`` against ``labels``.

    Uses ``nn.CrossEntropyLoss`` with its default settings. The argmax that
    was previously computed here and never used has been removed.
    """
    criteon = nn.CrossEntropyLoss()
    logits = net.forward(images)
    return criteon(logits,labels)

def pred_acc(net,images,labels):
    """Return the number of samples whose argmax prediction equals the label.

    Returns:
        A 0-dim tensor holding the count of correct predictions.
    """
    # Evaluation only: run the forward pass without autograd so no graph is
    # recorded for what may be the whole data set.
    with torch.no_grad():
        logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum()
def Train(net,train_images,train_labels,batch,epoch,optim):
    """Train ``net`` on consecutive slices of the training set.

    Each iteration takes the slice starting at ``i*batch % len(train_images)``,
    computes ``Loss`` on it and performs one optimizer step. The batch loss is
    printed every 20 iterations, and the training accuracy is printed whenever
    ``(i + 1)*batch`` is a multiple of the data-set size.

    Args:
        net: Model passed to ``Loss`` and ``pred_acc``.
        train_images: Training inputs, indexed along the first axis.
        train_labels: Training labels aligned with ``train_images``.
        batch: Number of samples per slice.
        epoch: Number of passes; sets the total iteration count.
        optim: Optimizer exposing ``zero_grad`` and ``step``.
    """
    m = 0  # number of epoch reports printed so far
    n_samples = len(train_images)
    iter_num = int(epoch*train_images.shape[0]/batch)
    for i in range(iter_num):
        x = i*batch%n_samples
        y = x + batch

        loss = Loss(net,train_images[x:y],train_labels[x:y])
        optim.zero_grad()
        loss.backward()
        optim.step()

        if i%20 == 0:
            print('the iteration:%d,the batch_loss:%.3e,the trained photo:%d'%(i + 20,loss.item(),y))
        # Independent ``if`` (previously ``elif``): when an epoch ended on an
        # iteration that also logged the batch loss, the epoch report and the
        # epoch counter were silently skipped.
        if (i + 1)*batch%n_samples == 0:
            train_acc = pred_acc(net,train_images,train_labels)
            m = m + 1
            acc = train_acc.item()/train_images.shape[0]
            print('the epoch:%d,the acc:%.2f'%(m,acc))
def test_acc(net,images,labels):
    logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum().item()/images.shape[0]

# Hard-coded locations of the four gzip-compressed MNIST files.
x_train_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\train-images-idx3-ubyte.gz'
y_train_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\train-labels-idx1-ubyte.gz'
x_test_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\t10k-images-idx3-ubyte.gz'
y_test_path = 'C:\\Users\\2001213226\\Desktop\\icbc\\deepfm\\mnist_dataset\\t10k-labels-idx1-ubyte.gz'
(x_train,y_train),(x_test,y_test)=load_mnist(x_train_path, y_train_path, x_test_path, y_test_path)
# The timer starts after the files are loaded and stops right after Train(),
# so the reported time excludes data loading and the final test evaluation.
tic = time.time()
# Turn each flat image into a 1-channel 28x28 array for the 2-D convolutions.
x_train = x_train.reshape(x_train.shape[0],1,28,28)
x_test = x_test.reshape(x_test.shape[0],1,28,28)
# Images become float tensors; labels become int64 tensors for CrossEntropyLoss.
train_images = torch.tensor(x_train).float()
train_labels = torch.tensor(y_train).long()
test_images = torch.tensor(x_test).float()
test_labels = torch.tensor(y_test).long()
# Spatial sizes of the input, passed to Net to size the first linear layer.
weight_dim = 28
high_dim = 28
# Widths of the linear layers after the convolutions: 64 hidden, 10 outputs.
hid_layers = [64,10]

dtype = torch.float32
net = Net(weight_dim,high_dim,hid_layers,dtype)

#optim = torch.optim.SGD(net.parameters(),lr = 1e-3,momentum = 0.78)
optim = torch.optim.Adam(net.parameters(),lr = 1e-3,betas=(0.9,0.999))
batch = 1000
epoch = 8
Train(net,train_images,train_labels,batch,epoch,optim)
ela = time.time() - tic
print('the time:%.2f,the test_acc:%.2f'%(ela,test_acc(net,test_images,test_labels)))



反1维卷积

import numpy as np
import matplotlib.pyplot as plt
import torch
from struct import unpack
import gzip

import torch.nn as nn
import time
np.random.seed(1234)
def __read_image(path):
    """Read a gzip-compressed IDX3 image file into a 2-D uint8 array.

    The 16-byte big-endian header holds four unsigned ints: a magic number,
    the image count, and the row and column count of each image. The pixel
    bytes follow the header.

    Args:
        path: Path of the gzip-compressed image file.

    Returns:
        A ``(num, rows * cols)`` uint8 array with one flattened image per row.
    """
    with gzip.open(path, 'rb') as f:
        _magic, num, rows, cols = unpack('>4I', f.read(16))
        # Reshape with the dimensions stored in the header rather than a
        # hard-coded 28*28, so files with other image sizes load correctly.
        img = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows * cols)
    return img

def __read_label(path):
    """Read a gzip-compressed IDX1 label file into a 1-D uint8 array.

    Args:
        path: Path of the gzip-compressed label file.

    Returns:
        A uint8 array holding every byte that follows the 8-byte header.
    """
    with gzip.open(path, 'rb') as stream:
        header = stream.read(8)
        payload = stream.read()
    # The header (magic number, item count) is parsed only so that a
    # truncated file still raises; its values are not used.
    unpack('>2I', header)
    return np.frombuffer(payload, dtype=np.uint8)
    
def __normalize_image(image):
    """Return ``image`` converted to float32 and divided by 255.0."""
    as_float = image.astype(np.float32)
    return np.true_divide(as_float, 255.0)
def __one_hot_label(label):
    """Return a ``(len(label), 10)`` float64 one-hot encoding of ``label``.

    Row ``i`` is all zeros except for a 1 in column ``label[i]``.
    """
    encoded = np.zeros((label.shape[0], 10))
    # Rows of ``encoded`` are views, so writing through them fills the matrix.
    for target_row, cls in zip(encoded, label):
        target_row[cls] = 1
    return encoded

def load_mnist(x_train_path, y_train_path, x_test_path, y_test_path, normalize=True, one_hot=False):
    """Load the train and test images and labels from four gzip IDX files.

    Args:
        x_train_path: Path of the training image file.
        y_train_path: Path of the training label file.
        x_test_path: Path of the test image file.
        y_test_path: Path of the test label file.
        normalize: If true, images are converted to float32 and divided by 255.
        one_hot: If true, labels are converted to 10-column one-hot rows.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
    """
    # Files are read in the same order as before: both image files first,
    # then both label files.
    train_x = __read_image(x_train_path)
    test_x = __read_image(x_test_path)
    train_y = __read_label(y_train_path)
    test_y = __read_label(y_test_path)

    if normalize:
        train_x = __normalize_image(train_x)
        test_x = __normalize_image(test_x)

    if one_hot:
        train_y = __one_hot_label(train_y)
        test_y = __one_hot_label(test_y)

    return (train_x, train_y), (test_x, test_y)
class Net(nn.Module):
    """Linear stack -> transposed 1-D convolutions -> 1-D convolutions -> linear stack.

    Args:
        layers_q: Widths of the first linear stack, input width first.
        layers_hid: Output widths of the final linear stack; the last entry
            is the width of the network output.
        dtype: Torch dtype all weights and biases are cast to.
    """

    @staticmethod
    def _cast(modules, dtype):
        """Cast the weight and bias data of every module in ``modules`` to ``dtype``."""
        for module in modules:
            module.weight.data = module.weight.data.type(dtype)
            module.bias.data = module.bias.data.type(dtype)

    def __init__(self,layers_q,layers_hid,dtype):
        super(Net,self).__init__()
        self.dtype = dtype

        # Stage 1: first linear stack.
        self.layers_q = layers_q
        self.layers_qhid_num = len(self.layers_q) - 2
        self.fc_q = torch.nn.Sequential(*[
            torch.nn.Linear(self.layers_q[j],self.layers_q[j + 1])
            for j in range(self.layers_qhid_num + 1)
        ])
        self._cast(self.fc_q,dtype)

        # Stage 2: transposed convolutions; ``length`` tracks the sequence
        # length produced so far.
        length = layers_q[-1]
        self.iker = [10,6]
        self.istride = [2,1]
        up_layers = []
        channels = 1
        for width, step, out_ch in zip(self.iker,self.istride,[3,1]):
            up_layers.append(nn.ConvTranspose1d(in_channels = channels,out_channels = out_ch,kernel_size = width,stride = step))
            length = (length - 1)*step + width
            channels = out_ch
        self.icov = torch.nn.Sequential(*up_layers)
        self._cast(self.icov,dtype)

        # Stage 3: ordinary convolutions shrinking the sequence again.
        self.kernel_size = [10,6]
        self.stride = [2,2]
        down_layers = []
        channels = 1
        for width, step, out_ch in zip(self.kernel_size,self.stride,[3,1]):
            down_layers.append(nn.Conv1d(in_channels = channels,out_channels = out_ch,kernel_size = width,stride = step))
            length = (length - width)//step + 1
            channels = out_ch
        self.cov = torch.nn.Sequential(*down_layers)
        self._cast(self.cov,dtype)

        # Stage 4: final linear stack, fed by the remaining sequence length.
        self.layers_hid = [length] + layers_hid
        self.layers_hid_num = len(self.layers_hid) - 2
        self.fc_hid = torch.nn.Sequential(*[
            torch.nn.Linear(self.layers_hid[j],self.layers_hid[j + 1])
            for j in range(self.layers_hid_num + 1)
        ])
        self._cast(self.fc_hid,dtype)

    def fnn_q(self,x):
        """First linear stack: sin activation plus identity-matmul skip on all but the last layer."""
        for j in range(self.layers_qhid_num):
            activated = torch.sin(self.fc_q[j](x))
            eye = torch.eye(activated.shape[-1],self.layers_q[j + 1],dtype = self.dtype,device = activated.device)
            x = activated + activated@eye
        return self.fc_q[-1](x)

    def icnn(self,x):
        """Insert a channel axis and apply each transposed convolution followed by sin."""
        h = x.reshape(x.shape[0],1,x.shape[1])
        for layer in self.icov:
            h = torch.sin(layer(h))
        return h

    def cnn(self,h):
        """Apply each convolution followed by sin, then drop the channel axis."""
        for layer in self.cov:
            h = torch.sin(layer(h))
        return h.squeeze(dim = 1)

    def fnn_h(self,x):
        """Final linear stack: ReLU plus identity-matmul skip on all but the last layer."""
        for j in range(self.layers_hid_num):
            activated = torch.relu(self.fc_hid[j](x))
            eye = torch.eye(activated.shape[-1],self.layers_hid[j + 1],dtype = self.dtype,device = activated.device)
            x = activated + activated@eye
        return self.fc_hid[-1](x)

    def forward(self,x):
        """Chain the four stages: fnn_q, icnn, cnn, fnn_h."""
        return self.fnn_h(self.cnn(self.icnn(self.fnn_q(x))))

    def total_para(self):
        """Return the total number of elements over all parameters."""
        return sum(p.numel() for p in self.parameters())
def pred(net,images):
    """Return, for each row of ``net.forward(images)``, the index of its largest entry."""
    scores = net.forward(images)
    return torch.argmax(scores, dim=1)
def SOmax(X,y):
    """Return the softmax probability each row of ``X`` assigns to its true class.

    Args:
        X: 2-D tensor of scores, one row per sample.
        y: Integer class labels, one per row of ``X``.

    Returns:
        A column tensor with one probability per row of ``X``.
    """
    y = torch.tensor(__one_hot_label(y))
    # Subtract each row's maximum before exponentiating. Softmax is unchanged
    # by a per-row shift, and this keeps exp() from overflowing to inf (which
    # previously produced inf/inf = nan for large scores).
    shifted = X - X.max(dim = 1,keepdim = True).values
    exp_x = torch.exp(shifted)
    return (exp_x*y).sum(1,keepdims = True)/exp_x.sum(1,keepdims = True)
'''
def Loss(net,images,labels):
    
    
    logits = net.forward(images)
    
    loss = -torch.log(SOmax(logits,labels)).mean()
    return loss
'''
def Loss(net,images,labels):
    """Return the cross-entropy loss of ``net.forward(images)`` against ``labels``.

    Uses ``nn.CrossEntropyLoss`` with its default settings. The argmax that
    was previously computed here and never used has been removed.
    """
    criteon = nn.CrossEntropyLoss()
    logits = net.forward(images)
    return criteon(logits,labels)

def pred_acc(net,images,labels):
    """Return the number of samples whose argmax prediction equals the label.

    Returns:
        A 0-dim tensor holding the count of correct predictions.
    """
    # Evaluation only: run the forward pass without autograd so no graph is
    # recorded for what may be the whole data set.
    with torch.no_grad():
        logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum()
def Train(net,train_images,train_labels,batch,epoch,optim):
    """Train ``net`` on consecutive slices of the training set.

    Each iteration takes the slice starting at ``i*batch % len(train_images)``,
    computes ``Loss`` on it and performs one optimizer step. The batch loss is
    printed every 20 iterations, and the training accuracy is printed whenever
    ``(i + 1)*batch`` is a multiple of the data-set size.

    Args:
        net: Model passed to ``Loss`` and ``pred_acc``.
        train_images: Training inputs, indexed along the first axis.
        train_labels: Training labels aligned with ``train_images``.
        batch: Number of samples per slice.
        epoch: Number of passes; sets the total iteration count.
        optim: Optimizer exposing ``zero_grad`` and ``step``.
    """
    m = 0  # number of epoch reports printed so far
    n_samples = len(train_images)
    iter_num = int(epoch*train_images.shape[0]/batch)
    for i in range(iter_num):
        x = i*batch%n_samples
        y = x + batch

        loss = Loss(net,train_images[x:y],train_labels[x:y])
        optim.zero_grad()
        loss.backward()
        optim.step()

        if i%20 == 0:
            print('the iteration:%d,the batch_loss:%.3e,the trained photo:%d'%(i + 20,loss.item(),y))
        # Independent ``if`` (previously ``elif``): when an epoch ended on an
        # iteration that also logged the batch loss, the epoch report and the
        # epoch counter were silently skipped.
        if (i + 1)*batch%n_samples == 0:
            train_acc = pred_acc(net,train_images,train_labels)
            m = m + 1
            acc = train_acc.item()/train_images.shape[0]
            print('the epoch:%d,the acc:%.2f'%(m,acc))
def test_acc(net,images,labels):
    logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum().item()/images.shape[0]

# Hard-coded locations of the four gzip-compressed MNIST files.
x_train_path = 'C:\\Users\\2001213226\\Desktop\\mnist_dataset\\train-images-idx3-ubyte.gz'
y_train_path = 'C:\\Users\\2001213226\\Desktop\\mnist_dataset\\train-labels-idx1-ubyte.gz'
x_test_path = 'C:\\Users\\2001213226\\Desktop\\mnist_dataset\\t10k-images-idx3-ubyte.gz'
y_test_path = 'C:\\Users\\2001213226\\Desktop\\mnist_dataset\\t10k-labels-idx1-ubyte.gz'
(x_train,y_train),(x_test,y_test)=load_mnist(x_train_path, y_train_path, x_test_path, y_test_path)
# The timer starts after the files are loaded and stops right after Train(),
# so the reported time excludes data loading and the final test evaluation.
tic = time.time()
# Images become float tensors; labels become int64 tensors for CrossEntropyLoss.
train_images = torch.tensor(x_train).float()
train_labels = torch.tensor(y_train).long()
test_images = torch.tensor(x_test).float()
test_labels = torch.tensor(y_test).long()
# Each image is loaded as a flat vector of 28*28 = 784 values.
input_dim = 784

dtype = torch.float32
# Widths of the first linear stack: 784 -> 10 -> 10.
layers_q = [input_dim,10,10]
# Widths of the final linear stack after the convolutions: 20 hidden, 10 outputs.
layers_hid = [20,10]
net = Net(layers_q,layers_hid,dtype)

#optim = torch.optim.SGD(net.parameters(),lr = 1e-3,momentum = 0.78)
optim = torch.optim.Adam(net.parameters(),lr = 1e-3,betas=(0.9,0.999))
batch = 1000
epoch = 4
Train(net,train_images,train_labels,batch,epoch,optim)
ela = time.time() - tic
print('the time:%.2f,the test_acc:%.2f'%(ela,test_acc(net,test_images,test_labels)))
# Draw the first training image as a 28x28 picture.
plt.imshow(x_train[0,:].reshape(28,28))



在这里插入图片描述

在这里插入图片描述

二维反卷积(转置卷积,ConvTranspose2d)

import numpy as np
import matplotlib.pyplot as plt
import torch
from struct import unpack
import gzip

import torch.nn as nn
import time
# Seed NumPy's global random generator.
# NOTE(review): no torch.manual_seed call is visible in this listing, so the
# torch layer initialisation is presumably not fixed by this seed - confirm.
np.random.seed(1234)
def __read_image(path):
    """Read a gzip-compressed IDX image file into a 2-D ``uint8`` array.

    The 16-byte header holds four big-endian unsigned 32-bit integers:
    magic number, image count, rows and columns. The bytes after it are
    the pixel data.

    Args:
        path: location of the ``.gz`` file.

    Returns:
        Read-only ``numpy.ndarray`` of shape ``(num, rows*cols)`` and dtype
        ``uint8``, one flattened image per row.

    Raises:
        ValueError: if the payload size does not equal ``num*rows*cols``.
    """
    with gzip.open(path, 'rb') as f:
        _magic, num, rows, cols = unpack('>4I', f.read(16))
        # Use the image size stored in the header instead of assuming 28x28;
        # for MNIST files this is the same (num, 784) layout as before.
        img = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows*cols)
    return img

def __read_label(path):
    """Read a gzip-compressed IDX label file into a 1-D ``uint8`` array.

    The first 8 bytes (two big-endian unsigned 32-bit integers: magic number
    and item count) are parsed and discarded; every remaining byte is
    returned as one label.
    """
    with gzip.open(path, 'rb') as stream:
        header = stream.read(8)
        unpack('>2I', header)  # header values are not used further
        payload = stream.read()
    return np.frombuffer(payload, dtype=np.uint8)
    
def __normalize_image(image):
    """Return a ``float32`` copy of ``image`` with every value divided by 255."""
    pixels = image.astype(np.float32)
    return np.true_divide(pixels, 255.0)
def __one_hot_label(label, num_classes=10):
    """Convert a 1-D sequence of class indices into a one-hot matrix.

    Args:
        label: 1-D integer class indices (NumPy array; a torch tensor is
            also accepted and copied to host memory first).
        num_classes: number of columns of the result. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        ``float64`` array of shape ``(len(label), num_classes)`` with a 1 in
        column ``label[i]`` of row ``i`` and 0 elsewhere.

    Raises:
        IndexError: if an index is outside ``[-num_classes, num_classes)``.
    """
    if hasattr(label, "detach"):
        # torch tensor: convert explicitly so indexing below sees an ndarray
        label = label.detach().cpu().numpy()
    idx = np.asarray(label)
    lab = np.zeros((idx.shape[0], num_classes))
    # One vectorised assignment instead of a Python loop over every sample.
    lab[np.arange(idx.shape[0]), idx] = 1
    return lab

def load_mnist(x_train_path, y_train_path, x_test_path, y_test_path, normalize=True, one_hot=False):
    """Load the MNIST train/test images and labels from four gzip files.

    Args:
        x_train_path: training image file.
        y_train_path: training label file.
        x_test_path: test image file.
        y_test_path: test label file.
        normalize: when true, images are passed through ``__normalize_image``.
        one_hot: when true, labels are passed through ``__one_hot_label``.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``.
    """
    # Files are read in the same order as before: both image files, then
    # both label files.
    train_x = __read_image(x_train_path)
    test_x = __read_image(x_test_path)
    train_y = __read_label(y_train_path)
    test_y = __read_label(y_test_path)

    if normalize:
        train_x = __normalize_image(train_x)
        test_x = __normalize_image(test_x)

    if one_hot:
        train_y = __one_hot_label(train_y)
        test_y = __one_hot_label(test_y)

    return (train_x, train_y), (test_x, test_y)
class Net(nn.Module):
    """Four-stage classifier: MLP -> transposed convs -> convs -> MLP.

    ``forward`` chains ``fnn_q`` (fully connected stack built from
    ``layers_q``), ``icnn`` (two ``ConvTranspose2d`` layers applied to the
    ``fnn_q`` output viewed as a 1x4x4 map), ``cnn`` (two ``Conv2d`` layers)
    and ``fnn_h`` (fully connected stack built from ``layers_hid``).

    Args:
        layers_q: widths of the first fully connected stack, input width
            first. ``icnn`` reshapes its input to ``(N, 1, 4, 4)``, so the
            last entry has to be 16.
        layers_hid: widths of the final stack *without* its input width;
            that width is derived from the convolution output size.
        dtype: torch dtype every weight and bias is cast to.
    """

    def __init__(self,layers_q,layers_hid,dtype):
        super(Net,self).__init__()
        self.dtype = dtype

        # ---- first fully connected stack ----
        self.layers_q = layers_q
        self.layers_qhid_num = len(self.layers_q) - 2
        self.fc_q = torch.nn.Sequential(*[
            torch.nn.Linear(n_in,n_out)
            for n_in, n_out in zip(self.layers_q[:-1],self.layers_q[1:])
        ])
        self._cast_params(self.fc_q,dtype)

        # ---- transposed convolutions, starting from a 4x4 single-channel map ----
        input_dim = layers_q[-1]  # overwritten below once the conv output size is known
        high_dim, weight_dim = 4, 4
        self.iker = [10,6]
        self.istride = [[2,1],[2,1]]
        iout_dim = [3,1]
        channels = 1
        up_layers = []
        for kernel, step, out_ch in zip(self.iker,self.istride,iout_dim):
            up_layers.append(nn.ConvTranspose2d(in_channels = channels,out_channels = out_ch,kernel_size = kernel,stride = step))
            # track the spatial size produced by this layer (no padding)
            high_dim = (high_dim - 1)*step[0] + kernel
            weight_dim = (weight_dim - 1)*step[1] + kernel
            channels = out_ch
        self.icov = torch.nn.Sequential(*up_layers)
        self._cast_params(self.icov,dtype)

        # ---- ordinary convolutions ----
        self.ker = [10,6]
        self.stride = [[2,1],[1,2]]
        out_dim = [3,1]
        channels = 1
        down_layers = []
        for kernel, step, out_ch in zip(self.ker,self.stride,out_dim):
            down_layers.append(nn.Conv2d(in_channels = channels,out_channels = out_ch,kernel_size = kernel,stride = step))
            high_dim = (high_dim - kernel)//step[0] + 1
            weight_dim = (weight_dim - kernel)//step[1] + 1
            channels = out_ch
        self.cov = torch.nn.Sequential(*down_layers)
        self._cast_params(self.cov,dtype)

        # ---- final fully connected stack, fed with the flattened conv output ----
        input_dim = high_dim*weight_dim
        self.layers_hid = [input_dim] + layers_hid
        self.layers_hid_num = len(self.layers_hid) - 2
        self.fc_hid = torch.nn.Sequential(*[
            torch.nn.Linear(n_in,n_out)
            for n_in, n_out in zip(self.layers_hid[:-1],self.layers_hid[1:])
        ])
        self._cast_params(self.fc_hid,dtype)

    @staticmethod
    def _cast_params(stack,dtype):
        """Cast the weight and bias data of every layer in ``stack`` to ``dtype``."""
        for layer in stack:
            layer.weight.data = layer.weight.data.type(dtype)
            layer.bias.data = layer.bias.data.type(dtype)

    def fnn_q(self,x):
        """Apply the first stack: sin-activated hidden layers, linear last layer."""
        for depth in range(self.layers_qhid_num):
            activated = torch.sin(self.fc_q[depth](x))
            # eye(rows, cols) with rows == cols == layer width, so the update
            # adds the activation to itself.
            identity = torch.eye(activated.shape[-1],self.layers_q[depth + 1],dtype = self.dtype,device = activated.device)
            x = activated + activated@identity
        return self.fc_q[-1](x)

    def icnn(self,x):
        """View ``x`` as ``(N, 1, 4, 4)`` and apply the transposed convs, each followed by sin."""
        h = x.reshape(x.shape[0],1,4,4)
        for layer in self.icov:
            h = torch.sin(layer(h))
        return h

    def cnn(self,h):
        """Apply the convs, each followed by sin, and flatten to ``(N, -1)``."""
        for layer in self.cov:
            h = torch.sin(layer(h))
        squeezed = h.squeeze(dim = 1)
        return squeezed.reshape(h.shape[0],-1)

    def fnn_h(self,x):
        """Apply the final stack: relu-activated hidden layers, linear last layer."""
        for depth in range(self.layers_hid_num):
            activated = torch.relu(self.fc_hid[depth](x))
            identity = torch.eye(activated.shape[-1],self.layers_hid[depth + 1],dtype = self.dtype,device = activated.device)
            x = activated + activated@identity
        return self.fc_hid[-1](x)

    def forward(self,x):
        """Run the four stages in order and return the final scores."""
        return self.fnn_h(self.cnn(self.icnn(self.fnn_q(x))))

    def total_para(self):
        """Return the total number of scalar parameters of the model."""
        return sum(p.numel() for p in self.parameters())
def pred(net,images):
    """Return, for each sample, the index of the highest score from ``net.forward``."""
    scores = net.forward(images)
    winners = scores.argmax(dim = 1)
    return winners
def SOmax(X,y):
    """Return the softmax probability that each row of ``X`` gives its label.

    Args:
        X: 2-D tensor of scores, one row per sample; its column count must
            match the one-hot width produced by ``__one_hot_label``.
        y: class indices accepted by ``__one_hot_label``, one per row.

    Returns:
        Tensor of shape ``(N, 1)``: ``softmax(X)[i, y[i]]`` for every row.
    """
    mask = torch.tensor(__one_hot_label(y))
    # Softmax is invariant to a per-row shift; subtracting the row maximum
    # keeps exp() from overflowing to inf (which produced nan = inf/inf).
    shifted = X - X.max(dim = 1,keepdim = True).values.detach()
    weights = torch.exp(shifted)  # computed once and reused
    return (weights*mask).sum(1,keepdim = True)/weights.sum(1,keepdim = True)
'''
def Loss(net,images,labels):
    
    
    logits = net.forward(images)
    
    loss = -torch.log(SOmax(logits,labels)).mean()
    return loss
'''
def Loss(net,images,labels):
    """Return the mean cross-entropy between ``net``'s scores and ``labels``.

    Args:
        net: model exposing ``forward(images)`` that returns raw
            (un-normalised) class scores, one row per sample.
        images: batch of inputs handed to ``net.forward`` unchanged.
        labels: integer class indices, one per sample.

    Returns:
        0-dim tensor with the loss, attached to the autograd graph so that
        ``backward()`` can be called on it.
    """
    criteon = nn.CrossEntropyLoss()
    logits = net.forward(images)
    # The arg-max prediction that used to be computed here was never used.
    return criteon(logits,labels)

def pred_acc(net,images,labels):
    """Count how many samples ``net`` classifies correctly.

    Args:
        net: model exposing ``forward(images)`` that returns one row of
            class scores per sample.
        images: batch of inputs handed to ``net.forward`` unchanged.
        labels: integer class labels, one per row of the model output.

    Returns:
        A 0-dim tensor holding the number of rows whose arg-max class
        equals the label (call ``.item()`` for a Python int).
    """
    # Evaluation only: no gradients are needed, so skip building the
    # autograd graph (this is called on the entire training set).
    with torch.no_grad():
        logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum()
def Train(net,train_images,train_labels,batch,epoch,optim):
    """Train ``net`` with mini-batches taken as contiguous slices of the data.

    The data is not shuffled: iteration ``i`` uses the slice that starts at
    ``i*batch`` modulo the dataset size. A progress line with the batch loss
    is printed every 20 iterations, and whenever an iteration ends exactly on
    a dataset boundary the accuracy over the whole training set is printed.

    Args:
        net: model passed through to ``Loss`` and ``pred_acc``.
        train_images: training inputs, sliced along dimension 0.
        train_labels: labels aligned with ``train_images``.
        batch: number of samples per optimisation step.
        epoch: number of passes over the data; with ``batch`` it fixes the
            total number of optimisation steps.
        optim: optimiser whose ``zero_grad``/``step`` are called every step.
    """
    num_samples = train_images.shape[0]
    iter_num = int(epoch*num_samples/batch)
    finished_epochs = 0
    for i in range(iter_num):
        x = i*batch%num_samples
        y = x + batch

        loss = Loss(net,train_images[x:y],train_labels[x:y])
        optim.zero_grad()
        loss.backward()
        optim.step()

        if i%20 == 0:
            # NOTE(review): the reported iteration number is i + 20, not i;
            # kept as in the original output format - confirm it is intended.
            print('the iteration:%d,the batch_loss:%.3e,the trained photo:%d'%(i + 20,loss.item(),y))
        # Checked independently of the progress print above: with ``elif``
        # the epoch report was silently skipped whenever an epoch boundary
        # coincided with a multiple-of-20 iteration.
        if (i + 1)*batch%num_samples == 0:
            finished_epochs = finished_epochs + 1
            acc = pred_acc(net,train_images,train_labels).item()/num_samples
            print('the epoch:%d,the acc:%.2f'%(finished_epochs,acc))
def test_acc(net,images,labels):
    logits = net.forward(images)
    pred_label = logits.argmax(dim = 1)
    return pred_label.eq(labels).sum().item()/images.shape[0]

# Driver script: load MNIST, build the network, train it, then report the
# elapsed time and the accuracy on the test split.

# Absolute, machine-specific locations of the four gzip-compressed MNIST files.
x_train_path = 'C:\\Users\\2001213226\\Desktop\\mnist_dataset\\train-images-idx3-ubyte.gz'
y_train_path = 'C:\\Users\\2001213226\\Desktop\\mnist_dataset\\train-labels-idx1-ubyte.gz'
x_test_path = 'C:\\Users\\2001213226\\Desktop\\mnist_dataset\\t10k-images-idx3-ubyte.gz'
y_test_path = 'C:\\Users\\2001213226\\Desktop\\mnist_dataset\\t10k-labels-idx1-ubyte.gz'
(x_train,y_train),(x_test,y_test)=load_mnist(x_train_path, y_train_path, x_test_path, y_test_path)
# The timer starts after the files are read, so ``ela`` below covers tensor
# conversion, model construction and training, but not data loading.
tic = time.time()


train_images = torch.tensor(x_train).float()
train_labels = torch.tensor(y_train).long()  # integer class indices for CrossEntropyLoss
test_images = torch.tensor(x_test).float()
test_labels = torch.tensor(y_test).long()
input_dim = 784  # 28*28 pixels per flattened image

dtype = torch.float32
# Widths of Net's first fully connected stack; the last entry is 16 because
# Net.icnn reshapes that output to (N, 1, 4, 4).
layers_q = [input_dim,10,16]
# Widths of Net's final stack (its input width is computed inside Net).
layers_hid = [20,10]
net = Net(layers_q,layers_hid,dtype)

# Alternative optimiser kept for reference:
#optim = torch.optim.SGD(net.parameters(),lr = 1e-3,momentum = 0.78)
optim = torch.optim.Adam(net.parameters(),lr = 1e-3,betas=(0.9,0.999))
batch = 1000  # samples per optimisation step
epoch = 4     # passes over the training set
Train(net,train_images,train_labels,batch,epoch,optim)
ela = time.time() - tic
print('the time:%.2f,the test_acc:%.2f'%(ela,test_acc(net,test_images,test_labels)))
# Draw the first training image; no plt.show() is called in this listing.
plt.imshow(x_train[0,:].reshape(28,28))



在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
利用tensorflow实现的卷积神经网络来进行MNIST手写数字图像的分类。 #导入numpy模块 import numpy as np #导入tensorflow模块,程序使用tensorflow来实现卷积神经网络 import tensorflow as tf #下载mnist数据集,并从mnist_data目录中读取数据 from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('mnist_data',one_hot=True) #(1)这里的“mnist_data” 是和当前文件相同目录下的一个文件夹。自己先手工建立这个文件夹,然后从https://yann.lecun.com/exdb/mnist/ 下载所需的4个文件(即该网址中第三段“Four files are available on this site:”后面的四个文件),并放到目录MNIST_data下即可。 #(2)MNIST数据集是手写数字字符的数据集。每个样本都是一张28*28像素的灰度手写数字图片。 #(3)one_hot表示独热编码,其值被设为true。在分类问题的数据集标注时,如何不采用独热编码的方式, 类别通常就是一个符号而已,比如说是9。但如果采用独热编码的方式,则每个类表示为一个列表list,共计有10个数值,但只有一个为1,其余均为0。例如,“9”的独热编码可以为[00000 00001]. #定义输入数据x和输出y的形状。函数tf.placeholder的目的是定义输入,可以理解为采用占位符进行占位。 #None这个位置的参数在这里被用于表示样本的个数,而由于样本个数此时具体是多少还无法确定,所以这设为None。而每个输入样本的特征数目是确定的,即为28*28。 input_x = tf.placeholder(tf.float32,[None,28*28])/255 #因为每个像素的取值范围是 0~255 output_y = tf.placeholder(tf.int32,[None,10]) #10表示10个类别 #输入层的输入数据input_x被reshape成四维数据,其中第一维的数据代表了图片数量 input_x_images = tf.reshape(input_x,[-1,28,28,1]) test_x = mnist.test.images[:3000] #读取测试集图片的特征,读取3000个图片 test_y = mnist.test.labels[:3000] #读取测试集图片的标签。就是这3000个图片所对应的标签

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Galerkin码农选手

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值