问题
查看非叶子结点的梯度,不是None。如果把全连接层的激活函数删掉,结果一样,显然是激活函数的原因:因为loadData函数
在处理数据的时候把数据缩放到(-1,1)的区间中了,所以用ReLU函数在输入<0的时候,基本和神经元死亡没啥区别了——前向输出为0,反向传播的梯度也传不回去。赶紧换,sigmoid都比ReLU强……然后去掉dropout层,减少训练的时间。
话不多说先上代码
import time
import torch
import torchvision
import torchvision.transforms as transforms
# import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import vgg
class Net(nn.Module):
    """VGG-style CNN for CIFAR-10: four conv stages + a 3-layer fully-connected head.

    Expects input images of 3 channels whose spatial size reduces to 2x2 after
    four 2x2 max-pools (e.g. 32x32 CIFAR-10 images).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Feature extractor: each stage is n conv3x3+ReLU layers followed by a
        # 2x2 max-pool (built by stack_mini).
        stages = []
        for n, (in_c, out_c) in zip((2, 2, 5, 3),
                                    ((3, 64), (64, 128), (128, 256), (256, 512))):
            stages.append(stack_mini(n, in_c, out_c))
        self.f = nn.Sequential(*stages)
        # Classifier head. SELU is used instead of ReLU because the inputs are
        # normalized to (-1, 1) and ReLU was killing negative activations
        # (see the note above the code).
        self.fc = nn.Sequential(
            nn.Linear(2 * 2 * 512, 384),
            nn.SELU(True),
            nn.Linear(384, 192),
            nn.SELU(True),
            nn.Linear(192, 10),
        )

    def forward(self, x):
        # Run the whole Sequential at once instead of indexing self.f[0..3]
        # by hand — the manual version breaks silently if the number of
        # stages ever changes.
        x = self.f(x)
        x = x.view(x.shape[0], -1)  # flatten to (batch, 2*2*512)
        return self.fc(x)
def stack_mini(num_convs, in_channels, out_channels):
    """Build one VGG block: conv3x3+ReLU repeated, then a 2x2 max-pool.

    The block starts with a conv layer and ends with a pooling layer,
    extracting higher-dimensional features along the way.

    :param num_convs: number of conv+ReLU pairs in the block
    :param in_channels: channel count entering the first conv
    :param out_channels: channel count produced by every conv in the block
    :return: nn.Sequential containing the whole block
    """
    # First conv maps in_channels -> out_channels; the rest keep out_channels.
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
              nn.ReLU(True)]
    for _ in range(num_convs - 1):
        layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU(True))
    # Close the block with a 2x2 max-pool (stride 2): halves spatial size.
    layers.append(nn.MaxPool2d(2, 2))
    return nn.Sequential(*layers)
def stack_big(num_convs, channels):
    """Create the feature-extraction stages of the network.

    :param num_convs: per-stage conv counts, e.g. (2, 2, 3, 3, 3)
    :param channels: per-stage (in_channels, out_channels) pairs
    :return: list of nn.Sequential stages, one stack_mini block each
    """
    return [stack_mini(n, in_c, out_c)
            for n, (in_c, out_c) in zip(num_convs, channels)]
def loadData():
tf = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root=