数据下载&训练代码
### mnistGPU.py
import torch
from torch import nn
import torchvision
from torch.utils.data import DataLoader
from torch.autograd import Variable
import time
# Transform pipeline: PIL image -> tensor, then normalize the single grayscale
# channel with mean 0.5 / std 0.5 (maps pixel values to roughly [-1, 1]).
data_tf = torchvision.transforms.Compose(
[torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.5],[0.5])])
# Prepare the training data (downloaded to ./data on first run).
train_set = torchvision.datasets.MNIST('./data', train=True, transform=data_tf, download=True)
# Prepare the test data.
test_set = torchvision.datasets.MNIST('./data', train=False, transform=data_tf, download=True)
batchSize = 64
train_data = DataLoader(train_set, batch_size=batchSize, shuffle=True) # training batches of 64, reshuffled each epoch
test_data = DataLoader(test_set, batch_size=128, shuffle=False) # evaluation batches of 128, fixed order
#示例网络1
class fc_net_2layer(nn.Module):
    """Example network 1: minimal fully connected MNIST classifier.

    Architecture: 784 -> 10 -> ReLU -> 10 (logits for the 10 digit classes).
    Input is any tensor of shape (N, ..., 784 total per sample), e.g.
    (N, 1, 28, 28); it is flattened before the linear stack.
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(28 * 28, 10),
            nn.ReLU(),
            nn.Linear(10, 10),  # final layer emits the 10 class logits
        )

    def forward(self, x):
        # Flatten (N, 1, 28, 28) images to (N, 784) before the linear layers.
        x = x.view(x.size(0), -1)
        return self.fc(x)
#示例网络2
class fc_net_4layer(nn.Module):
    """Example network 2: four-layer fully connected MNIST classifier.

    Architecture: 784 -> 400 -> 200 -> 100 -> 10, ReLU between layers;
    the last layer emits logits for the 10 digit classes.
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(28 * 28, 400),
            nn.ReLU(),
            nn.Linear(400, 200),
            nn.ReLU(),
            nn.Linear(200, 100),
            nn.ReLU(),
            nn.Linear(100, 10),  # final layer emits the 10 class logits
        )

    def forward(self, x):
        # Flatten (N, 1, 28, 28) images to (N, 784) before the linear layers.
        x = x.view(x.size(0), -1)
        return self.fc(x)
#示例网络3
class CNN(nn.Module):
    """Example network 3: small convolutional MNIST classifier.

    Four conv blocks (Conv -> BatchNorm -> ReLU, with max-pooling after
    blocks 2 and 4) followed by a three-layer classifier head.
    Expects input of shape (N, 1, 28, 28); returns (N, 10) logits.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3),   # -> 16 x 26 x 26
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3),  # -> 32 x 24 x 24
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))  # -> 32 x 12 x 12, (24-2)/2 + 1
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),  # -> 64 x 10 x 10
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True))
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3),  # -> 128 x 8 x 8
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))  # -> 128 x 4 x 4
        self.fc = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 10))  # final layer emits the 10 class logits

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Flatten (N, 128, 4, 4) feature maps to (N, 2048) for the head.
        x = x.view(x.size(0), -1)
        return self.fc(x)
#网络参数数量
def get_parameter_number(net):
    """Count the parameters of a network.

    Args:
        net: any ``nn.Module``.

    Returns:
        dict with ``'Total'`` (all parameters) and ``'Trainable'``
        (parameters with ``requires_grad=True``) as ints.
    """
    total_num = sum(p.numel() for p in net.parameters())
    trainable_num = sum(p.numel() for p in net.parameters() if p.requires_grad)
    return {'Total': total_num, 'Trainable': trainable_num}
# Select one of the example networks (uncomment to switch).
# net = fc_net_2layer()
# net = fc_net_4layer()
net = CNN()
print(net)
use_gpu = torch.cuda.is_available()
print(get_parameter_number(net))

# Loss: cross-entropy over the 10 class logits.
criterion = nn.CrossEntropyLoss()

# Optimizer: plain SGD; the experiments used learning rates 0.1 and 0.01.
learning_rate = 0.01
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)

# Move the model and the loss module to the GPU when one is available.
if use_gpu:
    net = net.cuda()
    criterion = criterion.cuda()

# Per-epoch history, filled in by the training loop below.
losses = []        # mean training loss per epoch
acces = []         # mean training accuracy per epoch
eval_losses = []   # mean evaluation loss per epoch
eval_acces = []    # mean evaluation accuracy per epoch
trainloss = []     # train-loss history that is appended to epochs.txt at the end
start = time.time()
#开始训练
# Train for 20 epochs; after each epoch, evaluate on the test set.
for e in range(20):
    # ---- training pass ----
    train_loss = 0.0
    train_acc = 0.0
    net.train()
    for im, label in train_data:
        if use_gpu:
            im, label = im.cuda(), label.cuda()
        # Forward pass.
        out = net(im)
        loss = criterion(out, label)
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Accumulate batch loss and accuracy.
        train_loss += loss.item()
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        train_acc += num_correct / im.shape[0]
    losses.append(train_loss / len(train_data))
    acces.append(train_acc / len(train_data))

    # ---- evaluation pass on the test set ----
    eval_loss = 0.0
    eval_acc = 0.0
    net.eval()  # inference mode (affects BatchNorm running stats)
    with torch.no_grad():  # no graph needed during evaluation
        for im, label in test_data:
            if use_gpu:
                im, label = im.cuda(), label.cuda()
            out = net(im)
            loss = criterion(out, label)
            eval_loss += loss.item()
            _, pred = out.max(1)
            num_correct = (pred == label).sum().item()
            eval_acc += num_correct / im.shape[0]
    eval_losses.append(eval_loss / len(test_data))
    eval_acces.append(eval_acc / len(test_data))

    print('***** One epoch has finished ******')
    print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'
          .format(e, train_loss / len(train_data), train_acc / len(train_data),
                  eval_loss / len(test_data), eval_acc / len(test_data)))
    trainloss.append(train_loss / len(train_data))
end = time.time()
print('The code run{:.2f}s'.format(end - start))
# Append this run's per-epoch training losses to epochs.txt as one CSV row:
# "CNN,<loss epoch 0>,<loss epoch 1>,...".
filename = 'epochs.txt'
with open(filename, 'a') as name:
    name.write('CNN,')
    # join handles any length (including empty) and the with-block closes the
    # file automatically, so no explicit close() is needed.
    name.write(','.join(str(loss_value) for loss_value in trainloss) + '\n')
运行:
python mnistGPU.py
结果
1. CNN结构
2. fc_net_4layer
3. fc_net_2layer
4. 三种网络对比
net | 总para量 | runtime | last epoch trainloss | acc | eval loss | eval acc |
---|---|---|---|---|---|---|
CNN | 2328298 | 606.21s | 0.002778 | 0.999883 | 0.020380 | 0.99374 |
fc_net_4layer | 415310 | 263.78s | 0.069752 | 0.979761 | 0.100663 | 0.967168 |
fc_net_2layer | 7960 | 265.27s | 0.254767 | 0.927056 | 0.260187 | 0.923853 |
遇到的问题
1. conda install tensorflow 报错
解决: 用pip install。。。。
ps:查看tensorflow版本:
import tensorflow as tf
print(tf.__version__)
2. unindent does not match any outer indentation level
解决:缩进问题。。没对齐