1. 数据集
每个数字对应一个目录,目录下存放该数字的图片,每类约1000张。
2. 训练
#coding=utf-8
import torchvision
from torchvision import datasets, transforms
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import matplotlib.pyplot as plt
import cv2
class LeNet5(nn.Module):
    """LeNet-5 style convolutional classifier.

    The layer sizes assume 3-channel 28x28 inputs: conv1 (padding=2) keeps
    28x28, pooling halves it to 14x14, conv2 (no padding) gives 10x10 and
    the second pooling gives 5x5, which matches ``fc1``'s 16 * 5 * 5 inputs.

    Args:
        num_classes: Number of output logits (default 10).
        dropout_p: Dropout probability applied after each hidden stage
            while the module is in training mode (default 0.3).
    """

    def __init__(self, num_classes=10, dropout_p=0.3):
        super().__init__()
        # Conv2d(in_channels, out_channels, kernel_size)
        self.conv1 = nn.Conv2d(3, 6, 5, padding=2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected (linear) layers.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)
        # Plain float attribute: not part of the state dict, so checkpoints
        # saved by the previous version still load.
        self.dropout_p = dropout_p

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = F.max_pool2d(torch.tanh(self.conv1(x)), (2, 2))
        x = F.dropout(x, p=self.dropout_p, training=self.training)
        x = F.max_pool2d(torch.tanh(self.conv2(x)), (2, 2))
        x = F.dropout(x, p=self.dropout_p, training=self.training)
        # Collapse every dimension except the batch dimension.
        x = torch.flatten(x, 1)
        x = torch.tanh(self.fc1(x))
        x = F.dropout(x, p=self.dropout_p, training=self.training)
        x = torch.tanh(self.fc2(x))
        x = F.dropout(x, p=self.dropout_p, training=self.training)
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the number of features per sample (product of all
        dimensions except the first).

        Kept for backward compatibility; ``forward`` uses ``torch.flatten``.
        """
        return x.size()[1:].numel()
def _evaluate(model, loader, device):
    """Return how many samples in ``loader`` the model classifies correctly.

    The model is switched to eval mode (dropout disabled) and gradients are
    turned off for the duration of the pass; the previous train/eval mode is
    restored before returning.
    """
    was_training = model.training
    model.eval()
    correct = torch.zeros((), dtype=torch.long, device=device)
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(1)
            correct += (predicted == labels).sum()
    model.train(was_training)
    return int(correct.item())


def train(epoch, data_dir="data", batch_size=20, save_path="model_.pth"):
    """Train LeNet5 on an image-folder dataset and save its state dict.

    Every 100 batches the model is evaluated and the mean training loss
    since the previous report is printed together with the accuracy. The
    weights are saved whenever that accuracy reaches 0.95 and once more
    after the last epoch.

    NOTE: evaluation reads the same dataset that is used for training, so
    the printed accuracy is a training-set accuracy.

    Args:
        epoch: Number of passes over the dataset.
        data_dir: Root directory with one sub-directory per class.
        batch_size: Mini-batch size for both loaders.
        save_path: File the state dict is written to.
    """
    dataset = datasets.ImageFolder(
        data_dir,
        transforms.Compose([
            transforms.Grayscale(num_output_channels=3),
            transforms.Resize((28, 28)),
            transforms.ToTensor(),
        ]),
    )
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    # Fall back to the CPU instead of crashing on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = LeNet5().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    model.train()

    # A separate loop variable keeps the ``epoch`` argument intact.
    for epoch_idx in range(epoch):
        running_loss = 0.0
        steps = 0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()              # reset accumulated gradients
            outputs = model(inputs)            # forward pass
            loss = criterion(outputs, labels)
            loss.backward()                    # back-propagation
            optimizer.step()                   # one parameter update
            running_loss += loss.item()
            steps += 1

            if i % 100 == 0:
                accuracy = _evaluate(model, test_loader, device) / len(dataset)
                # Average over the batches actually seen since the last
                # report (1 at i == 0, otherwise 100).
                print('[%d,%d] loss:%.03f acc:%.04f' %
                      (epoch_idx + 1, i + 1, running_loss / steps, accuracy))
                running_loss = 0.0
                steps = 0
                if accuracy >= 0.95:
                    torch.save(model.state_dict(), save_path)

    # Always save at the end so a model exists even if 0.95 was never reached.
    torch.save(model.state_dict(), save_path)
def main():
    """Script entry point: run training for 10 epochs."""
    num_epochs = 10
    train(num_epochs)
# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
思路:
首先在data目录下放入分类目录,对应目录存放对应文件;
使用 DataLoader 迭代器按批次加载数据集,避免一次性将全部图片读入内存;
选择卷积神经网络模型进行训练;
计算准确率及loss值。
当准确率达到95%时保存文件,当迭代结束时再次保存文件,防止结束时准确率不足未保存模型。
训练集和测试集使用的是同一份数据,共有10000张40*40的手写数字图片(加载时统一缩放为28*28),因此这里的准确率是在训练数据上计算的。
3. 预测
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import torch.nn.functional as F
import random
from torch.autograd import Variable
class LeNet5(nn.Module):
    """LeNet-5 style convolutional classifier.

    The layer sizes assume 3-channel 28x28 inputs: conv1 (padding=2) keeps
    28x28, pooling halves it to 14x14, conv2 (no padding) gives 10x10 and
    the second pooling gives 5x5, which matches ``fc1``'s 16 * 5 * 5 inputs.

    Args:
        num_classes: Number of output logits (default 10).
        dropout_p: Dropout probability applied after each hidden stage
            while the module is in training mode (default 0.3).
    """

    def __init__(self, num_classes=10, dropout_p=0.3):
        super().__init__()
        # Conv2d(in_channels, out_channels, kernel_size)
        self.conv1 = nn.Conv2d(3, 6, 5, padding=2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected (linear) layers.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)
        # Plain float attribute: not part of the state dict, so existing
        # checkpoints still load.
        self.dropout_p = dropout_p

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = F.max_pool2d(torch.tanh(self.conv1(x)), (2, 2))
        x = F.dropout(x, p=self.dropout_p, training=self.training)
        x = F.max_pool2d(torch.tanh(self.conv2(x)), (2, 2))
        x = F.dropout(x, p=self.dropout_p, training=self.training)
        # Collapse every dimension except the batch dimension.
        x = torch.flatten(x, 1)
        x = torch.tanh(self.fc1(x))
        x = F.dropout(x, p=self.dropout_p, training=self.training)
        x = torch.tanh(self.fc2(x))
        x = F.dropout(x, p=self.dropout_p, training=self.training)
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the number of features per sample (product of all
        dimensions except the first).

        Kept for backward compatibility; ``forward`` uses ``torch.flatten``.
        """
        return x.size()[1:].numel()
def predict_image(data_dir="data", model_path="model_.pth", batch_size=20):
    """Compute and print the saved model's accuracy over an image folder.

    Args:
        data_dir: Root directory with one sub-directory per class.
        model_path: State-dict file to load into LeNet5.
        batch_size: Mini-batch size used for inference.

    Returns:
        The fraction of correctly classified samples (0.0 when no sample
        was evaluated).
    """
    dataset = datasets.ImageFolder(
        data_dir,
        transforms.Compose([
            transforms.Grayscale(num_output_channels=3),
            transforms.Resize((28, 28)),
            transforms.ToTensor(),
        ]),
    )
    # Sample order does not affect accuracy, so no shuffling is needed.
    test_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    # Fall back to the CPU instead of crashing on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = LeNet5()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    # Disable dropout; otherwise predictions are randomly perturbed.
    model.eval()

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(1)
            total += labels.size(0)
            correct += int((predicted == labels).sum().item())

    accuracy = correct / total if total else 0.0
    print("correct1: ", correct)
    print("Test acc: {0}".format(accuracy))
    return accuracy
def predict_image_singer(data_dir="data", model_path="model_.pth",
                         batch_size=20):
    """Predict one randomly drawn batch and print labels next to predictions.

    Prints the batch shape, the ground-truth labels and the predicted class
    indices, in that order.

    Args:
        data_dir: Root directory with one sub-directory per class.
        model_path: State-dict file to load into LeNet5.
        batch_size: Number of images in the sampled batch.
    """
    dataset = datasets.ImageFolder(
        data_dir,
        transforms.Compose([
            transforms.Grayscale(num_output_channels=3),
            transforms.Resize((28, 28)),
            transforms.ToTensor(),
        ]),
    )
    # shuffle=True makes the first batch a random sample, so there is no
    # need to move the whole dataset to the device just to pick one batch.
    test_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=True)

    # Fall back to the CPU instead of crashing on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = LeNet5()
    net.load_state_dict(torch.load(model_path, map_location=device))
    net = net.to(device)
    # Switch to eval mode so dropout does not randomize the predictions.
    net.eval()

    images, labels = next(iter(test_loader))
    images, labels = images.to(device), labels.to(device)
    print(images.shape)
    print(labels)
    with torch.no_grad():
        predicted = net(images).argmax(1)
    print(predicted)
def predict_image_singer_v1(filepath="data/3/3_10.png",
                            model_path="model_.pth"):
    """Classify a single image file and print the predicted class index.

    Args:
        filepath: Path of the image to classify.
        model_path: State-dict file to load into LeNet5.

    Returns:
        The predicted class index as an int.
        NOTE(review): presumably the index equals the digit because the
        training folders are named 0-9 - confirm against the dataset.
    """
    preprocess = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
    ])
    # PIL loads lazily, so the transform must run while the file is open;
    # the context manager then closes the file handle.
    with Image.open(filepath) as image:
        my_tensor = preprocess(image)
    # Add the batch dimension: (3, 28, 28) -> (1, 3, 28, 28).
    my_tensor = my_tensor.unsqueeze(0)

    # Fall back to the CPU instead of crashing on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = LeNet5()
    net.load_state_dict(torch.load(model_path, map_location=device))
    net.eval()
    net = net.to(device)

    with torch.no_grad():
        predicted = net(my_tensor.to(device)).argmax(1)
    print("filepath={},result={}".format(filepath, predicted))
    return int(predicted.item())
# Run the demos only when executed as a script, so that importing this
# module does not load the model or touch the dataset.
if __name__ == "__main__":
    predict_image()  # accuracy over the whole dataset
    predict_image_singer()  # predictions for one batch from the data loader
    predict_image_singer_v1(filepath="data/4/4_12.png")  # single image file
思路:
predict_image()计算准确率;
predict_image_singer() 查看预测结果,从迭代器中取出一个批次(20张图片)进行预测;
predict_image_singer_v1()测试单张图片