使用的数据集:MNIST
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torch.utils.data import random_split
from torch.utils.data import TensorDataset
import torchvision.transforms as transforms
# 定义CNN模型,如果看不懂,需要先学习CNN过程
class Net(nn.Module):
    """Two-stage convolutional classifier for single-channel 28x28 images.

    Architecture:
        conv1: Conv2d(1 -> 16, kernel 5, stride 1, padding 2) -> ReLU -> MaxPool2d(2)
        conv2: Conv2d(16 -> 32, kernel 5, stride 1, padding 2) -> ReLU -> MaxPool2d(2)
        out:   Linear(32 * 7 * 7 -> 10)

    The padded 5x5 convolutions keep the spatial size and each pooling stage
    halves it, so a 28x28 input reaches the linear layer as 32 maps of 7x7.
    """

    # Shape (channels, height, width) the linear head was sized for.
    _FEATURE_SHAPE = (32, 7, 7)

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return the 10 raw class scores (logits) for each input image.

        Args:
            x: image tensor whose last three dimensions are (1, 28, 28).

        Returns:
            Tensor of shape (rows, 10), one row of logits per image.

        Raises:
            ValueError: if the convolutional features are not 32x7x7, i.e.
                the input images were not 28x28.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        # Without this check, view(-1, ...) would silently fold a wrong-sized
        # image into extra batch rows (e.g. a 56x56 input yields 4 rows per
        # image) instead of failing.
        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise ValueError(
                "Net expects 1x28x28 inputs; got feature map of shape "
                f"{tuple(x.shape)} after the convolutional stages"
            )
        x = x.view(-1, 32 * 7 * 7)
        return self.out(x)
# Mini-batch size shared by the training and test loaders.
batch_size = 64

# Convert each image to a float tensor, then standardize it with
# mean 0.1307 and std 0.3081 (presumably the MNIST pixel statistics).
# This rescales the values; it does not make them normally distributed.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Single definition of the dataset directory so both splits always agree.
data_root = r'C:/Users/Administrator/Desktop/data/'

# Training split (downloaded on first use).
train_dataset = MNIST(
    root=data_root,
    train=True,
    download=True,
    transform=transform,
)
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
)

# Test split. Evaluation does not depend on sample order, so shuffling is
# turned off to keep the iteration order deterministic.
test_dataset = MNIST(
    root=data_root,
    train=False,
    download=True,
    transform=transform,
)
test_loader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=batch_size,
)

# Instantiate the model and print its layer summary.
model = Net()
print(model)

# Cross-entropy loss on the raw class scores produced by the model.
criterion = nn.CrossEntropyLoss()

# Stochastic gradient descent with momentum.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
# Sampled loss values, and the global iteration index at which each one
# was recorded (used as the x-axis of the loss curve).
loss_list = []
loss_steps = []

# Training mode. This model has no dropout/batch-norm layers, so this is a
# safeguard in case such layers are added later.
model.train()

for epoch in range(10):
    for batch, (X, y) in enumerate(train_loader):
        # Forward pass.
        y_pred = model(X)
        # Compute the loss.
        loss = criterion(y_pred, y)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # Backward pass.
        loss.backward()
        # Update the parameters.
        optimizer.step()
        # Record and print the loss every 300 batches.
        if batch % 300 == 0:
            # .item() already returns a detached Python float; going
            # through the legacy .data attribute is unnecessary.
            loss_value = loss.item()
            loss_list.append(loss_value)
            loss_steps.append(epoch * len(train_loader) + batch)
            print("loss------------", loss_value)

# Plot the loss curve against the real training iteration rather than an
# arbitrary 0..1000 scale.
plt.plot(loss_steps, loss_list)
plt.xlabel("training iteration")
plt.ylabel("loss")
plt.show()
# Measure classification accuracy on the test set.
rets = []  # not used in this section; kept in case later code refers to it
total = 0
correct = 0
# Inference mode. This model has no dropout/batch-norm layers, so this is a
# safeguard in case such layers are added later.
model.eval()
# Gradients are not needed for evaluation.
with torch.no_grad():
    for X, y in test_loader:
        # Index of the largest class score = predicted digit.
        predicted = model(X).argmax(dim=1)
        total += y.size(0)
        correct += (predicted == y).sum().item()
print('accuracy on test set: %.2f %% ' % (100.0*(correct/total)))
# Show the model's prediction for each of the first 80 test images.
# A correct prediction is titled with the label; a wrong one is titled with
# the predicted digit in red.
model.eval()
plt.figure(figsize=(20, 20))
with torch.no_grad():  # inference only, no gradients needed
    for i in range(80):
        plt.subplot(8, 10, i + 1)
        # Hide the axis ticks (calling xticks()/yticks() with no argument
        # only reads the current ticks and changes nothing).
        plt.xticks(())
        plt.yticks(())
        plt.grid(False)
        # Display the raw, un-normalized pixels.
        plt.imshow(test_dataset.data[i], cmap=plt.cm.binary_r)
        # Index the dataset rather than test_dataset.data so the image goes
        # through the same transform used for training; feeding the raw
        # 0..255 pixels gives the model inputs on a different scale.
        image, label = test_dataset[i]
        # Add the batch dimension: (1, 28, 28) -> (1, 1, 28, 28).
        predicted = model(image.unsqueeze(0)).argmax(dim=1).item()
        if label == predicted:
            plt.title(label)
        else:
            plt.title(predicted, color='red')
plt.show()
错误的会显示红色,这里说明前80个全部预测正确了,毕竟是99.10%的正确率
然后简单测试下泛化能力:
我自己写了0123456789看它能不能识别
这里我截取的图的大小是28*28的
以后有空会实现对图片的自动切割和划分
这里只对图片做了灰度化和二值化处理
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Display the ten hand-written digit images (0.png .. 9.png) as loaded
# from disk, side by side in one row.
plt.figure(figsize=(20, 2))
for i in range(10):
    plt.subplot(1, 10, i + 1)
    plt.xticks(())
    plt.yticks(())
    plt.grid(False)
    image_path = "c:/Users/Administrator/Desktop/image/" + str(i) + ".png"
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError("cannot read image: " + image_path)
    # OpenCV loads colour images as BGR while matplotlib expects RGB;
    # convert so the colours are not swapped on screen.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# Classify the ten hand-written digit images (0.png .. 9.png) and show each
# binarized image titled with the predicted digit: cyan when it matches the
# file's digit, red otherwise.
plt.figure(figsize=(20, 2))
model.eval()
with torch.no_grad():  # inference only, no gradients needed
    for i in range(10):
        plt.subplot(1, 10, i + 1)
        plt.xticks(())
        plt.yticks(())
        plt.grid(False)
        image_path = "c:/Users/Administrator/Desktop/image/" + str(i) + ".png"
        img = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError("cannot read image: " + image_path)
        # Convert to a single-channel grayscale image.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # The network only accepts 28x28 inputs; shrink other sizes rather
        # than failing on the reshape.
        if img.shape != (28, 28):
            img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_AREA)
        # Binarize and invert: pixels above 155 become 0, the rest 255, so
        # dark ink on a light background ends up as a bright digit on black.
        _, img = cv2.threshold(img, 155, 255, cv2.THRESH_BINARY_INV)
        # Apply the same ToTensor + Normalize pipeline used for training;
        # feeding the raw 0/255 values gives the model inputs on a
        # different scale than it was trained on.
        X = transform(img).unsqueeze(0)  # (1, 28, 28) -> (1, 1, 28, 28)
        predicted = model(X).argmax(dim=1).item()
        title_color = 'c' if i == predicted else 'r'
        plt.title(predicted, c=title_color)
        plt.imshow(img)
plt.show()
效果还行,0和9认错了