先简单记录下,后期有空再补充。
1. 定义 CNN 模型
import torch.nn as nn
import torch
from torchvision import transforms,models
class CNN(torch.nn.Module):
    """Small convolutional network for 28x28 single-channel digit images."""

    def __init__(self):
        super(CNN, self).__init__()
        # Single conv stage: 1 -> 32 channels with a 5x5 kernel; padding=2
        # keeps the 28x28 spatial size, then max-pooling halves it to 14x14.
        self.conv = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=5, padding=2),
            torch.nn.BatchNorm2d(32),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
        )
        # Classifier head: flattened 14*14*32 feature map -> 10 class logits.
        self.fc = torch.nn.Linear(14 * 14 * 32, 10)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for input (batch, 1, 28, 28)."""
        features = self.conv(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
2.制作自己的数据集
import os
import torch
from torch.utils import data
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset
# Map each digit directory name ('0'..'9') to its integer class label.
species = {str(digit): digit for digit in range(10)}
class MyDataset(Dataset):
    """Labelled dataset over a directory tree ``root/<digit>/<image>``.

    ``root`` is e.g. ``mnist/train`` or ``mnist/test``; each digit
    sub-directory name ('0'..'9') is the class label of the images inside it.
    Each sample is a ``(image, label)`` pair.
    """

    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.data = []
        # Sub-directories are the class names: '0', '1', ..., '9'.
        for sub_root in os.listdir(self.root):
            sub_dir = os.path.join(self.root, sub_root)
            # BUG FIX: the label used to be recovered by splitting the joined
            # path on '\\', which only works on Windows. Use the sub-directory
            # name directly — it is the label on every platform.
            label = species[sub_root]
            for image_name in os.listdir(sub_dir):
                # Store (full image path, integer label) tuples.
                self.data.append((os.path.join(sub_dir, image_name), label))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image_path, label = self.data[index]
        image = Image.open(image_path).convert('RGB')
        # BUG FIX: transform defaults to None but was called unconditionally;
        # without a transform, return the PIL image as-is.
        if self.transform is not None:
            image = self.transform(image)
        return image, label
class MyDataset_pre(Dataset):
    """Unlabelled dataset over a flat directory of images, for prediction.

    ``root`` is e.g. ``test_images``; every file in it is treated as one
    sample, returned as the (optionally transformed) image only.
    """

    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        # Full path of every image file directly under root.
        self.data = [os.path.join(self.root, name) for name in os.listdir(self.root)]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image = Image.open(self.data[index]).convert('RGB')
        # BUG FIX: transform defaults to None but was called unconditionally;
        # without a transform, return the PIL image as-is.
        if self.transform is not None:
            image = self.transform(image)
        return image
3.模型训练
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from preprocess_dataset import MyDataset
from model import CNN
BATCH_SIZE = 32
EPOCHS = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert to tensor and collapse RGB to one grey channel so the input
# matches the CNN's 1-channel Conv2d.
trans = transforms.Compose([transforms.ToTensor(), transforms.Grayscale()])

train_dataset = MyDataset('mnist1_new\\train', transform=trans)
test_dataset = MyDataset('mnist1_new\\test', transform=trans)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

model = CNN()
net = model.to(DEVICE)

# Cross-entropy loss over the 10 digit classes; Adam optimizer.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

# BUG FIX: the best-accuracy tracker was reset inside the epoch loop, so the
# "save only on improvement" check compared against 0 every epoch. Track it
# across all epochs instead.
best_acc = 0.0

for epoch in range(EPOCHS):
    # --- training: one full pass over the training set ---
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        # BUG FIX: move each batch to the model's device; previously the
        # batch stayed on CPU and training crashed when CUDA was available.
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = net(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print("epoch is {}, batch is{}/{}, loss is {}".format(
            epoch + 1, i, len(train_dataset) / BATCH_SIZE, loss.item()))

    # --- evaluation: accuracy and mean loss on the test set ---
    model.eval()
    loss_test = 0.0
    accuracy = 0.0
    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in test_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            # .item() accumulates a plain float; the old code summed tensors
            # that kept the autograd graph alive.
            loss_test += criterion(outputs, labels).item()
            _, pred = outputs.max(1)
            accuracy += (pred == labels).sum().item()
    accuracy = accuracy / len(test_dataset)
    loss_test = loss_test / (len(test_dataset) / BATCH_SIZE)
    print("epoch is {}, accuracy is {}, loss test is {}".format(epoch + 1, accuracy, loss_test))

    # Checkpoint only when the test accuracy improves on the best so far.
    if accuracy > best_acc:
        best_acc = accuracy
        torch.save(net, "model/mnist_model_nn.pkl")
        print('accuancy', best_acc)
4 之前的预处理:对图片进行变换,把一串数字按照从左到右的顺序切成单张数字图片。(仅供参考)
import cv2
import numpy as np
import os,torch
from torchvision.transforms import transforms
import numpy
from PIL import Image
def sort_contours(cnts, method='left-to-right'):
    """Sort contours by the position of their bounding boxes.

    Returns ``(sorted_contours, sorted_bounding_boxes)`` as a pair of tuples.
    ``method`` is one of 'left-to-right', 'right-to-left', 'top-to-bottom',
    'bottom-to-top'.
    """
    # Sort on the y coordinate for vertical methods, on x otherwise.
    axis = 1 if method in ('bottom-to-top', 'top-to-bottom') else 0
    # Descending order when scanning from the right or from the bottom.
    descending = method in ('right-to-left', 'bottom-to-top')
    boxes = [cv2.boundingRect(c) for c in cnts]
    ordered = sorted(zip(cnts, boxes), key=lambda pair: pair[1][axis], reverse=descending)
    cnts, boxes = zip(*ordered)
    return (cnts, boxes)
def cut_image_sign():
    """Cut each digit-strip image in ``output_me`` into per-digit crops.

    Input strips are white background / black digits; every blob whose
    contour area looks like a single digit is written to ``output_me_cut``
    as ``me_cut<N>.png``, numbered left to right.
    """
    root_dir = 'output_me\\'
    for im_name in os.listdir(root_dir):
        strip = cv2.imread(os.path.join(root_dir, im_name))
        # Normalise every strip to a fixed working size before thresholding.
        strip = cv2.resize(strip, (256, 138))
        gray = cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
        # NOTE(review): cv2.dilate expects a kernel ndarray; the tuple
        # (15, 15) is kept for behaviour parity but is likely not the
        # intended kernel — confirm and use np.ones((15, 15), np.uint8)
        # if real dilation is wanted.
        binary = cv2.dilate(binary, (15, 15))
        contours, _ = cv2.findContours(binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
        ordered, _ = sort_contours(contours)
        count = 0
        for contour in ordered:
            # Keep only blobs roughly the size of a single digit.
            area = cv2.contourArea(contour)
            if 300 < area < 4000:
                count = count + 1
                x, y, w, h = cv2.boundingRect(contour)
                crop = strip[y:y + h, x:x + w]
                cv2.imwrite(os.path.join('output_me_cut', 'me_cut{}.png'.format(count)), crop)
def image_address():
    """Normalise the cut digit images to 28x28 white-on-black PNGs.

    Reads every image in ``output_me_cut``, resizes it to 28x28, applies an
    Otsu threshold and inverts it so the digit is white on black (matching
    the MNIST-style training data), then writes it to ``output_me_cut_black``.
    """
    root_dir = 'output_me_cut\\'
    count = 0
    for im_name in os.listdir(root_dir):
        count = count + 1
        digit = Image.open(os.path.join(root_dir, im_name))
        resized = np.array(transforms.Resize((28, 28))(digit))
        gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
        # Otsu picks the threshold automatically; invert afterwards so the
        # background is black and the digit white.
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        inverted = cv2.bitwise_not(binary)
        # NOTE(review): the eroded image is computed but never written — the
        # original code saved the un-eroded image too. Confirm whether the
        # erosion result was meant to be saved instead.
        eroded = cv2.erode(inverted, (15, 15))
        cv2.imwrite(os.path.join('output_me_cut_black\\', '0_{}.png'.format(count)), inverted)
4.预测单张图片
import os
import torch
from PIL import Image
from torch import nn
from torchvision import transforms, models
from torch.utils.data import DataLoader
from preprocess_dataset import MyDataset_pre
from torchvision.transforms import ToPILImage
import cv2
# Index -> class-name lookup for the 10 digit classes.
species = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

# NOTE(review): torch.load unpickles arbitrary objects — only load model
# files from a trusted source.
model = torch.load("model\\mnist_model_nn.pkl", map_location=torch.device("cpu"))

# Match the training preprocessing: 28x28, tensor, single grey channel.
# Prediction images should already be black-background / white-digit.
trans = transforms.Compose([transforms.Resize((28, 28)), transforms.ToTensor(), transforms.Grayscale()])

predict_dataset = MyDataset_pre('output_me_cut_black', transform=trans)
predict_loader = DataLoader(predict_dataset, batch_size=32)

model.eval()
# BUG FIX: `predict` was reset inside the batch loop, so only the last
# batch's predictions survived when there was more than one batch.
# Accumulate across all batches instead.
predict = []
with torch.no_grad():
    for images in predict_loader:
        output = model(images)
        _, pred = torch.max(output, 1)
        for j in range(len(images)):
            predict.append(species[int(pred[j].item())])

# Concatenate the per-digit predictions into the recognised number string.
s = ''.join(predict)
print(s)