# 毕业论文相关代码如下:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
#读取文件路径,看名称
import os
# List the Kaggle input tree: for every directory print how many files it
# holds, then show at most the first nine file paths so the output stays short.
for dirname, _, filenames in os.walk('/kaggle/input'):
    print(len(filenames))
    for filename in filenames[:9]:
        print(os.path.join(dirname, filename))
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import cv2
import json
%matplotlib inline
# 查看Kaggle提供的GPU型号
!nvidia-smi
# Pick one image (path taken from the directory listing) and display it.
DIR = '/kaggle/input/words-mnist/dataset/v011_words_small/9273.png'
img = cv2.imread(DIR)
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError("cannot read image: {}".format(DIR))
# OpenCV loads channels as BGR while matplotlib expects RGB.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# Image size in pixels: height, width.
print(img.shape[0], img.shape[1])
# Load the labels. The JSON file is a dict of the form {image name: label}.
with open('/kaggle/input/words-mnist/v011_labels_small.json', 'r') as f:
    lable = json.load(f)
print(lable['8649.jpeg'])
print(lable['9273.png'])
# Same content as a list of single-entry dicts, one per image.
lables = [{x: y} for x, y in lable.items()]
# Collect every distinct character that occurs in any label.
char_set = set(j for i in lable.values() for j in i)
class_num = len(char_set)
print(class_num)
# alphabet: index -> character, used when decoding predictions.
# Sorting makes the character <-> index mapping identical on every run; plain
# set iteration order depends on string hashing and can differ between runs,
# which would make a saved checkpoint decode to the wrong characters.
alphabet = sorted(char_set)
# char_dict: character -> class index, used when encoding labels.
# Index 0 is left free (the loop below starts at 1).
char_dict = {}
char_decode = {}
char_decode[' '] = 0
for i, j in enumerate(alphabet):
    char_dict[j] = i + 1
# 标签为字典形式 {图片名: label};目标字符一共有100个,加上背景就是101个类别。
# 开始写模型
import json

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import sampler
# Bidirectional LSTM followed by a per-time-step linear projection.
class BidirectionalLSTM(nn.Module):
    """Bidirectional LSTM layer with a linear output projection.

    Args:
        nIn: feature size of each input time step.
        nHidden: hidden size of the LSTM (per direction).
        nOut: feature size of each output time step.
    """

    def __init__(self, nIn, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()
        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
        # * 2 because the forward and backward hidden states are concatenated.
        self.embedding = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        """Map a [T, b, nIn] sequence to a [T, b, nOut] sequence."""
        recurrent, _ = self.rnn(input)
        T, b, h = recurrent.size()
        # Fold time and batch together so the linear layer sees 2-D input.
        flat = recurrent.view(T * b, h)
        projected = self.embedding(flat)  # [T * b, nOut]
        return projected.view(T, b, -1)
# CRNN model: convolutional feature extractor + two stacked bidirectional LSTMs.
class CRNN(nn.Module):
    """Convolutional recurrent network for sequence recognition.

    Args:
        imgH: input image height (accepted for interface compatibility; not
            read by this implementation).
        nc: number of input image channels.
        nclass: number of output classes per time step.
        nh: hidden size of the LSTM layers.
        n_rnn: accepted for interface compatibility; not read.
        leakyRelu: use LeakyReLU(0.2) instead of ReLU after each convolution.
    """

    def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False):
        super(CRNN, self).__init__()

        ks = [3, 3, 3, 3, 3, 3, 2]  # kernel sizes (3 -> 3x3, 2 -> 2x2)
        ps = [1, 1, 1, 1, 1, 1, 0]  # paddings
        ss = [1, 1, 1, 1, 1, 1, 1]  # strides
        nm = [64, 128, 256, 256, 512, 512, 512]  # output channels per conv

        cnn = nn.Sequential()

        def convRelu(i, batchNormalization=False):
            """Append conv layer i (+ optional batch norm) and its activation."""
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))
        convRelu(2, True)
        convRelu(3)
        # Kernel 2x2, stride (2, 1), padding (0, 1): stride 2 along the height
        # only, stride 1 along the width.
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))
        convRelu(6, True)

        self.cnn = cnn
        self.rnn = nn.Sequential(
            BidirectionalLSTM(512, nh, nh),
            BidirectionalLSTM(nh, nh, nclass))

    def forward(self, input):
        """Run the network on a batch of images.

        Returns:
            Tensor of shape [w, b, nclass], where w is the width of the
            convolutional feature map. These are the raw outputs of the last
            linear layer (no softmax is applied here).

        Raises:
            ValueError: if the convolutional feature map height is not 1.
        """
        # conv features
        conv = self.cnn(input)
        b, c, h, w = conv.size()
        if h != 1:
            # squeeze(2) would silently leave a 4-D tensor and the permute
            # below would then fail with an unrelated-looking error.
            raise ValueError(
                "the height of conv features must be 1, got {}".format(h))
        conv = conv.squeeze(2)        # [b, c, w]
        conv = conv.permute(2, 0, 1)  # [w, b, c]
        # rnn features
        return self.rnn(conv)
# Weight initialisation, meant to be passed to `model.apply`.
def weights_init(m):
    """Initialise conv and batch-norm layers in place.

    Modules whose class name contains 'Conv' get weights drawn from
    N(0, 0.02). Modules whose class name contains 'BatchNorm' get weights
    drawn from N(1, 0.02) and zero bias. Every other module is left untouched.
    """
    classname = m.__class__.__name__
    if 'Conv' in classname:
        m.weight.data.normal_(0.0, 0.02)
    elif 'BatchNorm' in classname:
        # Non-affine batch norm layers have weight/bias set to None.
        if m.weight is not None:
            m.weight.data.normal_(1.0, 0.02)
        if m.bias is not None:
            m.bias.data.fill_(0)
# Build and initialise the CRNN.
def get_crnn(nclass=101):
    """Create a CRNN for 32-pixel-high, single-channel images.

    Args:
        nclass: number of output classes per time step (default 101).

    Returns:
        A CRNN with hidden size 256 whose conv / batch-norm layers have been
        initialised by `weights_init`.
    """
    # leakyRelu is left at its default (False). Passing False as a fifth
    # positional argument would land in the n_rnn slot instead.
    model = CRNN(32, 1, nclass, 256)
    model.apply(weights_init)
    return model
# Training dataset: the first `n_train` (default 7000) labelled images.
class GetData(Dataset):
    """Training split of the word-image dataset.

    Args:
        is_train: stored on the instance; not otherwise used here.
        label_path: JSON file mapping image file name -> label string.
        image_dir: directory that contains the image files.
        n_train: number of leading entries that form the training split.

    Each item is ``(image, idx)``: a float32 array of shape
    (1, inp_h, inp_w) and the sample index, so the caller can look the label
    up through ``self.data[idx]``.
    """

    def __init__(self, is_train=True,
                 label_path='/kaggle/input/words-mnist/v011_labels_small.json',
                 image_dir='/kaggle/input/words-mnist/dataset/v011_words_small/',
                 n_train=7000):
        self.is_train = is_train
        self.image_dir = image_dir
        # Target image size in pixels.
        self.inp_h = 32
        self.inp_w = 160
        # Constants used to standardise pixel values after scaling to [0, 1].
        self.mean = np.array(0.588, dtype=np.float32)
        self.std = np.array(0.193, dtype=np.float32)
        with open(label_path, 'r') as f:
            self.labels = json.load(f)
        # One single-entry dict {image name: label} per sample.
        self.data = [{x: y} for x, y in self.labels.items()]
        self.data = self.data[:n_train]
        print("load {} images!".format(len(self)))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = next(iter(self.data[idx]))
        img_path = os.path.join(self.image_dir, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising on a bad path.
            raise FileNotFoundError("cannot read image: {}".format(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_h, img_w = img.shape
        img = cv2.resize(img, (0, 0), fx=self.inp_w / img_w,
                         fy=self.inp_h / img_h, interpolation=cv2.INTER_CUBIC)
        img = np.reshape(img, (self.inp_h, self.inp_w, 1))
        img = img.astype(np.float32)
        img = (img / 255. - self.mean) / self.std
        img = img.transpose([2, 0, 1])  # HWC -> CHW
        # Return the index rather than the label itself: labels are strings
        # of varying length and are fetched per batch from `self.data`.
        return img, idx
# Validation dataset: every labelled image after the first `n_train` (7000).
class GetVal(Dataset):
    """Validation split of the word-image dataset.

    Args:
        is_train: stored on the instance; not otherwise used here.
        label_path: JSON file mapping image file name -> label string.
        image_dir: directory that contains the image files.
        n_train: number of leading entries to skip (they form the training
            split).

    Each item is ``(image, idx)``: a float32 array of shape
    (1, inp_h, inp_w) and the sample index, so the caller can look the label
    up through ``self.data[idx]``.
    """

    def __init__(self, is_train=True,
                 label_path='/kaggle/input/words-mnist/v011_labels_small.json',
                 image_dir='/kaggle/input/words-mnist/dataset/v011_words_small/',
                 n_train=7000):
        self.is_train = is_train
        self.image_dir = image_dir
        # Target image size in pixels.
        self.inp_h = 32
        self.inp_w = 160
        # Constants used to standardise pixel values after scaling to [0, 1].
        self.mean = np.array(0.588, dtype=np.float32)
        self.std = np.array(0.193, dtype=np.float32)
        with open(label_path, 'r') as f:
            self.labels = json.load(f)
        # One single-entry dict {image name: label} per sample.
        self.data = [{x: y} for x, y in self.labels.items()]
        self.data = self.data[n_train:]
        print("load {} images!".format(len(self)))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = next(iter(self.data[idx]))
        img_path = os.path.join(self.image_dir, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising on a bad path.
            raise FileNotFoundError("cannot read image: {}".format(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_h, img_w = img.shape
        img = cv2.resize(img, (0, 0), fx=self.inp_w / img_w,
                         fy=self.inp_h / img_h, interpolation=cv2.INTER_CUBIC)
        img = np.reshape(img, (self.inp_h, self.inp_w, 1))
        img = img.astype(np.float32)
        img = (img / 255. - self.mean) / self.std
        img = img.transpose([2, 0, 1])  # HWC -> CHW
        # Return the index rather than the label itself: labels are strings
        # of varying length and are fetched per batch from `self.data`.
        return img, idx
# Label encoding: text -> class indices.
def encode(char_dict, text):
    """Encode a batch of label strings as class indices.

    Args:
        char_dict: mapping from character to integer class index.
        text: iterable of label strings.

    Returns:
        A pair ``(codes, lengths)``: a LongTensor with the indices of all
        labels concatenated, and an IntTensor with the length of each label.

    Raises:
        KeyError: if a label contains a character missing from `char_dict`.
    """
    lengths = []
    codes = []
    for item in text:
        lengths.append(len(item))
        codes.extend(char_dict[char] for char in item)
    return (torch.LongTensor(codes), torch.IntTensor(lengths))
def decode(t, length, alphabet, raw=False):
    """Decode class indices back into text.

    Args:
        t: 1-D tensor of class indices; for a batch, all sequences
            concatenated.
        length: tensor with the length of each sequence in `t`.
        alphabet: sequence of characters; index ``k`` (k >= 1) maps to
            ``alphabet[k - 1]`` and index 0 is treated as "no character".
        raw: if True, emit one character per position without collapsing,
            rendering index 0 as '-'. If False, drop index 0 and merge runs
            of the same index into one character.

    Returns:
        A string when `length` has a single element, otherwise a list with
        one string per sequence.
    """
    if length.numel() == 1:
        n = int(length.reshape(-1)[0])
        assert t.numel() == n, "text with length: {} does not match declared length: {}".format(t.numel(), n)
        codes = t.reshape(-1).tolist()
        if raw:
            return ''.join('-' if c == 0 else alphabet[c - 1] for c in codes)
        char_list = []
        prev = None
        for c in codes:
            # Skip index 0 and any repeat of the immediately preceding index.
            if c != 0 and c != prev:
                char_list.append(alphabet[c - 1])
            prev = c
        return ''.join(char_list)

    # batch mode
    assert t.numel() == length.sum(), "texts with length: {} does not match declared length: {}".format(t.numel(), length.sum())
    texts = []
    index = 0
    for l in length.tolist():
        texts.append(
            decode(t[index:index + l], torch.IntTensor([l]), alphabet, raw=raw))
        index += l
    return texts
def get_optimizer(model, lr=0.0001):
    """Return an Adam optimizer over the trainable parameters of `model`.

    Args:
        model: module whose parameters are optimised; parameters with
            ``requires_grad == False`` are excluded.
        lr: learning rate (default 0.0001).
    """
    trainable = [p for p in model.parameters() if p.requires_grad]
    return optim.Adam(trainable, lr=lr)
def get_batch_label(d, i):
    """Look up the label strings for a batch of sample indices.

    Args:
        d: object with a ``data`` list of single-entry dicts
            ``{image name: label}``.
        i: iterable of sample indices (plain ints or 0-dim tensors).

    Returns:
        List of labels, in the order of `i`.
    """
    # int() accepts both Python ints and 0-dim index tensors.
    return [next(iter(d.data[int(idx)].values())) for idx in i]
# Seed every RNG before the model is built so that weight initialisation
# and data shuffling are covered by the seed.
random.seed(123)
np.random.seed(123)
torch.manual_seed(123)
torch.cuda.manual_seed(123)
torch.cuda.manual_seed_all(123)

train_dataset = GetData()
val_dataset = GetVal()

# Wrap the datasets in PyTorch data loaders.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True
)
# Validation accuracy is order-independent, so no shuffling is needed.
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=64,
    shuffle=False
)

model = get_crnn()
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print('gpu')
else:
    device = torch.device("cpu:0")
    print('cpu')
model.to(device)

optimizer = get_optimizer(model)
# Multiply the learning rate by 0.1 at scheduler steps 50, 60 and 70.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, [50, 60, 70],
    0.1
)
criterion = torch.nn.CTCLoss()

# Checkpoint directory; exist_ok lets the script be re-run without failing.
os.makedirs('crnn_modudle', exist_ok=True)
# Train for 100 epochs; every 10th epoch evaluate on the validation set and
# save a checkpoint when the accuracy is at least as good as the best so far.
best_acc = 0.0
for epoch in range(100):
    model.train()
    for i, (inp, idx) in enumerate(train_loader):
        inp = inp.to(device)
        train_lable = get_batch_label(train_dataset, idx)
        batch_size = inp.size(0)
        # Encoding needs the char -> index dict, not the index -> char list.
        text, length = encode(char_dict, train_lable)
        # CTCLoss expects log-probabilities. log_softmax is idempotent, so
        # this is also safe if the model already emits log-probabilities.
        preds = model(inp).cpu().log_softmax(2)
        # Every sample in the batch uses the full output sequence length.
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        loss = criterion(preds, text, preds_size, length)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i == 100 and epoch % 5 == 0:
            print("epoch:{} step:{} loss:{} lr:{}".format(epoch, i, loss.item(), lr_scheduler.get_last_lr()))
    lr_scheduler.step()

    if epoch % 10 == 0:
        model.eval()
        n_correct = 0
        # Accuracy denominator: the actual number of validation samples.
        test_num = len(val_dataset)
        with torch.no_grad():
            for i, (inp, idx) in enumerate(val_loader):
                inp = inp.to(device)
                val_labels = get_batch_label(val_dataset, idx)
                batch_size = inp.size(0)
                preds = model(inp).cpu()
                preds_size = torch.IntTensor([preds.size(0)] * batch_size)
                # Greedy decoding: best class per time step, then flatten
                # batch-major so each sample's steps are contiguous.
                _, preds = preds.max(2)
                preds = preds.transpose(1, 0).contiguous().view(-1)
                sim_preds = decode(preds.data, preds_size.data, alphabet, raw=False)
                if isinstance(sim_preds, str):
                    # decode returns a bare string for a single-sample batch.
                    sim_preds = [sim_preds]
                for pred, target in zip(sim_preds, val_labels):
                    if pred == target:
                        n_correct += 1
            # Show a few predictions from the last validation batch.
            raw_preds = decode(preds.data, preds_size.data, alphabet, raw=True)
            if isinstance(raw_preds, str):
                raw_preds = [raw_preds]
            raw_preds = raw_preds[:5]
            for raw_pred, pred, gt in zip(raw_preds, sim_preds, val_labels):
                print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
        now_acc = n_correct * 1.0 / test_num
        print("best_acc:{} correct:{} total:{} ".format(now_acc, n_correct, test_num))
        if now_acc >= best_acc:
            # Update first so the checkpoint records its own accuracy.
            best_acc = now_acc
            torch.save(
                {"state_dict": model.state_dict(),
                 "epoch": epoch + 1,
                 "best_acc": best_acc,
                 }, os.path.join('crnn_modudle', "checkpoint_{}_acc_{:.4f}.pth".format(epoch, now_acc)))
            print("save_model!")