import requests
import time
import concurrent.futures
import random
def create():
    """Download one captcha image and store it in ``./dataset_ymtc_login/``.

    The file is named ``<random int below 10000000>_<unix seconds>.jpg``.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the output directory or file cannot be written.
    """
    # Local import so the function does not depend on import order in this file.
    import os

    # Short pause before every request.
    time.sleep(0.1)
    # A timeout keeps a stalled connection from blocking a worker forever.
    r = requests.get('https://www.ymtc.com/cn/info/captchas?t=Lpwd', timeout=10)
    # Do not save an error page as if it were a captcha image.
    r.raise_for_status()
    os.makedirs('./dataset_ymtc_login', exist_ok=True)
    file_name = f'{int(random.random() * 10000000)}_{int(time.time())}.jpg'
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(f'./dataset_ymtc_login/{file_name}', 'wb') as f:
        f.write(r.content)
# Download 10000 captcha images using a pool of 100 worker threads.
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as pool:
    for _ in range(10000):
        pool.submit(create)
    # Request shutdown without blocking here; leaving the ``with`` block
    # still waits for every queued download to finish.
    pool.shutdown(wait=False)
print('ok')
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import os
from PIL import Image
from tkinter.filedialog import askopenfilename
import glob
# Characters a captcha may contain; a character's position in this list is
# its class index.
captcha_array = list("0123456789abcdefghijklmnopqrstuvwxyz")


def text2vec(x):
    """One-hot encode captcha text into a (4, 36) integer tensor.

    Row ``p`` has a 1 in the column given by the position of ``x[p]`` in
    ``captcha_array``; rows beyond ``len(x)`` stay all zero.

    Raises:
        ValueError: if a character is not in ``captcha_array``.
        IndexError: if ``x`` has more than 4 characters.
    """
    encoded = torch.zeros((4, 36), dtype=torch.long)
    for position, char in enumerate(x):
        encoded[position, captcha_array.index(char)] = 1
    return encoded
def vec2text(x):
    """Decode a 2-D score tensor into captcha text.

    For every row of ``x`` the column with the highest value is looked up in
    ``captcha_array``; the resulting characters are concatenated in row order.
    """
    best_columns = torch.argmax(x, dim=1)
    return ''.join(captcha_array[column.data] for column in best_columns)
def single_vec_2_text(label_tensor):
    """Turn a 1-D tensor of class indices into the text it encodes.

    Each element is used as an index into ``captcha_array``.
    """
    return ''.join(captcha_array[class_index.data] for class_index in label_tensor)
def batch_vec_2_text(x):
    """Decode a batch of score tensors into a list of captcha strings.

    ``x`` is reduced with ``argmax`` over its third dimension, giving one row
    of class indices per image; every row is decoded with
    ``single_vec_2_text``.
    """
    index_rows = x.argmax(dim=2)
    return [single_vec_2_text(row) for row in index_rows]
def compare_list(x, y):
    """Count position-wise matches between two label sequences.

    Args:
        x: reference labels.
        y: labels compared against ``x`` position by position. Positions of
            ``x`` that have no counterpart in ``y`` count as mismatches.

    Returns:
        A tuple ``(matches, total, accuracy)`` where ``total`` is ``len(x)``
        and ``accuracy`` is ``matches / total`` (``0.0`` when ``x`` is empty,
        instead of dividing by zero).
    """
    total = len(x)
    matches = sum(1 for expected, predicted in zip(x, y) if expected == predicted)
    accuracy = matches / total if total else 0.0
    return matches, total, accuracy
# Preprocessing applied to every image before it is fed to the network:
# resize, convert to a single grayscale channel, convert to a tensor.
# NOTE(review): torchvision's Resize takes (height, width), so this yields
# images 160 high and 60 wide - confirm that matches the captcha orientation.
transf = transforms.Compose([
    transforms.Resize(size=(160, 60)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
])
# Dataset class
class MyData(Dataset):
    """Captcha images read from ``<root>train/`` or ``<root>test/``.

    The part of each file name before the first ``_`` is used as the
    image's label text.

    Args:
        is_train: truthy selects the ``train/`` sub-directory, falsy ``test/``.
        root: directory prefix (with trailing slash) containing both
            sub-directories.
    """

    def __init__(self, is_train, root='./dataset/'):
        super().__init__()
        self.root = root
        self.path = self.root + ('train/' if is_train else 'test/')
        # Directory listing, filled on first use (see _listing).
        self._files = None

    def _listing(self):
        """Return the sorted file names in ``self.path``, listing the directory only once."""
        if self._files is None:
            # Sorting makes the index -> file mapping stable; os.listdir
            # returns entries in arbitrary order.
            self._files = sorted(os.listdir(self.path))
        return self._files

    def __getitem__(self, item):
        """Return ``(image_tensor, label)`` for the file at index ``item``."""
        file_name = self._listing()[item]
        # Flatten the per-character one-hot matrix into a single row: one
        # image corresponds to exactly one label vector.
        label = text2vec(file_name.split('_')[0]).view(-1)
        # The context manager closes the image file once it has been converted.
        with Image.open(self.path + file_name) as img:
            img = transf(img)
        return img, label

    def __len__(self):
        """Return the number of files in the selected directory."""
        return len(self._listing())
class MyNet(nn.Module):
    """CNN that scores 4 captcha positions over 36 character classes.

    Four conv/batch-norm/ReLU/max-pool stages are followed by a fully
    connected head. The head's first layer takes ``256 * 10 * 3`` features,
    which corresponds to a ``(batch, 1, 160, 60)`` input. The output is a
    ``(batch, 4 * 36)`` tensor of raw scores (no final activation).
    """

    @staticmethod
    def _conv_stage(channels_in, channels_out):
        """Build one 3x3 conv -> batch norm -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(channels_in, channels_out, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.BatchNorm2d(channels_out),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

    def __init__(self):
        super().__init__()
        self.layer1 = self._conv_stage(1, 32)     # out (batch, 32, 80, 30)
        self.layer2 = self._conv_stage(32, 64)    # out (batch, 64, 40, 15)
        self.layer3 = self._conv_stage(64, 128)   # out (batch, 128, 20, 7)
        self.layer4 = self._conv_stage(128, 256)  # out (batch, 256, 10, 3)
        self.layer5 = nn.Sequential(
            nn.Linear(256 * 10 * 3, 2560),
            nn.BatchNorm1d(2560),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(2560, 640),
            nn.BatchNorm1d(640),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(640, 4 * 36),  # one score per (position, character) pair
        )

    def forward(self, x):
        """Run the conv stages, flatten per sample, and apply the dense head."""
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        flat = x.view(x.size(0), -1)
        return self.layer5(flat)
if __name__ == '__main__':
    # Resume training from a checkpoint chosen in a file dialog. The file is
    # expected to be named ``model<step>.pth``; <step> seeds the step counter.
    old_model = askopenfilename(initialdir='./')
    if not old_model:
        # An empty result means the dialog was cancelled; stop with a clear
        # message instead of failing later while parsing the step number.
        raise SystemExit('no checkpoint selected')
    train_data = MyData(is_train=True)
    test_data = MyData(is_train=False)
    train_set = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
    test_set = DataLoader(dataset=test_data, batch_size=16, shuffle=True)  # built but not used below
    # To train from scratch instead, use ``net = MyNet()`` and start total_step at 0.
    current_step = old_model.split('/')[-1].replace('model', '').replace('.pth', '')
    # NOTE(review): torch.load unpickles the whole model object; only load
    # checkpoint files you trust.
    net = torch.load(old_model)
    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(params=net.parameters(), lr=0.001)
    total_step = int(current_step)
    for epoch in range(100):
        net.train()
        for imgs, labels in train_set:
            total_step += 1
            optimizer.zero_grad()
            outputs = net(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Decode targets and predictions to text to report batch accuracy.
            true_label_train = batch_vec_2_text(labels.view(-1, 4, 36))
            yuce_label_train = batch_vec_2_text(outputs.view(-1, 4, 36))
            summary = "训练{}次,loss:{}".format(total_step, loss.item())
            stats = compare_list(true_label_train, yuce_label_train)
            print(summary + str(true_label_train) + str(yuce_label_train) + "\n")
            print(stats)
            with open('log.txt', 'a+') as f:
                f.write(summary + str(stats) + "\n")
            if total_step % 100 == 0:
                # Write the new checkpoint first, then drop one older .pth
                # file, so a failed save never leaves us with no checkpoint.
                checkpoint = f'model{total_step}.pth'
                torch.save(net, checkpoint)
                stale = [p for p in glob.glob('./*.pth') if os.path.basename(p) != checkpoint]
                if stale:
                    os.remove(stale[0])
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import os
from PIL import Image
from tkinter.filedialog import askopenfilename
import glob
# Characters a captcha may contain (digits only); a character's position in
# this list is its class index.
captcha_array = list("0123456789")


def text2vec(x):
    """One-hot encode captcha text into a (4, 10) integer tensor.

    Row ``p`` has a 1 in the column given by the position of ``x[p]`` in
    ``captcha_array``; rows beyond ``len(x)`` stay all zero.

    Raises:
        ValueError: if a character is not in ``captcha_array``.
        IndexError: if ``x`` has more than 4 characters.
    """
    encoded = torch.zeros((4, 10), dtype=torch.long)
    for position, char in enumerate(x):
        encoded[position, captcha_array.index(char)] = 1
    return encoded
def vec2text(x):
    """Decode a 2-D score tensor into captcha text.

    For every row of ``x`` the column with the highest value is looked up in
    ``captcha_array``; the resulting characters are concatenated in row order.
    """
    best_columns = torch.argmax(x, dim=1)
    return ''.join(captcha_array[column.data] for column in best_columns)
def single_vec_2_text(label_tensor):
    """Turn a 1-D tensor of class indices into the text it encodes.

    Each element is used as an index into ``captcha_array``.
    """
    return ''.join(captcha_array[class_index.data] for class_index in label_tensor)
def batch_vec_2_text(x):
    """Decode a batch of score tensors into a list of captcha strings.

    ``x`` is reduced with ``argmax`` over its third dimension, giving one row
    of class indices per image; every row is decoded with
    ``single_vec_2_text``.
    """
    index_rows = x.argmax(dim=2)
    return [single_vec_2_text(row) for row in index_rows]
def compare_list(x, y):
    """Count position-wise matches between two label sequences.

    Args:
        x: reference labels.
        y: labels compared against ``x`` position by position. Positions of
            ``x`` that have no counterpart in ``y`` count as mismatches.

    Returns:
        A tuple ``(matches, total, accuracy)`` where ``total`` is ``len(x)``
        and ``accuracy`` is ``matches / total`` (``0.0`` when ``x`` is empty,
        instead of dividing by zero).
    """
    total = len(x)
    matches = sum(1 for expected, predicted in zip(x, y) if expected == predicted)
    accuracy = matches / total if total else 0.0
    return matches, total, accuracy
# Preprocessing applied to every image before it is fed to the network:
# resize, convert to a single grayscale channel, convert to a tensor.
# NOTE(review): torchvision's Resize takes (height, width), so this yields
# images 100 high and 40 wide - confirm that matches the captcha orientation.
transf = transforms.Compose([
    transforms.Resize(size=(100, 40)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
])
# Dataset class
class MyData(Dataset):
    """Captcha images read from ``<root>train/`` or ``<root>test/``.

    The part of each file name before the first ``_`` is used as the
    image's label text.

    Args:
        is_train: truthy selects the ``train/`` sub-directory, falsy ``test/``.
        root: directory prefix (with trailing slash) containing both
            sub-directories. Defaults to the original hard-coded location.
    """

    def __init__(self, is_train, root=r'C://Users/Administrator/Desktop/ymtcyanzhengma/'):
        super().__init__()
        self.root = root
        self.path = self.root + ('train/' if is_train else 'test/')
        # Directory listing, filled on first use (see _listing).
        self._files = None

    def _listing(self):
        """Return the sorted file names in ``self.path``, listing the directory only once."""
        if self._files is None:
            # Sorting makes the index -> file mapping stable; os.listdir
            # returns entries in arbitrary order.
            self._files = sorted(os.listdir(self.path))
        return self._files

    def __getitem__(self, item):
        """Return ``(image_tensor, label)`` for the file at index ``item``."""
        file_name = self._listing()[item]
        # Flatten the per-character one-hot matrix into a single row: one
        # image corresponds to exactly one label vector.
        label = text2vec(file_name.split('_')[0]).view(-1)
        # The context manager closes the image file once it has been converted.
        with Image.open(self.path + file_name) as img:
            img = transf(img)
        return img, label

    def __len__(self):
        """Return the number of files in the selected directory."""
        return len(self._listing())
class ResModule(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers and a skip path.

    The spatial size is preserved (stride 1, padding 1). When the channel
    count changes, the input is projected with a 1x1 conv + batch norm
    (``layer3``) before it is added to the main path; otherwise the input is
    added unchanged. A ReLU follows the addition.

    Args:
        in_channels: number of channels of the input.
        out_channels: number of channels of the output.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        # True when the input can be added to the main path without projection.
        self.sametype = self.in_channels == self.out_channels
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels,
                      kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.BatchNorm2d(out_channels),
        )
        self.relu = nn.ReLU(inplace=True)
        if not self.sametype:
            # 1x1 projection so the skip path matches the output channel count.
            self.layer3 = nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=(1, 1), stride=(1, 1), padding=0),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(main_path(x) + skip(x))``."""
        main = self.layer2(self.layer1(x))
        skip = x if self.sametype else self.layer3(x)
        return self.relu(main + skip)
class MyNet(nn.Module):
    """Residual CNN that scores 4 captcha positions over 10 digit classes.

    A conv stem with 2x2 pooling is followed by eight ``ResModule`` blocks,
    a second 2x2 pooling and a fully connected head. The head's first layer
    takes ``256 * 25 * 10`` features, which corresponds to a
    ``(batch, 1, 100, 40)`` input. The output is a ``(batch, 4 * 10)``
    tensor of raw scores (no final activation).
    """

    def __init__(self):
        super().__init__()
        stem = [
            nn.Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=1),  # in (batch, 1, 100, 40)
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # out (batch, 16, 50, 20)
        ]
        self.layer1 = nn.Sequential(*stem)
        # Residual blocks keep the spatial size and widen 16 -> 256 channels.
        self.layer2 = nn.Sequential(
            ResModule(16, 32),
            ResModule(32, 32),
            ResModule(32, 64),
            ResModule(64, 64),
            ResModule(64, 128),
            ResModule(128, 128),
            ResModule(128, 256),
            ResModule(256, 256),  # out (batch, 256, 50, 20)
        )
        self.layer3 = nn.MaxPool2d(2)  # out (batch, 256, 25, 10)
        head = [
            nn.Linear(256 * 25 * 10, 2048),
            nn.BatchNorm1d(2048),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(2048, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 4 * 10),  # one score per (position, digit) pair
        ]
        self.layer4 = nn.Sequential(*head)

    def forward(self, x):
        """Run stem, residual blocks and pooling, flatten per sample, apply the head."""
        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)
        flat = x.view(x.size(0), -1)
        return self.layer4(flat)
if __name__ == '__main__':
    # Train a fresh MyNet from step 0. To resume from a checkpoint instead,
    # pick the file with askopenfilename, load it with torch.load and start
    # total_step from the number embedded in its ``model<step>.pth`` name.
    train_data = MyData(is_train=True)
    test_data = MyData(is_train=False)
    train_set = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
    test_set = DataLoader(dataset=test_data, batch_size=64, shuffle=True)  # built but not used below
    net = MyNet()
    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(params=net.parameters(), lr=0.001)
    total_step = 0
    for epoch in range(100):
        net.train()
        for imgs, labels in train_set:
            total_step += 1
            optimizer.zero_grad()
            outputs = net(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Decode targets and predictions to text to report batch accuracy.
            true_label_train = batch_vec_2_text(labels.view(-1, 4, 10))
            yuce_label_train = batch_vec_2_text(outputs.view(-1, 4, 10))
            summary = "训练{}次,loss:{}".format(total_step, loss.item())
            stats = compare_list(true_label_train, yuce_label_train)
            print(summary + str(true_label_train) + str(yuce_label_train) + "\n")
            print(stats)
            with open('log.txt', 'a+') as f:
                f.write(summary + str(stats) + "\n")
            if total_step % 100 == 0:
                # Write the new checkpoint first, then drop one older .pth
                # file, so a failed save never leaves us with no checkpoint.
                checkpoint = f'model{total_step}.pth'
                torch.save(net, checkpoint)
                stale = [p for p in glob.glob('./*.pth') if os.path.basename(p) != checkpoint]
                if stale:
                    os.remove(stale[0])