# yanzhengma (CAPTCHA)

import requests
import time
import concurrent.futures
import random

def create():
    """Download one captcha image and save it under ``./dataset_ymtc_login/``.

    The file is named ``<random int below 10000000>_<unix seconds>.jpg``.
    The target directory must already exist.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with an HTTP error status.
        OSError: if the image file cannot be written.
    """
    # Short pause before every request (kept from the original code;
    # presumably to avoid hammering the server -- TODO confirm).
    time.sleep(0.1)
    # A timeout keeps a stalled connection from blocking a worker forever.
    r = requests.get('https://www.ymtc.com/cn/info/captchas?t=Lpwd', timeout=10)
    # Fail loudly instead of storing an HTTP error page under a .jpg name.
    r.raise_for_status()
    file_name = f'{int(random.random() * 10000000)}_{int(time.time())}.jpg'
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(f'./dataset_ymtc_login/{file_name}', 'wb') as f:
        f.write(r.content)

import os

# The workers write into this directory; without it every task would fail
# with FileNotFoundError and the failure would go unnoticed inside the pool.
os.makedirs('./dataset_ymtc_login/', exist_ok=True)

# Fetch 10000 captcha images using up to 100 worker threads.
with concurrent.futures.ThreadPoolExecutor(100) as pool:
    futures = [pool.submit(create) for _ in range(10000)]
    # Leaving the ``with`` block waits for all submitted tasks, so an explicit
    # shutdown(wait=False) here would have no effect.

# Exceptions raised in workers are stored on the futures; surface them rather
# than reporting success unconditionally.
failed = sum(1 for future in futures if future.exception() is not None)
if failed:
    print(f'{failed} of {len(futures)} downloads failed')
print('ok')
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import os
from PIL import Image
from tkinter.filedialog import askopenfilename
import glob

# Alphabet of the captcha: the column index in the one-hot encoding is the
# position of the character in this list.
captcha_array = list("0123456789abcdefghijklmnopqrstuvwxyz")


def text2vec(x):
    """One-hot encode the captcha text ``x`` as a (4, 36) ``torch.long`` tensor.

    Row ``i`` gets a single 1 in the column of ``x[i]`` within
    ``captcha_array``; rows past ``len(x)`` stay all-zero.
    """
    encoded = torch.zeros((4, 36), dtype=torch.long)
    for row, char in enumerate(x):
        column = captcha_array.index(char)
        encoded[row, column] = 1
    return encoded

def vec2text(x):
    """Decode a 2-D tensor into text: one character per row, chosen by argmax along dim 1."""
    best_columns = torch.argmax(x, dim=1)
    return ''.join(captcha_array[column.item()] for column in best_columns)

def single_vec_2_text(label_tensor):
    """Turn a 1-D tensor of class indices into the matching captcha string."""
    chars = [captcha_array[index.item()] for index in label_tensor]
    return ''.join(chars)

def batch_vec_2_text(x):
    """Decode a 3-D tensor into a list of strings, one per entry along dim 0.

    The class of each character is the argmax along dim 2.
    """
    index_rows = x.argmax(dim=2)
    return [single_vec_2_text(row) for row in index_rows]

def compare_list(x, y):
    """Count the positions at which ``x`` and ``y`` hold equal elements.

    Args:
        x: Reference sequence (e.g. true labels); its length is the total.
        y: Sequence compared element-wise against ``x``. Positions missing
            from a shorter ``y`` simply count as mismatches.

    Returns:
        Tuple ``(matches, len(x), matches / len(x))``. The ratio is ``0.0``
        for an empty ``x`` instead of raising ZeroDivisionError.
    """
    total = len(x)
    matches = sum(1 for expected, actual in zip(x, y) if expected == actual)
    accuracy = matches / total if total else 0.0
    return matches, total, accuracy


# Preprocessing applied to every image before it is fed to the network:
# resize, convert to grayscale, then convert to a tensor.
# NOTE(review): torchvision's Resize interprets the pair as (height, width),
# so this yields 160-high, 60-wide images -- confirm that is intended.
transf = transforms.Compose([
    transforms.Resize((160, 60)),
    transforms.Grayscale(),
    transforms.ToTensor(),
])

# Dataset class
class MyData(Dataset):
    """Captcha images read from ``<root>train/`` or ``<root>test/``.

    The label of an image is the part of its file name before the first
    underscore.

    Args:
        is_train: Truthy selects the ``train/`` sub-directory, falsy ``test/``.
        root: Dataset root, including the trailing slash.

    Raises:
        OSError: if the selected directory cannot be listed.
    """

    def __init__(self, is_train, root='./dataset/'):
        super(MyData, self).__init__()
        self.root = root
        # Plain truthiness so that ``self.path`` is always defined.
        self.path = self.root + ('train/' if is_train else 'test/')
        # List the directory once: doing it per item costs O(n) for every
        # sample, and sorting gives a stable index -> file mapping.
        self.imgs = sorted(os.listdir(self.path))

    def __getitem__(self, item):
        """Return ``(image_tensor, flat_label_tensor)`` for index ``item``."""
        file_name = self.imgs[item]
        # Flatten the per-character one-hot matrix into a single row, because
        # one image corresponds to exactly one label vector.
        label = text2vec(file_name.split('_')[0]).view(1, -1)[0]
        # Close the file handle as soon as the image has been converted.
        with Image.open(self.path + file_name) as img:
            img = transf(img)
        return img, label

    def __len__(self):
        """Return the number of files found when the dataset was created."""
        return len(self.imgs)

class MyNet(nn.Module):
    """CNN mapping a (batch, 1, 160, 60) image batch to (batch, 4 * 36) scores.

    Four conv -> batch-norm -> ReLU -> max-pool stages halve both spatial
    dimensions each time (160x60 down to 10x3); a three-layer fully connected
    head then produces the final 4 * 36 outputs.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one 3x3 conv -> batch norm -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

    def __init__(self):
        super().__init__()
        self.layer1 = self._conv_stage(1, 32)     # out (batch, 32, 80, 30)
        self.layer2 = self._conv_stage(32, 64)    # out (batch, 64, 40, 15)
        self.layer3 = self._conv_stage(64, 128)   # out (batch, 128, 20, 7)
        self.layer4 = self._conv_stage(128, 256)  # out (batch, 256, 10, 3)

        # Classifier head working on the flattened 256 * 10 * 3 feature map.
        self.layer5 = nn.Sequential(
            nn.Linear(256 * 10 * 3, 2560),
            nn.BatchNorm1d(2560),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(2560, 640),
            nn.BatchNorm1d(640),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            # Original author's note: "don't remember what comes after this".
            nn.Linear(640, 4 * 36),
        )

    def forward(self, x):
        """Run the conv stages, flatten per sample, and apply the head."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        flat = features.view(features.size(0), -1)
        return self.layer5(flat)

if __name__ == '__main__':
    # Resume training from an existing checkpoint picked in a file dialog.
    old_model = askopenfilename(initialdir='./')  # exists
    if not old_model:
        # The dialog returns an empty value when cancelled; stop with a clear
        # message instead of failing later on int('').
        raise SystemExit('no checkpoint file selected')

    train_data = MyData(is_train=True)
    test_data = MyData(is_train=False)
    train_set = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
    # NOTE: the test loader is built but never evaluated in this script.
    test_set = DataLoader(dataset=test_data, batch_size=16, shuffle=True)

    # net = MyNet() # new train

    # Checkpoints are named "model<step>.pth"; recover the step counter from
    # the selected file name.
    current_step = old_model.split('/')[-1].replace('model', '').replace('.pth', '')  # exists
    # NOTE(security): torch.load unpickles the file, which can execute
    # arbitrary code -- only load checkpoints you created yourself.
    net = torch.load(old_model)  # exists

    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(params=net.parameters(), lr=0.001)

    total_step = int(current_step)  # exists
    for epoch in range(100):
        net.train()
        for imgs, labels in train_set:
            total_step += 1
            optimizer.zero_grad()
            outputs = net(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Decode labels and predictions back to text for the progress log.
            true_label_train = batch_vec_2_text(labels.view(-1, 4, 36))
            yuce_label_train = batch_vec_2_text(outputs.view(-1, 4, 36))
            stats = compare_list(true_label_train, yuce_label_train)
            header = "训练{}次,loss:{}".format(total_step * 1, loss.item())
            print(header + str(true_label_train) + str(yuce_label_train) + "\n")
            print(stats)
            with open('log.txt', 'a+') as f:
                f.write(header + str(stats) + "\n")

            if total_step % 100 == 0:
                # Save the new checkpoint first and only then drop an older
                # one, so a failed save never leaves us without a checkpoint.
                # An empty working directory is fine (nothing to remove).
                stale = glob.glob('./*.pth')
                new_path = f'model{total_step}.pth'
                torch.save(net, new_path)
                for path in stale[:1]:
                    if os.path.abspath(path) != os.path.abspath(new_path):
                        os.remove(path)
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import os
from PIL import Image
from tkinter.filedialog import askopenfilename
import glob

# Digits-only alphabet: the column index in the one-hot encoding is the
# position of the character in this list.
captcha_array = list("0123456789")


def text2vec(x):
    """One-hot encode the captcha text ``x`` as a (4, 10) ``torch.long`` tensor.

    Row ``i`` carries a single 1 at the index of ``x[i]`` in
    ``captcha_array``; rows beyond ``len(x)`` remain zero.
    """
    one_hot = torch.zeros((4, 10), dtype=torch.long)
    for position, symbol in enumerate(x):
        one_hot[position, captcha_array.index(symbol)] = 1
    return one_hot


def vec2text(x):
    """Decode a 2-D tensor to text, picking the argmax column of every row."""
    chars = [captcha_array[column.item()] for column in torch.argmax(x, dim=1)]
    return ''.join(chars)


def single_vec_2_text(label_tensor):
    """Map a 1-D tensor of class indices to the corresponding captcha string."""
    return ''.join(captcha_array[index.item()] for index in label_tensor)


def batch_vec_2_text(x):
    """Decode every entry along dim 0 of a 3-D tensor into a string.

    Character classes are taken as the argmax along dim 2.
    """
    decoded = []
    for index_row in x.argmax(dim=2):
        decoded.append(single_vec_2_text(index_row))
    return decoded


def compare_list(x, y):
    """Compare two label sequences position by position.

    Args:
        x: Reference sequence; its length is used as the total.
        y: Sequence checked against ``x``. If it is shorter than ``x`` the
            missing positions count as mismatches.

    Returns:
        Tuple ``(hits, len(x), hits / len(x))``; the ratio is ``0.0`` when
        ``x`` is empty rather than raising ZeroDivisionError.
    """
    hits = sum(left == right for left, right in zip(x, y))
    total = len(x)
    if total == 0:
        return hits, total, 0.0
    return hits, total, hits / total


# Preprocessing applied to every image before it is fed to the network:
# resize, convert to grayscale, then convert to a tensor.
# NOTE(review): torchvision's Resize interprets the pair as (height, width),
# so this yields 100-high, 40-wide images -- confirm that is intended.
transf = transforms.Compose([
    transforms.Resize((100, 40)),
    transforms.Grayscale(),
    transforms.ToTensor(),
])


# Dataset class
class MyData(Dataset):
    """Captcha images read from ``<root>train/`` or ``<root>test/``.

    The label of an image is the part of its file name before the first
    underscore.

    Args:
        is_train: Truthy selects the ``train/`` sub-directory, falsy ``test/``.
        root: Dataset root, including the trailing slash. Defaults to the
            original author's desktop folder.

    Raises:
        OSError: if the selected directory cannot be listed.
    """

    def __init__(self, is_train, root=r'C://Users/Administrator/Desktop/ymtcyanzhengma/'):
        super(MyData, self).__init__()
        self.root = root
        # Plain truthiness so that ``self.path`` is always defined.
        self.path = self.root + ('train/' if is_train else 'test/')
        # List the directory once: doing it per item costs O(n) for every
        # sample, and sorting gives a stable index -> file mapping.
        self.imgs = sorted(os.listdir(self.path))

    def __getitem__(self, item):
        """Return ``(image_tensor, flat_label_tensor)`` for index ``item``."""
        file_name = self.imgs[item]
        # Flatten the per-character one-hot matrix into a single row, because
        # one image corresponds to exactly one label vector.
        label = text2vec(file_name.split('_')[0]).view(1, -1)[0]
        # Close the file handle as soon as the image has been converted.
        with Image.open(self.path + file_name) as img:
            img = transf(img)
        return img, label

    def __len__(self):
        """Return the number of files found when the dataset was created."""
        return len(self.imgs)


class ResModule(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a skip connection.

    The convolutions use stride 1 and padding 1, so the spatial size is
    unchanged. When ``in_channels`` differs from ``out_channels`` the skip path
    goes through a 1x1 conv + batch norm (``layer3``) so both branches can be
    added.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        # True when the input can be added to the output without projection.
        self.sametype = in_channels == out_channels

        conv3x3 = dict(kernel_size=(3, 3), stride=(1, 1), padding=1)

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, **conv3x3),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, **conv3x3),
            nn.BatchNorm2d(out_channels),
        )
        self.relu = nn.ReLU(inplace=True)

        if not self.sametype:
            # Projection for the skip path when the channel count changes.
            self.layer3 = nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=(1, 1), stride=(1, 1), padding=0),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(layer2(layer1(x)) + shortcut(x))``."""
        residual = self.layer2(self.layer1(x))
        shortcut = x if self.sametype else self.layer3(x)
        return self.relu(residual + shortcut)


class MyNet(nn.Module):
    """Residual CNN mapping a (batch, 1, 100, 40) batch to (batch, 4 * 10) scores.

    Layout: a conv/pool stem, eight ``ResModule`` blocks that widen the
    channels from 16 to 256 while keeping the spatial size, one more max-pool,
    and a three-layer fully connected head. The head's first layer is sized
    for a 256 x 25 x 10 feature map, i.e. a 100 x 40 input.
    """

    def __init__(self):
        super().__init__()

        # Stem: in (batch, 1, 100, 40) -> out (batch, 16, 50, 20)
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

        # Residual trunk: in (batch, 16, 50, 20) -> out (batch, 256, 50, 20)
        widths = [16, 32, 32, 64, 64, 128, 128, 256, 256]
        self.layer2 = nn.Sequential(
            *[ResModule(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])]
        )

        # in (batch, 256, 50, 20) -> out (batch, 256, 25, 10)
        self.layer3 = nn.MaxPool2d(2)

        # Classifier head on the flattened feature map.
        self.layer4 = nn.Sequential(
            nn.Linear(256 * 25 * 10, 2048),
            nn.BatchNorm1d(2048),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(2048, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 4 * 10),
        )

    def forward(self, x):
        """Apply stem, residual trunk and pooling, flatten, then the head."""
        features = self.layer3(self.layer2(self.layer1(x)))
        flat = features.view(features.size(0), -1)
        return self.layer4(flat)


if __name__ == '__main__':
    # Train a fresh network. To resume from a checkpoint instead, enable the
    # lines marked "exists" and drop the "new train" lines.
    # old_model = askopenfilename(initialdir='./')  # exists

    train_data = MyData(is_train=True)
    test_data = MyData(is_train=False)
    train_set = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
    # NOTE: the test loader is built but never evaluated in this script.
    test_set = DataLoader(dataset=test_data, batch_size=64, shuffle=True)

    net = MyNet()  # new train

    # current_step = old_model.split('/')[-1].replace('model', '').replace('.pth', '')  # exists
    # net = torch.load(old_model)  # exists

    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(params=net.parameters(), lr=0.001)

    # total_step = int(current_step)  # exists
    total_step = 0
    for epoch in range(100):
        net.train()
        for imgs, labels in train_set:
            total_step += 1
            optimizer.zero_grad()
            outputs = net(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Decode labels and predictions back to text for the progress log.
            true_label_train = batch_vec_2_text(labels.view(-1, 4, 10))
            yuce_label_train = batch_vec_2_text(outputs.view(-1, 4, 10))
            stats = compare_list(true_label_train, yuce_label_train)
            header = "训练{}次,loss:{}".format(total_step * 1, loss.item())
            print(header + str(true_label_train) + str(yuce_label_train) + "\n")
            print(stats)
            with open('log.txt', 'a+') as f:
                f.write(header + str(stats) + "\n")

            if total_step % 100 == 0:
                # Save the new checkpoint first and only then drop an older
                # one, so a failed save never leaves us without a checkpoint.
                # No bare ``except`` is needed: an empty glob result simply
                # means there is nothing to remove.
                stale = glob.glob('./*.pth')
                new_path = f'model{total_step}.pth'
                torch.save(net, new_path)
                for path in stale[:1]:
                    if os.path.abspath(path) != os.path.abspath(new_path):
                        os.remove(path)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值