在自定义宝可梦数据集上使用resnet18网络

目录结构

 

train

validation

test


 

一般来说现在常用Adam这个优化器:它收敛较快、对学习率等超参数不太敏感,是实践中常见的默认选择
许多用SGD较难调优的问题,Adam通常也能取得不错的效果
如果没有特别的要求,一般默认使用Adam这个优化器


 

train.py

import torch
from torch import optim, nn
import visdom
import torchvision
from torch.utils.data import DataLoader

from pokemon import Pokemon
from resnet import ResNet18


# ----- hyper-parameters -----
batchsz = 32   # mini-batch size
lr = 1e-3      # learning rate for Adam (see main())
epochs = 10    # number of training epochs

# BUGFIX: the original hard-coded 'cuda', which raises on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(1234)  # fixed seed for reproducibility


# The 60% / 20% / 20% train / val / test split is handled inside Pokemon.
train_db = Pokemon('dataset/pokemon', 224, mode='train')
val_db = Pokemon('dataset/pokemon', 224, mode='val')
test_db = Pokemon('dataset/pokemon', 224, mode='test')

# Shuffling val/test does not change accuracy (order-independent metric);
# kept to preserve the original behaviour.
train_loader = DataLoader(train_db, batch_size=batchsz, shuffle=True, num_workers=4)
val_loader = DataLoader(val_db, batch_size=batchsz, shuffle=True, num_workers=2)
test_loader = DataLoader(test_db, batch_size=batchsz, shuffle=True, num_workers=2)


viz = visdom.Visdom()  # requires a running visdom server (`python -m visdom.server`)


def evalute(model, loader):
	"""Return the classification accuracy of *model* over *loader*.

	Fixes over the original:
	- switches the model to eval mode (BatchNorm/Dropout behave correctly
	  during evaluation) and restores the previous mode afterwards;
	- moves batches to the device the model lives on, removing the hidden
	  dependency on a module-level ``device`` global;
	- returns 0.0 for an empty dataset instead of dividing by zero.

	:param model: an ``nn.Module`` producing logits of shape [b, num_class]
	:param loader: a DataLoader yielding (x, y) batches
	:return: accuracy in [0, 1] as a float
	"""
	was_training = model.training
	model.eval()
	device = next(model.parameters()).device

	correct = 0
	total = len(loader.dataset)

	with torch.no_grad():
		for x, y in loader:
			x, y = x.to(device), y.to(device)
			logits = model(x)
			pred = logits.argmax(dim=1)
			correct += torch.eq(pred, y).sum().item()

	if was_training:
		model.train()

	return correct / total if total else 0.0



def main():
	"""Train ResNet18 on the Pokemon dataset, checkpoint the best model
	by validation accuracy, then evaluate that checkpoint on the test set.

	Fixes over the original:
	- ``best_acc, best_epoch`` were initialized twice;
	- the configured ``lr`` was never passed to Adam (it silently ran with
	  the default learning rate);
	- validation runs every epoch, so the always-true ``epoch % 1 == 0``
	  guard and its misleading "every 2 epochs" comment are removed;
	- val_acc is plotted at every validation, not only on improvement, so
	  the curve shows the real trajectory.
	"""
	model = ResNet18(5).to(device)
	optimizer = optim.Adam(model.parameters(), lr=lr)
	criteon = nn.CrossEntropyLoss()  # expects raw logits (applies log-softmax internally)

	best_acc, best_epoch = 0, 0
	global_step = 0
	viz.line([0], [-1], win='loss', opts=dict(title='loss'))
	viz.line([0], [-1], win='val_acc', opts=dict(title='val_acc'))

	for epoch in range(epochs):
		for step, (x, y) in enumerate(train_loader):
			# x: [b, 3, 224, 224]  y: [b]
			x, y = x.to(device), y.to(device)

			logits = model(x)
			loss = criteon(logits, y)

			optimizer.zero_grad()
			loss.backward()
			optimizer.step()

			viz.line([loss.item()], [global_step], win='loss', update='append')
			global_step += 1

		# validate once per epoch
		val_acc = evalute(model, val_loader)
		viz.line([val_acc], [global_step], win='val_acc', update='append')
		if val_acc > best_acc:
			best_epoch = epoch
			best_acc = val_acc
			torch.save(model.state_dict(), 'best.mdl')

	print('best acc:', best_acc, 'best epoch:', best_epoch)

	# reload the best checkpoint before the final test evaluation
	model.load_state_dict(torch.load('best.mdl'))
	print('loaded from ckpt!')

	test_acc = evalute(model, test_loader)
	print('test acc:', test_acc)


if __name__ == '__main__':
	main()

pokemon.py

import torch
import os,glob
import random,csv
import visdom
import time
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
 
 
class Pokemon(Dataset):
    """Pokemon image classification dataset.

    Scans ``root`` for one sub-directory per class (alphabetical order
    defines the integer labels), builds and caches the image/label index
    in a CSV file, and splits it 60/20/20 into train/val/test.

    Fix over the original: random-rotation augmentation is applied only in
    ``train`` mode; ``val``/``test`` use a deterministic resize +
    center-crop pipeline so evaluation metrics are reproducible.
    """

    def __init__(self, root, resize, mode):
        """
        :param root: dataset directory containing one sub-directory per class
        :param resize: output image side length in pixels
        :param mode: one of 'train', 'val', 'test'
        :raises ValueError: on an unknown mode (the original silently
            treated any unknown mode as 'test')
        """
        super(Pokemon, self).__init__()

        if mode not in ('train', 'val', 'test'):
            raise ValueError("mode must be 'train', 'val' or 'test', got %r" % (mode,))

        self.root = root
        self.resize = resize
        self.mode = mode

        # Map each class directory name to an integer label, e.g. bulbasaur -> 0.
        self.name2label = {}
        for name in sorted(os.listdir(os.path.join(root))):
            if not os.path.isdir(os.path.join(root, name)):
                continue
            self.name2label[name] = len(self.name2label.keys())

        # image paths and labels; order fixed by the cached csv file
        self.images, self.labels = self.load_csv('images.csv')

        # 60% / 20% / 20% split for train / val / test
        n = len(self.images)
        if mode == 'train':
            self.images = self.images[:int(0.6 * n)]
            self.labels = self.labels[:int(0.6 * n)]
        elif mode == 'val':
            self.images = self.images[int(0.6 * n):int(0.8 * n)]
            self.labels = self.labels[int(0.6 * n):int(0.8 * n)]
        else:  # 'test'
            self.images = self.images[int(0.8 * n):]
            self.labels = self.labels[int(0.8 * n):]

    def load_csv(self, filename):
        """Return (image_paths, labels); build and cache the csv on first use.

        :param filename: csv file name, created inside ``self.root``
        """
        if not os.path.exists(os.path.join(self.root, filename)):
            images = []
            for name in self.name2label.keys():
                # e.g. 'dataset/pokemon/mewtwo/00001.png'
                images += glob.glob(os.path.join(self.root, name, '*.png'))
                images += glob.glob(os.path.join(self.root, name, '*.jpg'))
                images += glob.glob(os.path.join(self.root, name, '*.jpeg'))
            print(len(images), images)

            # Shuffle once; the order is then frozen in the csv so the
            # train/val/test split stays consistent across runs.
            random.shuffle(images)

            with open(os.path.join(self.root, filename), mode='w', newline='') as f:
                writer = csv.writer(f)
                for img in images:
                    # the class name is the parent directory of the image file
                    name = img.split(os.sep)[-2]
                    label = self.name2label[name]
                    writer.writerow([img, label])
                print('write into csv file:', filename)

        # read back from the (possibly pre-existing) csv file
        images, labels = [], []
        with open(os.path.join(self.root, filename)) as f:
            reader = csv.reader(f)
            for row in reader:
                img, label = row
                images.append(img)
                labels.append(int(label))

        assert len(images) == len(labels)
        return images, labels

    def __len__(self):
        return len(self.images)

    def denormalize(self, x_hat):
        """Invert the ImageNet normalization: x = x_hat * std + mean.

        :param x_hat: normalized image tensor of shape [c, h, w]
        :return: de-normalized tensor of the same shape
        """
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]

        # broadcast [3] -> [3, 1, 1] so mean/std apply per channel
        mean = torch.tensor(mean).unsqueeze(1).unsqueeze(1)
        std = torch.tensor(std).unsqueeze(1).unsqueeze(1)
        return x_hat * std + mean

    def __getitem__(self, idx):
        # idx in [0, len(self)); label is the integer class index
        img, label = self.images[idx], self.labels[idx]

        if self.mode == 'train':
            # random rotation only while training (data augmentation)
            tf = transforms.Compose([
                lambda x: Image.open(x).convert('RGB'),  # path string -> PIL image
                transforms.Resize((int(self.resize * 1.25), int(self.resize * 1.25))),
                transforms.RandomRotation(15),
                transforms.CenterCrop(self.resize),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
            ])
        else:
            # deterministic pipeline for val/test: augmenting evaluation
            # data (as the original did) makes the metrics noisy
            tf = transforms.Compose([
                lambda x: Image.open(x).convert('RGB'),
                transforms.Resize((int(self.resize * 1.25), int(self.resize * 1.25))),
                transforms.CenterCrop(self.resize),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
            ])

        img = tf(img)
        label = torch.tensor(label)
        return img, label
 
 
 
 
def main():
    """Smoke test: load one training sample and display it via visdom."""
    viz = visdom.Visdom()
    dataset = Pokemon('dataset/pokemon', 224, 'train')

    sample_img, sample_lbl = next(iter(dataset))
    print('sample:', sample_img.shape, sample_lbl.shape, sample_lbl)

    viz.image(dataset.denormalize(sample_img), win='sample_x',
              opts=dict(title='sample_x'))


if __name__ == '__main__':
    main()

resnet.py

import  torch
from    torch import  nn
from    torch.nn import functional as F



class ResBlk(nn.Module):
    """Basic residual block: two 3x3 conv+BN layers plus a skip connection.

    The skip path is the identity when both the channel count and spatial
    size are preserved; otherwise it is a 1x1 conv + BN projection.
    """

    def __init__(self, ch_in, ch_out, stride=1):
        """
        :param ch_in: number of input channels
        :param ch_out: number of output channels
        :param stride: stride of the first conv (spatial downsampling factor)
        """
        super(ResBlk, self).__init__()

        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)

        self.extra = nn.Sequential()
        # BUGFIX: the original only checked `ch_out != ch_in`, so a block
        # with equal channels but stride > 1 crashed on the element-wise
        # add (the two branches no longer had matching spatial sizes).
        if ch_out != ch_in or stride != 1:
            # [b, ch_in, h, w] => [b, ch_out, h/stride, w/stride]
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out)
            )

    def forward(self, x):
        """
        :param x: [b, ch_in, h, w]
        :return: [b, ch_out, h/stride, w/stride]
        """
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # shortcut: project x if shapes differ, then element-wise add
        out = self.extra(x) + out
        out = F.relu(out)
        return out




class ResNet18(nn.Module):
    """Small ResNet-18-style classifier built from 4 residual blocks.

    Generalization over the original: an adaptive average-pool to a fixed
    3x3 grid precedes the classifier head, so the network accepts input
    sizes other than 224x224.  For a 224x224 input the feature map is
    already 3x3, making the pooling an identity — outputs and the
    state_dict layout are unchanged for existing callers/checkpoints.
    """

    def __init__(self, num_class):
        """
        :param num_class: number of output classes
        """
        super(ResNet18, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=3, padding=0),
            nn.BatchNorm2d(16)
        )
        # 4 residual blocks, doubling channels while shrinking spatially:
        # [b, 16, h, w] => [b, 32, h/3, w/3]
        self.blk1 = ResBlk(16, 32, stride=3)
        # [b, 32, h, w] => [b, 64, h/3, w/3]
        self.blk2 = ResBlk(32, 64, stride=3)
        # [b, 64, h, w] => [b, 128, h/2, w/2]
        self.blk3 = ResBlk(64, 128, stride=2)
        # [b, 128, h, w] => [b, 256, h/2, w/2]
        self.blk4 = ResBlk(128, 256, stride=2)

        # For a 224x224 input the feature map here is [b, 256, 3, 3]
        # (not 7x7 as the original comment claimed).
        self.outlayer = nn.Linear(256 * 3 * 3, num_class)

    def forward(self, x):
        """
        :param x: [b, 3, h, w] image batch
        :return: [b, num_class] raw logits (feed to CrossEntropyLoss)
        """
        x = F.relu(self.conv1(x))

        x = self.blk1(x)
        x = self.blk2(x)
        x = self.blk3(x)
        x = self.blk4(x)

        # pool to a fixed 3x3 grid so non-224 inputs work too;
        # identity for the 3x3 map produced by a 224x224 input
        x = F.adaptive_avg_pool2d(x, (3, 3))
        x = x.view(x.size(0), -1)
        return self.outlayer(x)



def main():
    """Sanity-check ResBlk and ResNet18 shapes on random tensors."""
    block = ResBlk(64, 128)
    block_out = block(torch.randn(2, 64, 224, 224))
    print('block:', block_out.shape)

    net = ResNet18(5)
    net_out = net(torch.randn(2, 3, 224, 224))
    print('resnet:', net_out.shape)

    # report the total parameter count
    n_params = sum(p.numel() for p in net.parameters())
    print('parameters size:', n_params)


if __name__ == '__main__':
    main()

 

运行train.py

 

训练完成之后

 

 

  • 1
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
好的,以下是使用MindSpore框架在FER2013数据集上进行图像识别的步骤: 1. 下载FER2013数据集:可以到Kaggle上下载fer2013.csv文件,并将其放在指定的目录下。 2. 数据处理:使用pandas库读取fer2013.csv文件,然后将数据集分成训练集和测试集。可以使用sklearn库的train_test_split函数来分割数据集。 3. 数据增强:为了提高模型的泛化能力,我们可以使用数据增强技术。MindSpore框架提供了丰富的数据增强操作,如RandomCrop、RandomHorizontalFlip等。 4. 搭建ResNet18模型:使用MindSpore框架搭建ResNet18模型,可以使用MindSpore提供的Model类来实现。 5. 模型训练:使用MindSpore框架进行模型训练,可以使用MindSpore提供的Model.train函数来实现。 6. 模型测试:使用测试集对模型进行测试,可以使用MindSpore提供的Model.eval函数来实现。 7. 模型保存:使用MindSpore框架保存训练好的模型,可以使用MindSpore提供的Model.save_checkpoint函数来实现。 下面是一个使用MindSpore框架在FER2013数据集上进行图像识别的示例代码: ```python import pandas as pd import numpy as np from sklearn.model_selection import train_test_split import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.vision.c_transforms as CV import mindspore.nn as nn import mindspore.ops.operations as P from mindspore import context, Tensor from mindspore.train.serialization import load_checkpoint, save_checkpoint # 1. 下载FER2013数据集 # 2. 数据处理 data = pd.read_csv('fer2013.csv') pixels = data['pixels'].tolist() faces = [] for pixel_sequence in pixels: face = [int(pixel) for pixel in pixel_sequence.split(' ')] face = np.asarray(face).reshape(48, 48) faces.append(face.astype(np.uint8)) X = np.asarray(faces) y = pd.get_dummies(data['emotion']).values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) # 3. 数据增强 train_transform = CV.Compose([ CV.RandomCrop((44, 44)), CV.RandomHorizontalFlip(prob=0.5), CV.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5), CV.RandomRotation(30), CV.Rescale(1.0 / 255.0, 0.0) ]) test_transform = CV.Compose([ CV.Rescale(1.0 / 255.0, 0.0) ]) # 4. 
搭建ResNet18模型 class ResNet18(nn.Cell): def __init__(self): super(ResNet18, self).__init__() self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, pad_mode='pad') self.bn1 = nn.BatchNorm2d(64) self.relu = nn.ReLU() self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='valid') self.layer1 = nn.SequentialCell( nn.ResidualBlock(64, 64), nn.ResidualBlock(64, 64) ) self.layer2 = nn.SequentialCell( nn.ResidualBlock(64, 128, stride=2), nn.ResidualBlock(128, 128) ) self.layer3 = nn.SequentialCell( nn.ResidualBlock(128, 256, stride=2), nn.ResidualBlock(256, 256) ) self.layer4 = nn.SequentialCell( nn.ResidualBlock(256, 512, stride=2), nn.ResidualBlock(512, 512) ) self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1, pad_mode='valid') self.flatten = nn.Flatten() self.fc = nn.Dense(512, 7) def construct(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) x = self.avgpool(x) x = self.flatten(x) x = self.fc(x) return x # 5. 模型训练 batch_size = 32 train_dataset = ds.NumpySlicesDataset({"image": X_train, "label": y_train}, shuffle=True) train_dataset = train_dataset.map(input_columns=["image"], operations=train_transform, num_parallel_workers=4) train_dataset = train_dataset.batch(batch_size, drop_remainder=True) test_dataset = ds.NumpySlicesDataset({"image": X_test, "label": y_test}) test_dataset = test_dataset.map(input_columns=["image"], operations=test_transform, num_parallel_workers=4) test_dataset = test_dataset.batch(batch_size, drop_remainder=True) net = ResNet18() loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') opt = nn.Momentum(net.trainable_params(), 0.01, 0.9) model = nn.Model(net, loss_fn, opt, metrics={"acc"}) context.set_context(mode=context.GRAPH_MODE, device_target="GPU") model.train(10, train_dataset) # 6. 模型测试 model.eval(test_dataset) # 7. 
模型保存 save_checkpoint(model.train_network, "resnet18.ckpt") ``` 这是一个简单的示例代码,您可以根据实际情况进行修改。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值