[Code] Fine-tuning a pretrained ResNet50 for leaf classification

Key points:

1. How to unzip the dataset

2. How to wrap a custom dataset for DataLoader

3. Writing a CSV with pandas

4. How to build classification labels; note that CrossEntropyLoss takes an integer class index (one scalar per sample), not a one-hot vector (see the sketch after this list)

5. Training runs on GPU; to test on CPU, adjust torch.load() (pass map_location)

6. When using a pretrained model, decide which part to fine-tune; here we replace ResNet's fc layer
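
A minimal sketch of point 4 (shapes chosen to match the 176 leaf classes used below): CrossEntropyLoss consumes raw logits and a 1-D tensor of integer class indices, so the labels never need to be one-hot encoded.

import torch
from torch import nn

loss_func = nn.CrossEntropyLoss()
logits = torch.randn(4, 176)             # raw scores for a batch of 4 samples over 176 classes
targets = torch.tensor([3, 0, 175, 42])  # one integer class index per sample, NOT one-hot
print(loss_func(logits, targets))        # a single scalar loss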

 

Unzip the dataset:

import zipfile

# extract the Kaggle classify-leaves archive into the current working directory
with zipfile.ZipFile('classify-leaves.zip', 'r') as data:
    data.extractall()

Imports:

import pandas as pd
import numpy as np
import torch 
from torch import nn
import seaborn as sns
import copy
import os
from skimage import io,transform
from torch.utils.data import Dataset,DataLoader
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from torchvision import models
from torchvision import transforms,datasets,utils
import matplotlib.pyplot as plt
train=pd.read_csv('train.csv')
test=pd.read_csv('test.csv')
print(test.iloc[0].values[0])  # -> images/18353.jpg
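
Both CSVs are assumed to follow the standard Kaggle classify-leaves layout: train.csv has an image-path column followed by a label column, test.csv has only the image-path column. A quick check:

print(train.columns.tolist())  # expected: ['image', 'label']
print(test.columns.tolist())   # expected: ['image']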

1. Sort the image files by their numeric index

2. Build the labels (numeric class indices)

path = "images/"
images = os.listdir(path)
images.sort(key=lambda x: int(x.split('.')[0]))

train_label=train.iloc[:,1]

classes=list(set(train_label))#176
classes.sort()
print(classes)
class_num = dict(zip(classes,range(len(classes))))
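
A quick round-trip check of the mapping (sorting classes first makes the name-to-index assignment deterministic across runs):

idx = class_num[classes[0]]        # name -> integer label, used as the training target
assert classes[idx] == classes[0]  # integer label -> name, used later to decode predictions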

DataLoader:

class LeavesDataset(Dataset):

    def __init__(self, csv_file, root_dir, transform=None):
        self.csv_file=csv_file
        self.landmarks_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        # column 0 holds the relative image path, e.g. images/18353.jpg
        img_name = os.path.join(self.root_dir,
                                self.landmarks_frame.iloc[idx, 0])
        image = io.imread(img_name)
        if self.transform:
            image = self.transform(image)
        if self.csv_file=='train.csv':
            # column 1 holds the class name; map it to an integer label
            l = self.landmarks_frame.iloc[idx, 1]
            label=torch.tensor(class_num[l])
            return image,label
        else:
            # test.csv has no label column, so return the image alone
            return image
    
train_data=LeavesDataset(csv_file='train.csv',root_dir='',
                        transform=transforms.ToTensor())

test_features=LeavesDataset(csv_file='test.csv',root_dir='',
                        transform=transforms.ToTensor())

train_loader=DataLoader(dataset=train_data,batch_size=32,shuffle=True,num_workers=1)
test_loader=DataLoader(dataset=test_features,batch_size=1000,shuffle=False)
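
A sanity check on one batch (optional). The images in this dataset appear to share a single resolution, which is why ToTensor alone works here; with mixed sizes a transforms.Resize would be needed before batching:

b_x, b_y = next(iter(train_loader))
print(b_x.shape)  # torch.Size([32, 3, H, W]); ToTensor scales pixel values to [0, 1]
print(b_y.shape)  # torch.Size([32]); integer class indices, as CrossEntropyLoss expects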

Training:

net=models.resnet50(pretrained=True)

for param in net.parameters():  # freeze all pretrained weights
    param.requires_grad_(False)

input_feature=net.fc.in_features
net.fc=nn.Sequential(
    nn.Linear(input_feature,512),
    nn.ReLU(),
    nn.Dropout(p=0.1),
    nn.Linear(512,176),  # 176 leaf classes
)
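
To verify that only the new head will be updated, list the parameters that still require gradients; only the two Linear layers of the new fc should show up:

for name, p in net.named_parameters():
    if p.requires_grad:
        print(name)  # expected: fc.0.weight, fc.0.bias, fc.3.weight, fc.3.bias

Passing the full net.parameters() to Adam below still works, because frozen parameters never receive gradients; filtering to the trainable subset is an equivalent, slightly cleaner alternative.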


def train(model,train_loader,optimizer,loss_func,epochs=10,train_rate=0.8):
    # the first train_rate of each epoch's batches are used for training,
    # the rest for validation (note: with shuffle=True the split changes every epoch)
    batch_num=len(train_loader)
    train_batch_num=round(batch_num*train_rate)
    best_w=copy.deepcopy(model.state_dict())
    best_acc=0.0
    for epoch in range(epochs):
        train_loss,val_loss=0,0
        train_acc,val_acc=0,0
        train_num,val_num=0,0
        for step,(b_x,b_y) in enumerate(train_loader):
            b_x=b_x.cuda()
            b_y=b_y.cuda()
            if step<train_batch_num:
                model.train()
                out=model(b_x)
                loss=loss_func(out,b_y)
                pre_lab=torch.argmax(out,1)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                train_loss+=loss.item()*b_x.size(0)
                train_acc+=torch.sum(pre_lab==b_y.data)
                train_num+=b_x.size(0)
            else:
                model.eval()
                with torch.no_grad():  # no gradients needed for validation
                    out=model(b_x)
                    pre_lab=torch.argmax(out,1)
                    loss=loss_func(out,b_y)
                val_loss+=loss.item()*b_x.size(0)
                val_acc+=torch.sum(pre_lab==b_y.data)
                val_num+=b_x.size(0)

        train_loss=train_loss/train_num
        train_acc=train_acc.double()/train_num
        val_loss=val_loss/val_num
        val_acc=val_acc.double()/val_num
        print("Epoch {}, train_acc: {}, val_acc: {}".format(epoch+1,train_acc,val_acc))
        if val_acc>best_acc:  # keep the weights with the best validation accuracy
            best_acc=val_acc
            best_w=copy.deepcopy(model.state_dict())
    model.load_state_dict(best_w)
    return model

use_gpu=torch.cuda.is_available()

model=net
optimizer=torch.optim.Adam(model.parameters(),lr=0.0001)
loss=nn.CrossEntropyLoss()

if use_gpu:
    model=model.cuda()
    loss=loss.cuda()

model_best=train(model,train_loader,optimizer,loss,epochs=40)
os.makedirs('results',exist_ok=True)  # make sure the output directory exists
torch.save(model_best,'results/model2.pkl')  # saves the whole model object
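
Per point 5 at the top: the checkpoint above was saved from GPU, so loading it on a CPU-only machine needs a map_location. A minimal sketch, reusing the path just saved:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torch.load('results/model2.pkl', map_location=device)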

Prediction:

# prediction
model=torch.load('results/model2.pkl')  # the checkpoint saved above
model.eval()  # disable the Dropout in the new fc head during inference
test_image=[test.iloc[i].values[0] for i in range(len(test))]


test_label=[]
with torch.no_grad():
    for step,x in enumerate(test_loader):
        x=x.cuda()
        out=model(x)
        pred=torch.argmax(out,1).cpu().numpy()
        label=[classes[i] for i in pred]  # decode integer labels back to class names
        test_label.extend(label)


test_label = pd.Series(test_label,name='label')
test_image = pd.Series(test_image,name='image')
submission = pd.concat([test_image, test_label], axis=1)  # two columns: image, label
submission.to_csv('results/submission1.csv', index=False)
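
A quick check of the output (optional): the submission should contain one row per test image with the two columns built above.

print(submission.shape)   # (number of test images, 2)
print(submission.head())  # image paths next to predicted class names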

 

 
