PyTorch Mask-RCNN: Environment Setup, Training, Prediction, Evaluation, and Metric Printing

GitHub link: aotumanbiu/Pytorch-Mask-RCNN — a Mask R-CNN implementation based on the official PyTorch tutorial, with a ResNet50+FPN backbone. Training on the example dataset is complete; further modifications and additional features are planned. (github.com)

Environment setup: pip install torch==1.10.0+cu102 torchvision==0.11.0+cu102 torchaudio==0.10.0 -f https://download.pytorch.org/whl/torch_stable.html
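To confirm the install took effect, the versions can be checked from Python (the expected strings come from the pip command above; this check is optional):

import torch
import torchvision

print(torch.__version__)          # expected: 1.10.0+cu102
print(torchvision.__version__)    # expected: 0.11.0+cu102
print(torch.cuda.is_available())  # True if the CUDA 10.2 build can see a GPU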

Additionally (if necessary):

Add the following function to D:\Anaconda\envs\py38\Lib\site-packages\torchvision\transforms\functional.py:

def get_dimensions(image_tensor):
    # Return (channels, height, width) for a 3-D image tensor.
    if not isinstance(image_tensor, torch.Tensor):
        raise TypeError("Input must be a PyTorch Tensor")
    if image_tensor.ndim != 3:
        raise ValueError("Input tensor must be 3-dimensional, in [C, H, W] format")
    # unpack channel count, height, and width
    channels, height, width = image_tensor.shape
    return channels, height, width

This resolves the dimension-related error.
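A quick sanity check of the patched function, assuming a [C, H, W] tensor (this snippet is only for verification and is not part of the original patch):

import torch
from torchvision.transforms import functional as F

print(F.get_dimensions(torch.rand(3, 224, 224)))  # expected output: (3, 224, 224)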

The following are the modifications made for training:

An alternative approach:

1. First convert the dataset into the PennFudan format. The XML annotation files must first be converted to txt, and then to the format used here (a sketch of the XML-to-txt step appears after the converter code below).

Code:

import os
from PIL import Image

# class index in the txt file -> class name
classes = ["Anabaena", "Chlorella", "Dinoflagellate", "Navicula", "Straight_algae", "Tetraphyllans_oblique"]

CURRENT_DIR = 'E:/Pytorch-Mask-RCNN-master/Annotations1/VOCdevkit/VOC2007'  # VOC-format source directory (not used below)

def get_image_size(image_path):
    # Read the image width and height with PIL.
    with Image.open(image_path) as img:
        width, height = img.size
        return width, height

image_path = 'E:/Pytorch-Mask-RCNN-master/PennFudanPed/PNGImages'  # images
image_path1 = 'E:/Pytorch-Mask-RCNN-master/Annotations1'           # intermediate txt annotations
image_path2 = 'PennFudanPed/PedMasks'                              # relative mask path written into the output

files = os.listdir(image_path)
for imgq in files:
    file_path1 = os.path.join(image_path, imgq)
    width, height = get_image_size(file_path1)
    imgq1 = imgq[:-4]                            # file name without extension
    imgq2 = imgq1 + ".txt"                       # matching txt annotation
    imgq3 = image_path2 + "/" + imgq1 + ".png"   # matching mask path
    file_path2 = os.path.join(image_path1, imgq2).replace("\\", "/")

    ant = '9'   # class name for this file (all objects in one file share a class)
    iuu = 0     # number of objects
    as2 = []    # list of [xmin, xmax, ymin, ymax] boxes
    with open(file_path2, encoding='UTF-8') as file_path3:
        for ms in file_path3.readlines():
            # each txt line: class_index xmin xmax ymin ymax
            ms = ms.split(" ", 4)
            if ms[0] in ("0", "1", "2", "3", "4", "5"):
                ant = classes[int(ms[0])]
            iuu = iuu + 1
            xmin = int(ms[1])
            xmax = int(ms[2])
            ymin = int(ms[3])
            ymax = int(ms[4].strip())
            as2.append([xmin, xmax, ymin, ymax])

    mub0 = "\"" + str(ant) + "\""   # quoted class name
    mub = (mub0 + " ") * iuu        # repeated once per object

    # Write a PennFudan-style annotation file.
    out_file1 = open('E:/Pytorch-Mask-RCNN-master/Annotations2/%s.txt' % imgq1, 'w', encoding='UTF-8')
    out_file1.write("# Compatible with PASCAL Annotation Version 1.00" + '\n')
    out_file1.write("Image filename :" + "\"" + "PennFudanPed/PNGImages/" + str(imgq1) + ".png" + "\"" + '\n')
    out_file1.write("Image size (X x Y x C) : " + str(width) + " x " + str(height) + " x 3" + '\n')
    out_file1.write("Database : " + "\"" + "The Penn-Fudan-Pedestrian Database" + "\"" + '\n')
    out_file1.write("Objects with ground truth : " + str(iuu) + " { " + str(mub) + "}" + '\n')
    out_file1.write("# Note there may be some objects not included in the ground truth list for they are severe-occluded" + '\n')
    out_file1.write("# or have very small size." + '\n')
    out_file1.write("# Top left pixel co-ordinates : (1, 1)" + '\n')
    for i in range(1, iuu + 1):
        out_file1.write("# Details for pedestrian " + str(i) + " (" + str(mub0) + ")" + "\n")
        out_file1.write("Original label for object " + str(i) + " " + str(mub0) + " : " + str(mub0) + "\n")
        out_file1.write("Bounding box for object " + str(i) + " " + str(mub0) + " (Xmin, Ymin) - (Xmax, Ymax) : ("
                        + str(as2[i - 1][0]) + ", " + str(as2[i - 1][2]) + ") - ("
                        + str(as2[i - 1][1]) + ", " + str(as2[i - 1][3]) + ")" + "\n")
        out_file1.write("Pixel mask for object " + str(i) + " " + str(mub0) + " : " + "\"" + str(imgq3) + "\"" + "\n")
        out_file1.write("\n")
    out_file1.close()
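The XML-to-txt step mentioned in point 1 is not shown above. The following is a minimal sketch of what it could look like, assuming VOC-style XML files (one <object> per instance with <name> and <bndbox>) and producing one line per object in the class_index xmin xmax ymin ymax order that the converter above reads; the directory paths are assumptions, not the original script:

import os
import xml.etree.ElementTree as ET

classes = ["Anabaena", "Chlorella", "Dinoflagellate", "Navicula", "Straight_algae", "Tetraphyllans_oblique"]
xml_dir = 'E:/Pytorch-Mask-RCNN-master/Annotations1/VOCdevkit/VOC2007/Annotations'  # assumed location of the VOC xml files
txt_dir = 'E:/Pytorch-Mask-RCNN-master/Annotations1'                                # txt files read by the converter above

for xml_name in os.listdir(xml_dir):
    if not xml_name.endswith('.xml'):
        continue
    root = ET.parse(os.path.join(xml_dir, xml_name)).getroot()
    lines = []
    for obj in root.iter('object'):
        cls_idx = classes.index(obj.find('name').text)  # class name -> index 0..5
        box = obj.find('bndbox')
        xmin = int(float(box.find('xmin').text))
        xmax = int(float(box.find('xmax').text))
        ymin = int(float(box.find('ymin').text))
        ymax = int(float(box.find('ymax').text))
        # order matches what the converter above expects: class xmin xmax ymin ymax
        lines.append(f"{cls_idx} {xmin} {xmax} {ymin} {ymax}")
    with open(os.path.join(txt_dir, xml_name[:-4] + '.txt'), 'w', encoding='UTF-8') as f:
        f.write("\n".join(lines) + "\n")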

   

2. Next, modify the training code:

Add the following function to detection\engine.py:

@torch.inference_mode()
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = "Test:"

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    stat = coco_evaluator.stats  # filled in by the modified coco_eval.py (see step 3)

    # mAP and F1
    print(f"mAP: {stat[1] * 100:.2f}%")  # mAP@0.5:0.95

    # precision and recall
    precision = stat[5]   # Precision
    recall = stat[11]     # Recall
    f1_score = 2 * (precision * recall) / (precision + recall + 1e-6)  # small epsilon to avoid division by zero
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F1 Score: {f1_score * 100:.2f}%")

    torch.set_num_threads(n_threads)
    return coco_evaluator

3. Make three modifications under class CocoEvaluator in detection\coco_eval.py (a sketch of what these might look like follows this list):

First, add a self.stats attribute.

Second, modify summarize so that it fills self.stats.

Third, change eval to evaluate, if necessary.
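The exact edits are not shown in the original post; the following is a minimal sketch of what the first two changes might look like, assuming the standard torchvision detection-reference CocoEvaluator (one pycocotools COCOeval object per IoU type) and that self.stats should hold the 12-entry COCO stats array read by evaluate() in step 2:

class CocoEvaluator:
    def __init__(self, coco_gt, iou_types):
        ...  # existing initialization unchanged
        self.stats = []  # change 1: read later as coco_evaluator.stats

    def summarize(self):
        for iou_type, coco_eval in self.coco_eval.items():
            print(f"IoU metric: {iou_type}")
            coco_eval.summarize()
            self.stats = coco_eval.stats  # change 2: keep the summarized 12-entry stats array
            # note: with both "bbox" and "segm" IoU types, this keeps the last one summarized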

4. Training: modify the last part of train.py:

    num_epochs = 60
    for epoch in range(num_epochs):
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate the model on the test dataset
        evaluate(model, data_loader_test, device=device)

        # save a checkpoint after every epoch
        output_dir = "E:/Pytorch-Mask-RCNN-master"
        checkpoint = {
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "lr_scheduler": lr_scheduler.state_dict(),
            "epoch": epoch,
        }
        utils.save_on_master(checkpoint, os.path.join(output_dir, f"model_{epoch}.pth"))
        utils.save_on_master(checkpoint, os.path.join(output_dir, "checkpoint.pth"))

    print("That's it!")

Note: training is run with the train.py in the project root directory.

Remember to update the paths (e.g., the training dataset paths); use absolute paths.

If that does not work, a similar approach such as the following can be used.

This is the specific place in train.py to modify.

5. Other steps:

Modify Predict.py:

Change the weight path to the checkpoint produced by training.
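A minimal sketch of loading the trained weights for prediction, assuming Predict.py builds the model the same way as train.py (the helper name get_model_instance_segmentation, the class count, and the path are assumptions, not the repository's exact code):

import torch

# hypothetical model builder; in practice use whatever Predict.py / train.py already define
model = get_model_instance_segmentation(num_classes=7)  # 6 classes + background (assumption)

# load the checkpoint saved by the training loop above; weights are stored under the "model" key
checkpoint = torch.load("E:/Pytorch-Mask-RCNN-master/checkpoint.pth", map_location="cpu")
model.load_state_dict(checkpoint["model"])
model.eval()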

Command format: python train.py / python test.py

 
