Environment setup: pip install torch==1.10.0+cu102 torchvision==0.11.0+cu102 torchaudio==0.10.0 -f https://download.pytorch.org/whl/torch_stable.html
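A quick way to confirm the install took effect (a minimal check, nothing project-specific):

import torch
import torchvision

print(torch.__version__)          # expect 1.10.0+cu102
print(torchvision.__version__)    # expect 0.11.0+cu102
print(torch.cuda.is_available())  # True if the CUDA 10.2 build sees a GPU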
Additionally (if necessary), add the following function to D:\Anaconda\envs\py38\Lib\site-packages\torchvision\transforms\functional.py:
def get_dimensions(image_tensor):
    if not isinstance(image_tensor, torch.Tensor):
        raise TypeError("Input must be a PyTorch Tensor.")
    if image_tensor.ndim != 3:
        raise ValueError("Input tensor must be 3-dimensional, in [C, H, W] format.")
    # unpack channel count, height, and width
    channels, height, width = image_tensor.shape
    return channels, height, width
This resolves the dimension error (torchvision 0.11 does not ship get_dimensions; it was only added in later releases).
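A minimal sanity check for the patched function (hypothetical usage; run inside the py38 environment):

import torch
import torchvision.transforms.functional as F

img = torch.zeros(3, 480, 640)   # dummy [C, H, W] image tensor
print(F.get_dimensions(img))     # -> (3, 480, 640)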
The following are the modifications made for training.
Alternative approach:
1. First convert the dataset into the PennFudan format. The XML annotation files must first be converted to TXT files, which are then converted to the final format (a sketch of the XML-to-TXT step is given after the code below).
Code:
import os
from PIL import Image

# the six algae classes
classes = ["Anabaena", "Chlorella", "Dinoflagellate", "Navicula",
           "Straight_algae", "Tetraphyllans_oblique"]

def get_image_size(image_path):
    with Image.open(image_path) as img:
        width, height = img.size
    return width, height

image_path = 'E:/Pytorch-Mask-RCNN-master/PennFudanPed/PNGImages'   # source images
image_path1 = 'E:/Pytorch-Mask-RCNN-master/Annotations1'            # intermediate TXT annotations
image_path2 = 'PennFudanPed/PedMasks'                               # relative mask directory

for imgq in os.listdir(image_path):
    file_path1 = os.path.join(image_path, imgq)
    width, height = get_image_size(file_path1)
    imgq1 = imgq[:-4]                              # file name without extension
    imgq2 = imgq1 + ".txt"                         # matching TXT annotation
    imgq3 = image_path2 + "/" + imgq1 + ".png"     # relative mask path
    file_path2 = os.path.join(image_path1, imgq2).replace("\\", "/")

    iuu = 0      # number of objects in this image
    as2 = []     # bounding boxes as [xmin, xmax, ymin, ymax]
    ant = None   # class name of the objects in this image
    with open(file_path2, encoding='UTF-8') as txt_file:
        for ms in txt_file.readlines():
            ms = ms.split(" ", 4)
            ant = classes[int(ms[0])]              # class index -> class name
            iuu += 1
            xmin = int(ms[1])
            xmax = int(ms[2])
            ymin = int(ms[3])
            ymax = int(ms[4][:-1])                 # strip the trailing newline
            as2.append([xmin, xmax, ymin, ymax])

    mub0 = "\"" + str(ant) + "\""                  # quoted class label
    mub = (mub0 + " ") * iuu                       # one label per object

    with open('E:/Pytorch-Mask-RCNN-master/Annotations2/%s.txt' % imgq1, 'w', encoding='UTF-8') as out_file1:
        out_file1.write("# Compatible with PASCAL Annotation Version 1.00" + '\n')
        out_file1.write("Image filename : \"PennFudanPed/PNGImages/" + str(imgq1) + ".png\"" + '\n')
        out_file1.write("Image size (X x Y x C) : " + str(width) + " x " + str(height) + " x 3" + '\n')
        out_file1.write("Database : \"The Penn-Fudan-Pedestrian Database\"" + '\n')
        out_file1.write("Objects with ground truth : " + str(iuu) + " { " + str(mub) + "}" + '\n')
        out_file1.write("# Note there may be some objects not included in the ground truth list for they are severe-occluded" + '\n')
        out_file1.write("# or have very small size." + '\n')
        out_file1.write("# Top left pixel co-ordinates : (1, 1)" + '\n')
        for i in range(1, iuu + 1):
            out_file1.write("# Details for pedestrian " + str(i) + " (" + str(mub0) + ")" + "\n")
            out_file1.write("Original label for object " + str(i) + " " + str(mub0) + " : " + str(mub0) + "\n")
            out_file1.write("Bounding box for object " + str(i) + " " + str(mub0)
                            + " (Xmin, Ymin) - (Xmax, Ymax) : ("
                            + str(as2[i - 1][0]) + ", " + str(as2[i - 1][2]) + ") - ("
                            + str(as2[i - 1][1]) + ", " + str(as2[i - 1][3]) + ")" + "\n")
            out_file1.write("Pixel mask for object " + str(i) + " " + str(mub0)
                            + " : \"" + str(imgq3) + "\"" + "\n")
        out_file1.write("\n")
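The XML-to-TXT step mentioned in step 1 is not shown in the original notes. Below is a minimal sketch, assuming VOC-style XML files under the VOC2007 directory mentioned above (the Annotations subdirectory is an assumption) and producing the "class_index xmin xmax ymin ymax" line format that the script above parses:

import os
import xml.etree.ElementTree as ET

classes = ["Anabaena", "Chlorella", "Dinoflagellate", "Navicula",
           "Straight_algae", "Tetraphyllans_oblique"]

# assumed layout: VOC XML under .../VOC2007/Annotations
xml_dir = 'E:/Pytorch-Mask-RCNN-master/Annotations1/VOCdevkit/VOC2007/Annotations'
txt_dir = 'E:/Pytorch-Mask-RCNN-master/Annotations1'

for name in os.listdir(xml_dir):
    if not name.endswith('.xml'):
        continue
    root = ET.parse(os.path.join(xml_dir, name)).getroot()
    with open(os.path.join(txt_dir, name[:-4] + '.txt'), 'w', encoding='UTF-8') as out:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            box = obj.find('bndbox')
            # one object per line: class index, then xmin xmax ymin ymax
            out.write(f"{classes.index(cls)} "
                      f"{int(float(box.find('xmin').text))} "
                      f"{int(float(box.find('xmax').text))} "
                      f"{int(float(box.find('ymin').text))} "
                      f"{int(float(box.find('ymax').text))}\n")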
2. Next, modify the training code.
Add the following function in detection\engine.py:
@torch.inference_mode()
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = "Test:"

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    # stat indices refer to the custom self.stats list added to CocoEvaluator in step 3
    stat = coco_evaluator.stats
    print(f"mAP: {stat[1] * 100:.2f}%")
    # precision and recall
    precision = stat[5]    # Precision
    recall = stat[11]      # Recall
    f1_score = 2 * (precision * recall) / (precision + recall + 1e-6)  # small epsilon prevents division by zero
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F1 Score: {f1_score * 100:.2f}%")

    torch.set_num_threads(n_threads)
    return coco_evaluator
3. Make three changes under class CocoEvaluator: in detection\coco_eval.py:
First, add a self.stats list.
Second, modify summarize so that self.stats is populated.
Third, rename eval to evaluate, if necessary.
A minimal sketch of the first two changes follows.
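The original notes do not show the exact code for these changes; the following is a sketch of the first two, assuming the standard torchvision reference coco_eval.py (only the changed parts are shown; adapt names and placement to your copy):

class CocoEvaluator:
    def __init__(self, coco_gt, iou_types):
        ...                # existing initialization unchanged
        self.stats = []    # first change: holds the COCO summary numbers

    def summarize(self):
        for iou_type, coco_eval in self.coco_eval.items():
            print(f"IoU metric: {iou_type}")
            coco_eval.summarize()
            # second change: keep the 12 COCO metrics so evaluate() in
            # engine.py can read them via coco_evaluator.stats; with
            # multiple IoU types this keeps the last one iterated
            self.stats = coco_eval.stats.tolist()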
4. Training: modify the last part of train.py:
num_epochs = 60
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # step the learning-rate scheduler
    lr_scheduler.step()
    # evaluate on the test set
    evaluate(model, data_loader_test, device=device)

    # save a checkpoint after every epoch
    output_dir = "E:/Pytorch-Mask-RCNN-master"
    checkpoint = {
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "lr_scheduler": lr_scheduler.state_dict(),
        "epoch": epoch,
    }
    utils.save_on_master(checkpoint, os.path.join(output_dir, f"model_{epoch}.pth"))
    utils.save_on_master(checkpoint, os.path.join(output_dir, "checkpoint.pth"))

print("That's it!")
Note: training is run with the train.py in the project root directory.
Remember to update paths such as the training dataset paths, and use absolute paths.
If this does not work, a similar approach can be used; the loop above is the specific place in train.py to modify.
5. Other steps:
In Predict.py, change the weight path to the path of the weights produced by training.
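A minimal sketch of that change, assuming Predict.py has already constructed the same model (the variable and file names here are placeholders):

import torch

# point this at the weights produced by training, e.g. checkpoint.pth or model_59.pth
weights_path = "E:/Pytorch-Mask-RCNN-master/checkpoint.pth"
checkpoint = torch.load(weights_path, map_location="cpu")
model.load_state_dict(checkpoint["model"])   # train.py above saves the weights under the "model" key
model.eval()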
Command format: python train.py / python test.py