Environment setup: pip install torch==1.10.0+cu102 torchvision==0.11.0+cu102 torchaudio==0.10.0 -f https://download.pytorch.org/whl/torch_stable.html
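A quick way to confirm the install took effect (a minimal check, nothing project-specific):

import torch
import torchvision

print(torch.__version__)          # expect 1.10.0+cu102
print(torchvision.__version__)    # expect 0.11.0+cu102
print(torch.cuda.is_available())  # True if the CUDA 10.2 build sees a GPU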
Additionally (if necessary), add the following function to D:\Anaconda\envs\py38\Lib\site-packages\torchvision\transforms\functional.py:
def get_dimensions(image_tensor):
    if not isinstance(image_tensor, torch.Tensor):
        raise TypeError("Input must be a PyTorch Tensor.")
    if image_tensor.ndim != 3:
        raise ValueError("Input tensor must be 3-dimensional, in [C, H, W] format.")
    # unpack channel count, height, and width
    channels, height, width = image_tensor.shape
    return channels, height, width
This resolves the dimension error (torchvision 0.11 does not ship get_dimensions; it was only added in later releases).
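A minimal sanity check for the patched function (hypothetical usage; run inside the py38 environment):

import torch
import torchvision.transforms.functional as F

img = torch.zeros(3, 480, 640)   # dummy [C, H, W] image tensor
print(F.get_dimensions(img))     # -> (3, 480, 640)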
The following are the modifications made for training.
Alternative approach:
1. First convert the dataset into the PennFudan format. The XML annotation files must first be converted to TXT files, which are then converted to the final format (a sketch of the XML-to-TXT step is given after the code below).
Code:
import os
from PIL import Image

# the six algae classes
classes = ["Anabaena", "Chlorella", "Dinoflagellate", "Navicula",
           "Straight_algae", "Tetraphyllans_oblique"]

def get_image_size(image_path):
    with Image.open(image_path) as img:
        width, height = img.size
    return width, height

image_path = 'E:/Pytorch-Mask-RCNN-master/PennFudanPed/PNGImages'   # source images
image_path1 = 'E:/Pytorch-Mask-RCNN-master/Annotations1'            # intermediate TXT annotations
image_path2 = 'PennFudanPed/PedMasks'                               # relative mask directory

for imgq in os.listdir(image_path):
    file_path1 = os.path.join(image_path, imgq)
    width, height = get_image_size(file_path1)
    imgq1 = imgq[:-4]                              # file name without extension
    imgq2 = imgq1 + ".txt"                         # matching TXT annotation
    imgq3 = image_path2 + "/" + imgq1 + ".png"     # relative mask path
    file_path2 = os.path.join(image_path1, imgq2).replace("\\", "/")

    iuu = 0      # number of objects in this image
    as2 = []     # bounding boxes as [xmin, xmax, ymin, ymax]
    ant = None   # class name of the objects in this image
    with open(file_path2, encoding='UTF-8') as txt_file:
        for ms in txt_file.readlines():
            ms = ms.split(" ", 4)
            ant = classes[int(ms[0])]              # class index -> class name
            iuu += 1
            xmin = int(ms[1])
            xmax = int(ms[2])
            ymin = int(ms[3])
            ymax = int(ms[4][:-1])                 # strip the trailing newline
            as2.append([xmin, xmax, ymin, ymax])

    mub0 = "\"" + str(ant) + "\""                  # quoted class label
    mub = (mub0 + " ") * iuu                       # one label per object

    with open('E:/Pytorch-Mask-RCNN-master/Annotations2/%s.txt' % imgq1, 'w', encoding='UTF-8') as out_file1:
        out_file1.write("# Compatible with PASCAL Annotation Version 1.00" + '\n')
        out_file1.write("Image filename : \"PennFudanPed/PNGImages/" + str(imgq1) + ".png\"" + '\n')
        out_file1.write("Image size (X x Y x C) : " + str(width) + " x " + str(height) + " x 3" + '\n')
        out_file1.write("Database : \"The Penn-Fudan-Pedestrian Database\"" + '\n')
        out_file1.write("Objects with ground truth : " + str(iuu) + " { " + str(mub) + "}" + '\n')
        out_file1.write("# Note there may be some objects not included in the ground truth list for they are severe-occluded" + '\n')
        out_file1.write("# or have very small size." + '\n')
        out_file1.write("# Top left pixel co-ordinates : (1, 1)" + '\n')
        for i in range(1, iuu + 1):
            out_file1.write("# Details for pedestrian " + str(i) + " (" + str(mub0) + ")" + "\n")
            out_file1.write("Original label for object " + str(i) + " " + str(mub0) + " : " + str(mub0) + "\n")
            out_file1.write("Bounding box for object " + str(i) + " " + str(mub0)
                            + " (Xmin, Ymin) - (Xmax, Ymax) : ("
                            + str(as2[i - 1][0]) + ", " + str(as2[i - 1][2]) + ") - ("
                            + str(as2[i - 1][1]) + ", " + str(as2[i - 1][3]) + ")" + "\n")
            out_file1.write("Pixel mask for object " + str(i) + " " + str(mub0)
                            + " : \"" + str(imgq3) + "\"" + "\n")
        out_file1.write("\n")
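The XML-to-TXT step mentioned in step 1 is not shown in the original notes. Below is a minimal sketch, assuming VOC-style XML files under the VOC2007 directory mentioned above (the Annotations subdirectory is an assumption) and producing the "class_index xmin xmax ymin ymax" line format that the script above parses:

import os
import xml.etree.ElementTree as ET

classes = ["Anabaena", "Chlorella", "Dinoflagellate", "Navicula",
           "Straight_algae", "Tetraphyllans_oblique"]

# assumed layout: VOC XML under .../VOC2007/Annotations
xml_dir = 'E:/Pytorch-Mask-RCNN-master/Annotations1/VOCdevkit/VOC2007/Annotations'
txt_dir = 'E:/Pytorch-Mask-RCNN-master/Annotations1'

for name in os.listdir(xml_dir):
    if not name.endswith('.xml'):
        continue
    root = ET.parse(os.path.join(xml_dir, name)).getroot()
    with open(os.path.join(txt_dir, name[:-4] + '.txt'), 'w', encoding='UTF-8') as out:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            box = obj.find('bndbox')
            # one object per line: class index, then xmin xmax ymin ymax
            out.write(f"{classes.index(cls)} "
                      f"{int(float(box.find('xmin').text))} "
                      f"{int(float(box.find('xmax').text))} "
                      f"{int(float(box.find('ymin').text))} "
                      f"{int(float(box.find('ymax').text))}\n")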
2. Next, modify the training code.
Add the following function in detection\engine.py:
@torch.inference_mode()
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = "Test:"

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    # stat indices refer to the custom self.stats list added to CocoEvaluator in step 3
    stat = coco_evaluator.stats
    print(f"mAP: {stat[1] * 100:.2f}%")
    # precision and recall
    precision = stat[5]    # Precision
    recall = stat[11]      # Recall
    f1_score = 2 * (precision * recall) / (precision + recall + 1e-6)  # small epsilon prevents division by zero
    print(f"Precision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
    print(f"F1 Score: {f1_score * 100:.2f}%")

    torch.set_num_threads(n_threads)
    return coco_evaluator
3. Make three changes under class CocoEvaluator: in detection\coco_eval.py:
First, add a self.stats list.
Second, modify summarize so that self.stats is populated.
Third, rename eval to evaluate, if necessary.
A minimal sketch of the first two changes follows.
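The original notes do not show the exact code for these changes; the following is a sketch of the first two, assuming the standard torchvision reference coco_eval.py (only the changed parts are shown; adapt names and placement to your copy):

class CocoEvaluator:
    def __init__(self, coco_gt, iou_types):
        ...                # existing initialization unchanged
        self.stats = []    # first change: holds the COCO summary numbers

    def summarize(self):
        for iou_type, coco_eval in self.coco_eval.items():
            print(f"IoU metric: {iou_type}")
            coco_eval.summarize()
            # second change: keep the 12 COCO metrics so evaluate() in
            # engine.py can read them via coco_evaluator.stats; with
            # multiple IoU types this keeps the last one iterated
            self.stats = coco_eval.stats.tolist()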
4. Training: modify the last part of train.py:
num_epochs = 60
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # step the learning-rate scheduler
    lr_scheduler.step()
    # evaluate on the test set
    evaluate(model, data_loader_test, device=device)

    # save a checkpoint after every epoch
    output_dir = "E:/Pytorch-Mask-RCNN-master"
    checkpoint = {
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "lr_scheduler": lr_scheduler.state_dict(),
        "epoch": epoch,
    }
    utils.save_on_master(checkpoint, os.path.join(output_dir, f"model_{epoch}.pth"))
    utils.save_on_master(checkpoint, os.path.join(output_dir, "checkpoint.pth"))

print("That's it!")
Note: training is run with the train.py in the project root directory.
Remember to update paths such as the training dataset paths, and use absolute paths.
If this does not work, a similar approach can be used; the loop above is the specific place in train.py to modify.
5. Other steps:
In Predict.py, change the weight path to the path of the weights produced by training.
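A minimal sketch of that change, assuming Predict.py has already constructed the same model (the variable and file names here are placeholders):

import torch

# point this at the weights produced by training, e.g. checkpoint.pth or model_59.pth
weights_path = "E:/Pytorch-Mask-RCNN-master/checkpoint.pth"
checkpoint = torch.load(weights_path, map_location="cpu")
model.load_state_dict(checkpoint["model"])   # train.py above saves the weights under the "model" key
model.eval()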
Command format: python train.py / python test.py