YOLOv5 source-code reading notes:
1. train.py
- YOLOv5 data format
- path: shipdata contains images and labels for train, and images and labels for valid
- train.txt holds absolute image paths
- valid.txt holds absolute paths as well
- the train and valid entries are what you swap out
- beyond that, just pass data=person.yaml and training starts.
python train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (recommended)
Mind the CUDA device.
Converting XML labels to txt format (a hedged sketch follows):
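A minimal sketch of that conversion, assuming Pascal VOC-style XML (size, object/name/bndbox) and YOLO's normalized `class cx cy w h` output lines; the class list and directory names are hypothetical:

```python
# Hypothetical VOC-XML -> YOLO-txt converter; adjust names/paths to the dataset.
import xml.etree.ElementTree as ET
from pathlib import Path

names = ['boat']  # class list, must match the dataset yaml

def xml_to_txt(xml_file, txt_dir):
    root = ET.parse(xml_file).getroot()
    w = float(root.find('size/width').text)
    h = float(root.find('size/height').text)
    lines = []
    for obj in root.iter('object'):
        cls = names.index(obj.find('name').text)
        b = obj.find('bndbox')
        x1, y1 = float(b.find('xmin').text), float(b.find('ymin').text)
        x2, y2 = float(b.find('xmax').text), float(b.find('ymax').text)
        # YOLO format: class id, box center and size, all normalized to [0, 1]
        cx, cy = (x1 + x2) / 2 / w, (y1 + y2) / 2 / h
        bw, bh = (x2 - x1) / w, (y2 - y1) / h
        lines.append(f'{cls} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}')
    out = Path(txt_dir) / (Path(xml_file).stem + '.txt')
    out.write_text('\n'.join(lines))

for f in Path('labels_xml').glob('*.xml'):  # hypothetical source dir
    xml_to_txt(f, 'labels')
```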
Exporting to ONNX needs protobuf >= 3.20.2, while wandb, pulled in by the loggers of yolov5's train.py, needs protobuf 3.19.0; the two requirements are incompatible in one environment.
Training environment setup: from utils.loggers import Loggers
- protobuf has to be downgraded:
- pip show protobuf reports 4.24.4
- yolov5 training cannot go above 3.20.x, so downgrade: pip install protobuf==3.19.0
Exception raised: TypeError
File "/home/helen/code/gitee-yolov5/yolov5-work/yolov5/utils/loggers/wandb/wandb_utils.py", line 23, in <module>
import wandb
File "/home/helen/code/gitee-yolov5/yolov5-work/yolov5/utils/loggers/__init__.py", line 15, in <module>
from utils.loggers.wandb.wandb_utils import WandbLogger
File "/home/helen/code/gitee-yolov5/yolov5-work/yolov5/train.py", line 61, in <module>
from utils.loggers import Loggers
TypeError: Descriptors cannot not be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
1. Downgrade the protobuf package to 3.20.x or lower.
2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower). More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates
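If downgrading is not an option, the error message's second workaround can also be applied from Python, as long as it runs before anything imports protobuf (slower, as the message warns):

```python
import os
# Must run before protobuf/wandb is imported anywhere (e.g. at the top of train.py);
# switches protobuf to pure-Python parsing, which is noticeably slower.
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
```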
Contents of ship.yaml:
```yaml
path: /home/yfr-gdsf/zhr_files/shipData/shipdata  # dataset root dir
train: train.txt  # train images (relative to 'path'), 8219 images
val: val.txt  # val images (relative to 'path'), 588 images
test: test.txt  # test images (optional), 2936 images

# Classes
names:
  0: boat
```
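For reference, a rough paraphrase of how these entries get resolved (the real logic lives in check_dataset in utils/general.py; this is only the path-joining part):

```python
from pathlib import Path

import yaml

# Rough paraphrase of the path resolution in utils/general.check_dataset:
# relative 'train'/'val'/'test' entries are joined onto the 'path' root.
with open('ship.yaml', errors='ignore') as f:
    data = yaml.safe_load(f)
root = Path(data.get('path') or '')
for k in ('train', 'val', 'test'):
    if data.get(k) and not Path(data[k]).is_absolute():
        data[k] = str(root / data[k])
print(data['train'])  # -> /home/yfr-gdsf/zhr_files/shipData/shipdata/train.txt
```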
Command:
```bash
ls *.png > list.txt
```
Result:
```
0000000010.png
0000000020.png
0000000030.png
...
0000000100.png
0000000110.png
```
Command:
```bash
ls /home/xxx/input/*.png > list.txt
```
Result:
```
/home/xxx/input/0000000010.png
/home/xxx/input/0000000020.png
/home/xxx/input/0000000030.png
...
/home/xxx/input/0000000100.png
/home/xxx/input/0000000110.png
```
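The same list can be produced from Python, which makes it easy to guarantee the absolute paths that train.txt/valid.txt expect (directory hypothetical):

```python
from pathlib import Path

# Equivalent of `ls /home/xxx/input/*.png > list.txt`, with paths forced absolute.
root = Path('/home/xxx/input')
with open('list.txt', 'w') as f:
    for p in sorted(root.glob('*.png')):
        f.write(f'{p.resolve()}\n')
```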
Study of the Callbacks class: it handles all registered callbacks for the v5 hooks
- where the loggers object is defined
- where the confusion matrix gets saved
|---Loggers' on_train_end()
- to see the confusion matrix, read validate.run(): that is the final, overall validation pass
- the code sits at val.py line 186, where the confusion-matrix object is defined
Reference notes: 唐宇迪yolo版本讲解笔记.md
- the Loggers class has many hook (message-response) methods; a condensed sketch of the dispatch mechanism follows
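A condensed sketch of the register/dispatch mechanism, paraphrasing utils/callbacks.py (only a few of the v5 hook names are shown):

```python
# Condensed from utils/callbacks.py: one handler list per hook name.
class Callbacks:
    def __init__(self):
        self._callbacks = {
            'on_pretrain_routine_start': [],
            'on_pretrain_routine_end': [],
            'on_train_start': [],
            'on_train_batch_start': [],
            'on_train_end': [],  # ... the real class lists every v5 hook
        }

    def register_action(self, hook, name='', callback=None):
        assert hook in self._callbacks, f"hook '{hook}' not found"
        assert callable(callback), 'callback is not callable'
        self._callbacks[hook].append({'name': name, 'callback': callback})

    def run(self, hook, *args, **kwargs):
        # a hook nobody registered for is simply a no-op (see the call tree below)
        for logger in self._callbacks[hook]:
            logger['callback'](*args, **kwargs)
```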
The annotated call tree below is the best way to record a code path!!
```python
opt = parse_opt()
main(opt, callbacks=Callbacks()) # a Callbacks instance is created here
|----device = select_device() # cpu or cuda:0
|----train(opt.hyp, opt, device, callbacks)
| |----callbacks.run('on_pretrain_routine_start') # callbacks is still empty: nothing registered under this hook name yet, so this is a no-op
| |----hyp = yaml.safe_load(f) # load the hyperparameter yaml
| |----opt.hyp = hyp.copy()
| |----loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # has a great many on_* hook methods: loss logging, metric logging, artifact saving
| | |---self.comet_logger = CometLogger(self.opt, self.hyp) # also carries a lot of hook methods
| |----for k in methods(loggers): # methods(loggers) lists the callable methods of the class
            callbacks.register_action(k, callback=getattr(loggers, k)) # register every loggers method into callbacks: mainly loggers and comet_logger
| |----data_dict = loggers.remote_dataset # fetch the dataset paths
| | |---self.data_dict = self.check_dataset(self.opt.data) # data_config = yaml.safe_load(f): read the yaml, pull the path info, then resolve the txt paths against it
| |----init_seeds # unify all the random seeds
| |----train_path, val_path = data_dict['train'], data_dict['val'] # txt paths of the train and val sets
| |----nc = 1 if single_cls else int(data_dict['nc']) # number of classes
| |----weights = attempt_download(weights) # fetch the pretrained weights
| |----ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak
| |----model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device) # create
| |----csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
| |----csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect list(csd.items())[-1]
| |----model.load_state_dict(csd, strict=False) # load every weight except the anchors and the trailing Detect() layer
| |----amp = check_amp(model) # check AMP; True enables automatic mixed precision (m.amp=True); a ~10% difference is tolerable
| |----freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze; empty here, so nothing gets frozen
| |----for k, v in model.named_parameters():
| |----    v.requires_grad = True # train all layers
| |----    # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results)
| |----    if any(x in k for x in freeze):
| |----        LOGGER.info(f'freezing {k}')
| |----        v.requires_grad = False
| |----nbs = 64 # nominal batch size: the optimizer steps once every nbs/batch_size batches (every 4 with batch_size=16)
| |----accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
| |----hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay
| |----optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay'])
| |----lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear
| |----scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs): a linearly decaying schedule
| |----ema = ModelEMA(model) if RANK in {-1, 0} else None # exponential moving average
| |----train_loader, dataset = create_dataloader(train_path)
| |----val_loader = create_dataloader(val_path)
| |----|----LoadImagesAndLabels # 44311 labels in total (44,311 targets); 6400 training images, bs=16 -> 400 mini-batches
| |----check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # run AutoAnchor, with the train dataset as its input
| | |---compares the wh of the labels against the wh of the anchors, as a ratio
| |----model.half().float()
| |----callbacks.run('on_pretrain_routine_end', labels, names) # handled by the registered loggers
| | |---plot_labels # correlation statistics over x, y, w, h
| | |---x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])
| | |---sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
| |----hyperparameter setup: hyp-box, cls, obj, class_weights, etc.
| |----3 epochs serve as warmup
| |----stopper, stop = EarlyStopping(patience=opt.patience), False
| |----compute_loss = ComputeLoss(model) # init loss class
| |----callbacks.run('on_train_start')
| |----for epoch in range(0, 300):
| | |---model.train()
| | |---optimizer.zero_grad()
| | |---for imgs, targets in train_loader: # calls __getitem__ and applies the transforms
| | | |---callbacks.run('on_train_batch_start')
| | | |---optimizer.param_groups # lr and momentum are re-assigned here, especially during warmup: train with a very small lr at first
| | | |---pred = model(imgs)
| | | |---loss, loss_items = compute_loss(pred, targets) # compute the loss
| | | | |---build_targets(pred, targets) # positive/negative sample matching
| | | |---scaler.scale(loss).backward()
```
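Two of the formulas in the tree are worth sanity-checking with numbers (values hypothetical, matching the defaults mentioned above):

```python
# Linear LR schedule from the tree: decays from 1.0 at epoch 0 down to lrf at the end.
epochs, lrf = 300, 0.01
lf = lambda x: (1 - x / epochs) * (1.0 - lrf) + lrf
print(lf(0), lf(150), lf(299))  # 1.0, 0.505, ~0.0133

# Gradient accumulation: nominal batch 64 reached by stepping every 4 batches of 16.
nbs, batch_size = 64, 16
accumulate = max(round(nbs / batch_size), 1)   # -> 4
print(0.0005 * batch_size * accumulate / nbs)  # weight_decay scale unchanged: 0.0005
```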
```python
# Check the Python and torch versions
import os
import platform
import torch

platform.python_version()
torch.__version__
os.environ['CUDA_VISIBLE_DEVICES'] = device  # device is a string such as '0'

# Check the GPU model and memory capacity
p = torch.cuda.get_device_properties(0)
p.name, p.total_memory / (1 << 20)  # total memory in MiB
device = torch.device('cuda:0')

# List all the methods of a class (excluding __dunder__ names)
def methods(instance):
    return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith('__')]
```
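Quick usage check of methods() with a hypothetical class:

```python
class Demo:  # hypothetical stand-in for Loggers
    def on_train_start(self):
        pass

    def on_train_end(self):
        pass

print(methods(Demo()))  # ['on_train_end', 'on_train_start']
```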
Conclusion reversed: "the network is learning in reverse" was wrong.
First, how the wrong conclusion was reached:
- Mistake 1: weight[0,:] corresponds to the x of xywh. Offsets are learned with both signs, so the sign cannot be read as a classifier's feature selection; here it acts as a gain.
- Mistake 2: even on the conf dimension, weight[4,:] is normally distributed!!!! It is not the suspected reverse classifier hiding in the bias.
- Mistake 3: the feature map on the left is grad_cam followed by PCA, and cv2.applyColorMap inverted my high-activation regions!!!
PCA "inversion": the inversion simply extracts the high activations. High activations are the minority, so they are what gets extracted! Principal-component extraction: check which part of the values is high and frequent??? - The PCA principal-component map: there is a slight change here that still needs checking; once done, this issue can be closed.
Now the evidence for the correct conclusion:
- the yellow line in the plot is the weights of the Detect conv in the last head layer; the weights are normally distributed and show no trait of a reverse classifier!!!!
- further evidence: the weight histogram below also looks normally distributed (see the sketch after this list for one way to reproduce it)
- key evidence in the figure below: the products show that positive high activations contribute more to a high conf, contributing 4; negative activations times conv weights give 1.8, about 30%, while roughly 60% comes from the high activations.
- the classifier selects features of both positive and negative samples: some dimensions carry positive-sample activations, others negative-sample activations. It weighs the positive-sample features positively and the negative-sample features negatively, and that alone yields the correct decision.
- conclusion: the final features represent the positive samples and the negative samples alike!!
- a slightly negative weight on a high negative-sample activation is enough to drag the whole conf below zero.
- so the key point is that positive- and negative-sample activations are separated across dimensions, i.e. kept apart; the classifier then extracts the positive-sample features and negatively weighs the negative-sample ones.
- how many dimensions each side occupies is a separate question, tied to the design of the loss!!
- after all, in yolov5 only the obj loss is computed over both positives and negatives!! 19200 samples, about 300 of them positive!!! The BCE obj loss is trained on heavily imbalanced positives and negatives!!
- so it is understandable that the obj classifier comes out trained this way!!!!
- bbox loss and cls_loss are computed over positive samples only!! Keep that in mind.
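A hedged sketch of how the conf-channel weight histogram above can be reproduced (checkpoint path hypothetical; assumes the standard v5 Detect head where each of the 3 anchors owns 5+nc output channels and index 4 is the objectness logit):

```python
import matplotlib.pyplot as plt
import torch

ckpt = torch.load('best.pt', map_location='cpu')  # hypothetical checkpoint
det = ckpt['model'].model[-1]                     # the Detect() head
w = det.m[0].weight.detach().float()              # 1x1 conv of the first yolo layer: (na*(5+nc), ch, 1, 1)
no = w.shape[0] // 3                              # outputs per anchor = 5 + nc
conf_w = w.view(3, no, -1)[:, 4, :].flatten()     # the weights that produce the conf logit
plt.hist(conf_w.numpy(), bins=100)
plt.title('Detect conv weights feeding the conf logit')
plt.show()
```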
Updated 2023-08-29 17:39:23
Outputs of the three yolo layers (the model outputs three yolo/Detect layers): draw every anchor with conf > 0.25; each grid cell holds 3 anchors. This shows how the three layers line up with the targets; this is at prediction time. A sketch of the check follows.
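A hedged sketch of that per-layer check, assuming `model` and `tensor` are prepared as in the CAM script further down; in eval mode Detect returns `(pred, x)` where `x` holds the raw per-layer outputs:

```python
import torch

model.eval()
with torch.no_grad():
    pred, feats = model(tensor)  # feats[i]: (bs, na, ny, nx, 5 + nc), raw logits
for i, f in enumerate(feats):
    n = (f[..., 4].sigmoid() > 0.25).sum().item()  # anchors whose objectness clears 0.25
    print(f'yolo layer {i}: grid {tuple(f.shape[2:4])}, {n} anchors > 0.25')
```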
The CAM method
- pull the yolov5 source
In the v5 model, layers 17-C3, 20-C3 and 23-C3 produce the three yolo-layer outputs; the question is which layer's output actually captured the target.
- C3's forward pass: cv3 is the final output of the C3 module, as the forward code of C3 and Bottleneck shows: self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
```python
class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
```
```
C3(
  (cv1): Conv(
    (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
    (act): SiLU(inplace=True)
  )
  (cv2): Conv(
    (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
    (act): SiLU(inplace=True)
  )
  (cv3): Conv(
    (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
    (act): SiLU(inplace=True)
  )
  (m): Sequential(
    (0): Bottleneck(
      (cv1): Conv(
        (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (cv2): Conv(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
    )
  )
)
```
C3's cv3 has three parts: conv, bn, act; pick each of the three in turn to inspect the feature-map activations, as follows:
```python
import sys
sys.path.insert(0, '/home/helen/code/deep-learning-for-image-processing/pytorch-grad-cam')

import warnings
# warnings.filterwarnings('ignore')
# warnings.simplefilter('ignore')
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision.transforms as transforms
import yaml
from PIL import Image
from pytorch_grad_cam import EigenCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image

from models.yolo import Model

COLORS = np.random.uniform(0, 255, size=(80, 3))
img_path = '/home/helen/dataset/FLIR/FLIR_ADAS/val/images/FLIR_00106.jpeg'
# img = np.array(Image.open(img_path))
img = cv2.imread(img_path)
# padded_image = cv2.resize(img, (640, 640))
# padded_image = np.pad(img, ((0, 128), (0, 0), (0, 0)), mode='constant')  # , constant_values=img.mean())
# padded_image = np.pad(img, ((0, 128), (0, 0), (0, 0)), mode='constant', constant_values=img.mean())
padded_image = img
img = np.float32(padded_image) / 255
transform = transforms.ToTensor()
tensor = transform(img).unsqueeze(0)

device = 'cuda:0'
cfg = '/home/helen/code/yolov5/models/yolov5s.yaml'
nc = 2
hypfile = '/home/helen/code/yolov5/data/hyps/hyp.scratch-low.yaml'
with open(hypfile, errors='ignore') as f:
    hyp = yaml.safe_load(f)
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)
weights = '/home/helen/code/yolov5/best.pt'
ckpt = torch.load(weights, map_location='cpu')
model.load_state_dict(ckpt['model'].float().state_dict(), strict=False)
model.eval()

target_layers = [model.model[17].cv3, model.model[20].cv3, model.model[23].cv3]
tensor = tensor.to(device)
all_cam_image = None
all_gray_cam = None
for l in target_layers:
    cam = EigenCAM(model, target_layers=[l], use_cuda=True)
    grayscale_cam = cam(tensor, targets=[0])[0, :, :]  # EigenCAM is gradient-free, so targets is effectively unused
    cam_image = show_cam_on_image(img, grayscale_cam, use_rgb=True)
    if all_cam_image is None:
        all_cam_image = cam_image
        all_gray_cam = grayscale_cam
    else:
        all_cam_image = np.hstack((all_cam_image, cam_image))
        all_gray_cam = np.hstack((all_gray_cam, grayscale_cam))
plt.imshow(all_cam_image)
plt.title('yolo_layers: 1 large, 2 medium, 3 small objects')
plt.show()
```
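To compare conv, bn and act separately, as suggested above, only the hook point changes; the sub-module names come straight from the printed C3 structure:

```python
# Hook before BN, after BN, or after the SiLU activation of the same cv3 block.
target_layers = [model.model[17].cv3.conv]  # raw conv output
target_layers = [model.model[17].cv3.bn]    # after BatchNorm
target_layers = [model.model[17].cv3.act]   # after SiLU
```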