As a noob who is about to graduate but still can't write anything and still doesn't know anything, I've decided to teach everyone how to quickly pad out a paper for an ordinary journal, and help all you watered-down master's students graduate "with real substance". If you're an undergrad (985/211 overachievers excepted), my fellow double-non (neither 985 nor 211) kids, hurry up and learn how to pad a paper!!! Enough talk, here comes the big one!!!
Step one: download VS Code, rent a server, and log into it. For the detailed walkthrough, see my previous post:
《满怀美梦的小崽子是pycharm主义者》: a tutorial on deploying stable diffusion/diffusers on a server (CSDN blog). In short, this post picks up directly from the server; the interface looks like this:
[screenshot of the server interface]
Once inside the server, we will mainly use two projects. One is
GitHub - bubbliiiing/yolov8-pytorch: a yolov8-pytorch repository that can be used to train on your own dataset.
It's a commented YOLOv8 project. If you understand the theory even a little, reading it should be no problem at all; if you understand nothing, that's fine too, just keep reading.
However......
The other project is
As the (remixed) saying goes: slack off when young, add attention at graduation. If you really can't get a paper written, try adding some attention. This project gathers most of the common attention modules in one place, and the key is that they're plug-and-play, so they're very easy to wire in. First, drag a weight file into the repo and see how the YOLOv8 environment is doing.
Just run predict.py and test the waters.
Instant error. Lovely!!! Turns out the weight file was in the wrong place. No problem: open yolo.py, go to line 28, and change the path there to your weight file's path.
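For reference, the change looks roughly like this (a sketch of the _defaults dict near the top of the repo's yolo.py; the key names and default paths here are from memory of the bubbliiiing repo, so check your copy):

# yolo.py, around line 28: point model_path at your own weight file
_defaults = {
    "model_path"   : 'model_data/yolov8_s.pth',      # <-- change this to your weight file's path
    "classes_path" : 'model_data/coco_classes.txt',
}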
Inference works. OK, time to start modifying the model, heh heh heh.
In train.py, the default training dataset is VOC2007, so we'll just use VOC2007. But I bet most people lose interest the moment they see an almost-1-GB archive, let alone COCO's 20-plus GB. But!!! We're on a server, friends! The beauty of a server is that it already has everything!!!
Just find the VOCdevkit folder under the autodl-pub path and copy VOC2007.tar.gz into the yolov8 directory,
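Something like this (the autodl-pub mount point and project directory name are assumptions, adjust them to your machine):

cp /root/autodl-pub/VOCdevkit/VOC2007.tar.gz /root/yolov8-pytorch/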
then run the following command:
tar -zxvf VOC2007.tar.gz
Woohoo!!! From copying it over to extracting the dataset, the whole thing took under a minute. That's a server for you!!!
Now run:
python voc_annotation.py
Error! Lovely!!!!!
Turns out the path was wrong. I extracted the archive directly, so my folder is just VOC2007, while people who download the dataset the normal way get it wrapped inside a VOCdevkit folder. So annoying!!!
Just create a new VOCdevkit folder and drag VOC2007 into it. Humor the script: do things its way rather than editing its code.
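In command form (the directory names follow the layout voc_annotation.py expects):

mkdir -p VOCdevkit
mv VOC2007 VOCdevkit/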
Woohoo!! It ran successfully!!!
Next, run:
python train.py
Woohoo!! It's training!!!!
Next, let's add some seasoning.
Let's see what this MobileViTv2Attention can do.
Copy the whole attention file into nets/yolo.py,
then register the attention modules at line 111 and call them at line 162. In case you can't follow, here's the full code:
import numpy as np
import torch
import torch.nn as nn
from nets.backbone import Backbone, C2f, Conv
from nets.yolo_training import weights_init
from utils.utils_bbox import make_anchors
def fuse_conv_and_bn(conv, bn):
# Fuse Conv2d + BatchNorm2d to reduce computation
# Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
fusedconv = nn.Conv2d(conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
dilation=conv.dilation,
groups=conv.groups,
bias=True).requires_grad_(False).to(conv.weight.device)
# prepare the fused kernel
w_conv = conv.weight.clone().view(conv.out_channels, -1)
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
# prepare the fused bias
b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
return fusedconv
class DFL(nn.Module):
# DFL module
# Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
def __init__(self, c1=16):
super().__init__()
self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
x = torch.arange(c1, dtype=torch.float)
self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
self.c1 = c1
def forward(self, x):
# bs, self.reg_max * 4, 8400
b, c, a = x.shape
# bs, 4, self.reg_max, 8400 => bs, self.reg_max, 4, 8400 => b, 4, 8400
# softmax turns the c1=16 bins (values 0~15) into weights, and the conv takes their weighted sum (expectation) to get the final value.
return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
# return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)
#---------------------------------------------------#
# yolo_body
#---------------------------------------------------#
class YoloBody(nn.Module):
def __init__(self, input_shape, num_classes, phi, pretrained=False):
super(YoloBody, self).__init__()
depth_dict = {'n' : 0.33, 's' : 0.33, 'm' : 0.67, 'l' : 1.00, 'x' : 1.00,}
width_dict = {'n' : 0.25, 's' : 0.50, 'm' : 0.75, 'l' : 1.00, 'x' : 1.25,}
deep_width_dict = {'n' : 1.00, 's' : 1.00, 'm' : 0.75, 'l' : 0.50, 'x' : 0.50,}
dep_mul, wid_mul, deep_mul = depth_dict[phi], width_dict[phi], deep_width_dict[phi]
base_channels = int(wid_mul * 64) # 64
base_depth = max(round(dep_mul * 3), 1) # 3
#-----------------------------------------------#
# The input image is 3, 640, 640
#-----------------------------------------------#
#---------------------------------------------------#
# Build the backbone model
# It yields three effective feature maps, whose shapes are:
# 256, 80, 80
# 512, 40, 40
# 1024 * deep_mul, 20, 20
#---------------------------------------------------#
self.backbone = Backbone(base_channels, base_depth, deep_mul, phi, pretrained=pretrained)
#------------------------enhanced feature extraction network------------------------#
self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
# 1024 * deep_mul + 512, 40, 40 => 512, 40, 40
self.conv3_for_upsample1 = C2f(int(base_channels * 16 * deep_mul) + base_channels * 8, base_channels * 8, base_depth, shortcut=False)
# 768, 80, 80 => 256, 80, 80
self.conv3_for_upsample2 = C2f(base_channels * 8 + base_channels * 4, base_channels * 4, base_depth, shortcut=False)
# 256, 80, 80 => 256, 40, 40
self.down_sample1 = Conv(base_channels * 4, base_channels * 4, 3, 2)
# 512 + 256, 40, 40 => 512, 40, 40
self.conv3_for_downsample1 = C2f(base_channels * 8 + base_channels * 4, base_channels * 8, base_depth, shortcut=False)
# 512, 40, 40 => 512, 20, 20
self.down_sample2 = Conv(base_channels * 8, base_channels * 8, 3, 2)
# 1024 * deep_mul + 512, 20, 20 => 1024 * deep_mul, 20, 20
self.conv3_for_downsample2 = C2f(int(base_channels * 16 * deep_mul) + base_channels * 8, int(base_channels * 16 * deep_mul), base_depth, shortcut=False)
#------------------------enhanced feature extraction network------------------------#
ch = [base_channels * 4, base_channels * 8, int(base_channels * 16 * deep_mul)]
self.shape = None
self.nl = len(ch)
# self.stride = torch.zeros(self.nl)
self.stride = torch.tensor([256 / x.shape[-2] for x in self.backbone.forward(torch.zeros(1, 3, 256, 256))]) # forward
self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
self.no = num_classes + self.reg_max * 4 # number of outputs per anchor
self.num_classes = num_classes
c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], num_classes) # channels
self.cv2 = nn.ModuleList(nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, num_classes, 1)) for x in ch)
if not pretrained:
weights_init(self)
self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
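# NOTE: one plug-in attention module per feature level. d_model is set to the
# spatial width of P3/P4/P5 (80/40/20 for a 640x640 input), because the module's
# Linear layers act on the last (width) dimension of the BCHW feature maps;
# this hard-codes the 640x640 input size, so other input shapes will crash here.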
self.MobileViTv2Attention2 = MobileViTv2Attention(d_model=80)
self.MobileViTv2Attention3 = MobileViTv2Attention(d_model=40)
self.MobileViTv2Attention4 = MobileViTv2Attention(d_model=20)
def fuse(self):
print('Fusing layers... ')
for m in self.modules():
if type(m) is Conv and hasattr(m, 'bn'):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, 'bn') # remove batchnorm
m.forward = m.forward_fuse # update forward
return self
def forward(self, x):
# backbone
feat1, feat2, feat3 = self.backbone.forward(x)
#------------------------enhanced feature extraction network------------------------#
# 1024 * deep_mul, 20, 20 => 1024 * deep_mul, 40, 40
P5_upsample = self.upsample(feat3)
# 1024 * deep_mul, 40, 40 cat 512, 40, 40 => 1024 * deep_mul + 512, 40, 40
P4 = torch.cat([P5_upsample, feat2], 1)
# 1024 * deep_mul + 512, 40, 40 => 512, 40, 40
P4 = self.conv3_for_upsample1(P4)
# 512, 40, 40 => 512, 80, 80
P4_upsample = self.upsample(P4)
# 512, 80, 80 cat 256, 80, 80 => 768, 80, 80
P3 = torch.cat([P4_upsample, feat1], 1)
# 768, 80, 80 => 256, 80, 80
P3 = self.conv3_for_upsample2(P3)
# 256, 80, 80 => 256, 40, 40
P3_downsample = self.down_sample1(P3)
# 512, 40, 40 cat 256, 40, 40 => 768, 40, 40
P4 = torch.cat([P3_downsample, P4], 1)
# 768, 40, 40 => 512, 40, 40
P4 = self.conv3_for_downsample1(P4)
# 512, 40, 40 => 512, 20, 20
P4_downsample = self.down_sample2(P4)
# 512, 20, 20 cat 1024 * deep_mul, 20, 20 => 1024 * deep_mul + 512, 20, 20
P5 = torch.cat([P4_downsample, feat3], 1)
# 1024 * deep_mul + 512, 20, 20 => 1024 * deep_mul, 20, 20
P5 = self.conv3_for_downsample2(P5)
#------------------------enhanced feature extraction network------------------------#
# P3 256, 80, 80
# P4 512, 40, 40
# P5 1024 * deep_mul, 20, 20
shape = P3.shape # BCHW
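# apply the plug-in attention to each feature level (shapes are unchanged)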
P3 = self.MobileViTv2Attention2(P3)
P4 = self.MobileViTv2Attention3(P4)
P5 = self.MobileViTv2Attention4(P5)
# P3 256, 80, 80 => num_classes + self.reg_max * 4, 80, 80
# P4 512, 40, 40 => num_classes + self.reg_max * 4, 40, 40
# P5 1024 * deep_mul, 20, 20 => num_classes + self.reg_max * 4, 20, 20
x = [P3, P4, P5]
for i in range(self.nl):
x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
if self.shape != shape:
self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
self.shape = shape
# num_classes + self.reg_max * 4 , 8400 => cls num_classes, 8400;
# box self.reg_max * 4, 8400
box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.num_classes), 1)
# origin_cls = [xi.split((self.reg_max * 4, self.num_classes), 1)[1] for xi in x]
dbox = self.dfl(box)
return dbox, cls, x, self.anchors.to(dbox.device), self.strides.to(dbox.device)
# ---------------- MobileViTv2Attention, pasted in from the attention repo ----------------
from torch.nn import init
class MobileViTv2Attention(nn.Module):
'''
Separable self-attention, as used in MobileViTv2
'''
def __init__(self, d_model):
'''
:param d_model: output dimensionality of the model (the only parameter; the Linear layers act on the last input dimension)
'''
super(MobileViTv2Attention, self).__init__()
self.fc_i = nn.Linear(d_model,1)
self.fc_k = nn.Linear(d_model, d_model)
self.fc_v = nn.Linear(d_model, d_model)
self.fc_o = nn.Linear(d_model, d_model)
self.d_model = d_model
self.init_weights()
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, input):
'''
Computes separable self-attention over the last input dimension.
:param input: (b_s, nq, d_model); BCHW feature maps also work, with d_model equal to the width
:return: tensor with the same shape as the input
'''
i = self.fc_i(input) #(bs,nq,1)
weight_i = torch.softmax(i, dim=1) #bs,nq,1
context_score = weight_i * self.fc_k(input) #bs,nq,d_model
context_vector = torch.sum(context_score,dim=1,keepdim=True) #bs,1,d_model
v = self.fc_v(input) * context_vector #bs,nq,d_model
out = self.fc_o(v) #bs,nq,d_model
return out
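The module is shape-preserving, so before launching a full training run you can sanity-check the width assumption standalone (a minimal sketch; the 640x640 / d_model-equals-width pairing is my reading of the code above):

import torch

# dummy P3 feature map: batch 2, 256 channels, 80x80 spatial (640x640 input)
p3 = torch.randn(2, 256, 80, 80)
attn = MobileViTv2Attention(d_model=80)
out = attn(p3)
print(out.shape)  # torch.Size([2, 256, 80, 80]) -- same shape in, same shape out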
Model modified; let's train it and test the waters.
Woohoo! No problems at all!!!
Of course, on a public dataset there's a good chance your results will be no different from the official ones, or even a bit worse. On some other dataset, though, who knows? The specifics are yours to play with. If you're an undergrad, this much work already feels like enough for a low-tier journal.