gt可视化 — GT (ground-truth) visualization: draw YOLO label boxes onto an image
import cv2

# Visualize YOLO-format ground-truth boxes on an image and save the result.
img = cv2.imread('./4B7A9810.MP4_00113_engine_vgg16.jpg')
if img is None:
    # cv2.imread returns None instead of raising; fail loudly up front.
    raise FileNotFoundError('could not read ./4B7A9810.MP4_00113_engine_vgg16.jpg')
# Derive the pixel dimensions from the image itself instead of hard-coding
# 19584 x 12600, so the script works for any image size.
img_h, img_w = img.shape[:2]
with open('./4B7A9810.MP4_00113.txt', 'r') as f1:
    for line in f1.readlines():
        # YOLO label line: "<class> <cx> <cy> <w> <h>", all normalized to [0, 1].
        numbers = line.strip('\n').split()
        Cx = float(numbers[1])
        Cy = float(numbers[2])
        Wide = float(numbers[3])
        High = float(numbers[4])
        # Convert normalized center/size to absolute corner coordinates.
        p1 = (int((Cx - Wide / 2) * img_w), int((Cy - High / 2) * img_h))
        p2 = (int((Cx + Wide / 2) * img_w), int((Cy + High / 2) * img_h))
        print(p1, p2)
        img = cv2.rectangle(img, p1, p2, (255, 0, 0), 5)
cv2.imwrite('vgg16.jpg', img)
神经网络打印 — print neural-network layer summaries (Darknet-19 / VGG-16 / ResNet-50 / ShuffleNet)
import torch
import torchvision
from torchsummary import summary
from darknet19 import *

# Print layer-by-layer summaries of several backbones, truncated to their
# feature extractors, for architecture comparison.

# Darknet-19: keep the first 43 feature modules (59 for the full stack).
darknet = Darknet19().cuda()
features = list(darknet.features)[:43]  # 59
darknet = torch.nn.Sequential(*features).cuda()
summary(darknet, input_size=(3, 256, 256), batch_size=-1, device='cuda')

# VGG-16: first 30 feature modules (through the last conv block).
vgg = torchvision.models.vgg16(pretrained=False).cuda()
vgg_features = list(vgg.features)[:30]
vgg = torch.nn.Sequential(*vgg_features).cuda()
summary(vgg, input_size=(3, 256, 256), batch_size=-1, device='cuda')

# ResNet-50: children up to layer4, dropping avgpool/fc.
# Use torch.nn explicitly: the bare name `nn` was only in scope by accident,
# leaked through `from darknet19 import *`.
res = torchvision.models.resnet50().cuda()
feature_maps = torch.nn.Sequential(*list(res.children())[:8])
#summary(feature_maps, input_size=(3,256,256), batch_size=-1, device='cuda')

sf = torchvision.models.shufflenet_v2_x1_0().cuda()
#summary(sf, input_size=(3,256,256), batch_size=-1, device='cuda')
把多类的gt标签 按照类别分成单类的gt标签 — split multi-class GT label files into per-class label files
import os
import shutil

# Split multi-class YOLO ground-truth label files into per-class files:
# class 0 -> plane, 1 -> drone, 2 -> person, 3-6 -> car.
input_folder = '/home/etc/Data/all_find/labels/val/'
person_folder = '/home/etc/Data/all_find/labels/val_person/'
drone_folder = '/home/etc/Data/all_find/labels/val_drone/'
car_folder = '/home/etc/Data/all_find/labels/val_car/'
plane_folder = '/home/etc/Data/all_find/labels/val_plane/'

out_folders = {'plane': plane_folder, 'drone': drone_folder,
               'person': person_folder, 'car': car_folder}

i = 1
for filename in os.listdir(input_folder):
    input_path = os.path.join(input_folder, filename)
    print(i)
    i += 1
    # Collect all lines per class first, then write each output file exactly
    # once.  The original reopened and fully rewrote the file for every
    # matching line, which is quadratic in the number of lines.
    tables = {'plane': [], 'drone': [], 'person': [], 'car': []}
    with open(input_path, 'r') as f1:
        for line in f1.readlines():
            # strip('\n') fixes the original strip('/n'), which stripped the
            # characters '/' and 'n' from the ends instead of the newline.
            line = line.strip('\n')
            numbers = line.split()
            _class = int(numbers[0])
            if _class == 0:
                tables['plane'].append(line)
            elif _class == 1:
                tables['drone'].append(line)
            elif _class == 2:
                tables['person'].append(line)
            elif _class in [3, 4, 5, 6]:
                tables['car'].append(line)
    for key, table in tables.items():
        # Only create a per-class file when that class actually appears in
        # the input file, matching the original behaviour.
        if table:
            with open(os.path.join(out_folders[key], filename), 'w') as f2:
                for l in table:
                    f2.write(l)
                    f2.write('\n')
随机划分数据集 — randomly split a dataset into train/val/test
#coding=utf-8
# Randomly split a dataset: move `rate` of the images (with their matching
# label files) from the source folders into the test folders.
import os
import random
import shutil
from tqdm import tqdm

image_path = '/home/etc/Data/4K/images/val/'  # source image folder
label_path = '/home/etc/Data/4K/labels/val/'  # source label folder
out_images = '/home/etc/Data/4K/images/test/'
out_labels = '/home/etc/Data/4K/labels/test/'
rate = 0.5  # fraction of images to move

# Make sure the destinations exist before moving anything.
os.makedirs(out_images, exist_ok=True)
os.makedirs(out_labels, exist_ok=True)

pathDir = os.listdir(image_path)
picknumber = int(len(pathDir) * rate)
sample = random.sample(pathDir, picknumber)
for name in tqdm(sample):
    label_name = name.replace('.jpg', '.txt')
    # os.path.join is safer than raw string concatenation for paths.
    shutil.move(os.path.join(image_path, name), os.path.join(out_images, name))
    shutil.move(os.path.join(label_path, label_name), os.path.join(out_labels, label_name))
图片按照数字名称顺序融合 — merge image pairs in numeric filename order (vertical stack)
import os
import cv2
import numpy as np

# Vertically stack prediction images from two runs, pairing frame i+156 of
# predict33 with frame i+1 of predict32 (a fixed 155-frame offset).
os.makedirs('./merges', exist_ok=True)
for i in range(200):
    file_path_1 = f"/home/etc/ultralytics-yolov8/runs/detect/predict33/F_{str(i + 1 + 155).zfill(5)}.png"
    file_path_2 = f"/home/etc/ultralytics-yolov8/runs/detect/predict32/F_{str(i + 1).zfill(5)}.png"
    img1 = cv2.imread(file_path_1)
    img2 = cv2.imread(file_path_2)
    # cv2.imread returns None for missing/unreadable files; fail with a clear
    # message instead of letting np.concatenate raise a cryptic error.
    if img1 is None or img2 is None:
        raise FileNotFoundError(f'could not read {file_path_1} or {file_path_2}')
    image = np.concatenate((img1, img2), axis=0)
    print(i)
    cv2.imwrite(f'./merges/{i}.png', image)
按数字名称顺序 图片转视频 — encode images into a video in numeric filename order
import cv2
import glob
import os
import re


def extract_number(s):
    """Return the first integer embedded in filename *s* (numeric sort key)."""
    return int(re.findall(r'\d+', s)[0])


# Encode the merged frames into an AVI, in numeric filename order (plain
# os.listdir order would put '10.png' before '2.png').
file_list = sorted(os.listdir('/home/etc/Data/merges/'), key=extract_number)
out = cv2.VideoWriter('VideoResults.avi', cv2.VideoWriter_fourcc(*'DIVX'), 20, (1920, 2160))
i = 0
for filename in file_list:
    img = cv2.imread('/home/etc/Data/merges/' + filename)
    print(filename)
    # Every frame must match the VideoWriter's declared size exactly.
    imgr = cv2.resize(img, (1920, 2160))
    # Label the two stacked camera views (top and bottom halves).
    cv2.putText(imgr, 'EOS-R5', (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 4)
    cv2.putText(imgr, 'CR-N300', (50, 1180), cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 4)
    out.write(imgr)
    print(i)
    i += 1
out.release()
批量修改gt标签的坐标 — batch-edit GT label files (remap class ids)
#coding=utf-8
# Batch-edit YOLO ground-truth label files: shift each class id down by 3
# (e.g. remap classes 3-6 to 0-3) while keeping the box coordinates as-is.
# NOTE(review): the old header mentioned a 19568x12588 -> 19584x12600
# coordinate conversion, but that rescale is commented out below; only the
# class id actually changes — confirm which behaviour is intended.
import os

input_folder = '/home/etc/Data/4K_all_find/labels/test_car/'
output_folder = '/home/etc/Data/4K_all_find/labels/test_car_/'
i = 1
for filename in os.listdir(input_folder):
    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, filename)
    with open(input_path, 'r') as f1, open(output_path, 'w') as f2:
        print(i)
        i += 1
        for line in f1.readlines():
            line = line.strip('\n')  # drop the trailing newline
            numbers = line.split()
            # Label line: "<class> <cx> <cy> <w> <h>".  The redundant
            # int(int(...) - int(3)) wrapper is simplified to a plain shift.
            out_numbers = [int(numbers[0]) - 3] + [float(n) for n in numbers[1:5]]
            # Coordinate rescale kept for reference:
            # [float(num) * (19568 / 19584) for num in numbers[1:]]
            f2.write(' '.join(map(str, out_numbers)))
            f2.write('\n')
效仿VGG写法的 darknet19 — Darknet-19 implemented in the style of torchvision's VGG
# -*- coding: utf-8 -*-
# @Author : LG
# 使用了与torchvision中VGG相同的搭建方式. (Built the same way as torchvision's VGG.)
from torch import nn
import os
import torch
# 参数配置,标准的darknet19参数.
cfg = [32, 'M', 64, 'M', 128, 64, 128, 'M', 256, 128, 256, 'M',
512, 256, 512, 256, 512, 'M', 1024, 512, 1024, 512, 1024]
def make_layers(cfg, in_channels=3, batch_norm=True):
layers = []
flag = True # 用于变换卷积核大小,(True选后面的,False选前面的)
in_channels= in_channels
for v in cfg:
if v == 'M':
layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
else:
layers.append(nn.Conv2d(in_channels = in_channels,
out_channels= v,
kernel_size=(1, 3)[flag],
stride=1,
padding=(0,1)[flag],
bias=False))
if batch_norm:
layers.append(nn.BatchNorm2d(v))
in_channels = v
layers.append(nn.LeakyReLU(negative_slope=0.1, inplace=True))
flag = not flag
return nn.Sequential(*layers)
class Darknet19(nn.Module):
    """Darknet-19 classifier built in the same style as torchvision's VGG.

    Args:
        num_classes: number of output classes.
        in_channels: number of input image channels.
        batch_norm: insert BatchNorm2d after every conv.
        pretrained: load the deepBakSu ImageNet weights (downloaded on demand).
    """

    def __init__(self, num_classes=1000, in_channels=3, batch_norm=True, pretrained=False):
        super(Darknet19, self).__init__()
        # Feature extractor assembled from the module-level `cfg`.
        self.features = make_layers(cfg, in_channels=in_channels, batch_norm=batch_norm)
        # Classification head: 1x1 conv + global average pool instead of a
        # fully connected layer.
        self.classifier = nn.Sequential(
            nn.Conv2d(1024, num_classes, kernel_size=1, stride=1),
            nn.AdaptiveAvgPool2d(output_size=(1)),
            # BUGFIX: softmax must run over the class dimension (dim=1 of the
            # [B, C, 1, 1] pool output); the original dim=0 normalized across
            # the batch instead.
            nn.Softmax(dim=1)
        )
        # Load pretrained weights, or initialize from scratch.
        if pretrained:
            self.load_weight()
        else:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)      # [B, num_classes, 1, 1]
        # Flatten to [B, num_classes] so the output matches an FC-style head.
        x = x.view(x.size(0), -1)
        return x

    def _initialize_weights(self):
        """Kaiming init for convs; unit weight / zero bias for batch norms."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def load_weight(self):
        """Download (if needed) and load the deepBakSu Darknet-19 weights.

        The checkpoint's keys differ from this module's names, so values are
        re-keyed positionally; the assert guards that both state dicts have
        the same number of entries.
        """
        weight_file = 'weights/darknet19-deepBakSu-e1b3ec1e.pth'
        if not os.path.exists(weight_file):
            import wget
            url = 'https://s3.ap-northeast-2.amazonaws.com/deepbaksuvision/darknet19-deepBakSu-e1b3ec1e.pth'
            wget.download(url=url, out='weights/darknet19-deepBakSu-e1b3ec1e.pth')
        # Load the checkpoint once (the original called torch.load twice).
        state = torch.load(weight_file)
        assert len(state.keys()) == len(self.state_dict().keys())
        dic = {}
        for now_keys, values in zip(self.state_dict().keys(), state.values()):
            dic[now_keys] = values
        self.load_state_dict(dic)
if __name__ == '__main__':
    # Smoke test: build the 1000-class ImageNet-pretrained model and push a
    # dummy batch through it.
    # Pretrained on ImageNet: top-1 accuracy 76.5%, top-5 accuracy 93.3%.
    model = Darknet19(num_classes=1000, pretrained=True)
    print(model)
    dummy = torch.zeros((2, 3, 224, 224))
    result = model(dummy)
    print(result.size())
裁剪图片并修改gt — crop images and rewrite the GT labels to match
import cv2
import os
import numpy as np

# Crop 3840x2160 images to square 2690x2690 patches: pad 530 px vertically
# (grey border), then cut a random 2690-wide horizontal window.  YOLO labels
# are re-normalized to the new patch geometry.
input_folder = '/home/etc/Data/4K/images/train/'
output_folder = '/home/etc/Data/4K_crop/images/train/'  # fixes 'ouput' typo
input_label_folder = '/home/etc/Data/4K/labels/train/'
output_label_folder = '/home/etc/Data/4K_crop/labels/train/'
mid_name = '_crop'
patchsize = 2690
h, w = 2160, 3840
w_ratio = 3840 / 2690  # rescales normalized widths to the patch
h_ratio = 2160 / 2690  # rescales normalized heights to the patch
left = right = 0
# NOTE(review): the vertical padding split is randomized ONCE for the whole
# run, so every image gets the same top/bottom padding — confirm whether a
# per-image random split was intended.
top = np.random.randint(0, 530)
bottom = 530 - top
color = (114, 114, 114)

i = 1
for file in os.listdir(input_folder):
    input_file_path = os.path.join(input_folder, file)
    stem, ext = os.path.splitext(file)
    new_name = stem + mid_name + ext
    output_file_path = os.path.join(output_folder, new_name)
    img = cv2.imread(input_file_path)
    if img is None:
        # Fail loudly instead of crashing inside copyMakeBorder.
        raise FileNotFoundError(f'could not read {input_file_path}')
    # Pad to 3840x2690, then crop a random 2690-wide window.
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    crop_w = np.random.randint(0, w - patchsize)
    patch = img[:, crop_w:crop_w + patchsize]
    cv2.imwrite(output_file_path, patch)
    print(file)
    print(i)
    i += 1

    label_file = file.replace('.jpg', '.txt')
    input_labelfile_path = os.path.join(input_label_folder, label_file)
    new_label_name = new_name.replace('.jpg', '.txt')
    output_labelfile_path = os.path.join(output_label_folder, new_label_name)
    with open(input_labelfile_path, 'r') as f1, open(output_labelfile_path, 'w') as f2:
        for line in f1.readlines():
            line = line.strip('\n')
            numbers = line.split()
            # Some label files carry the literal class name instead of an id.
            if numbers[0] == 'Vehicles':
                numbers[0] = 4
            Class = int(numbers[0])
            Cx = float(numbers[1])
            Cy = float(numbers[2])
            Wide = float(numbers[3])
            High = float(numbers[4])
            X1 = Cx - (Wide / 2)
            X2 = Cx + (Wide / 2)
            # Keep only boxes that lie fully inside the cropped window;
            # partially cropped boxes are dropped.
            if (X1 * w) > crop_w and (X2 * w) < crop_w + patchsize:
                # Re-normalize center/size to the 2690x2690 patch.
                Cx_crop = (Cx * w - crop_w) / patchsize
                Cy_crop = (Cy * h + top) / patchsize
                Wide_crop = Wide * w_ratio
                High_crop = High * h_ratio
                crop_numbers = [Class, Cx_crop, Cy_crop, Wide_crop, High_crop]
                f2.write(' '.join(map(str, crop_numbers)))
                f2.write('\n')
批量修改图像大小 cv2.copyMakeBorder — batch-pad images to a new size with cv2.copyMakeBorder
import cv2
import os

# Pad every image in the input folder with a fixed grey border and save it
# to the output folder, skipping files that were already produced.
input_folder = '/home/kasuga/Data/To_Datatang_After_Resize_New/250MP/Ground/'
output_folder = '/home/kasuga/Data/To_Datatang_After_Resize_image/250MP/Ground/'
top, bottom, left, right = 0, 12, 0, 16  # border widths in pixels
color = (114, 114, 114)

already_done = os.listdir(output_folder)
count = 0
for name in os.listdir(input_folder):
    # Guard clause: resume-friendly — skip anything already converted.
    if name in already_done:
        continue
    src_path = os.path.join(input_folder, name)
    dst_path = os.path.join(output_folder, name)
    padded = cv2.copyMakeBorder(cv2.imread(src_path), top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=color)
    cv2.imwrite(dst_path, padded)
    print(count)
    count += 1
数据增强 — offline data augmentation (albumentations)
import albumentations as A
import numpy
import os
import cv2
from PIL import Image

# Offline data augmentation: horizontally flip every image in the input
# folder and save it under a suffixed name, skipping already-produced files.
Image.MAX_IMAGE_PIXELS = None  # allow very large (250 MP) images

inputimage_folder = '/home/kasuga/Data/To_Datatang_After_Resize_250MP/image/new_/'
outputimage_folder = '/home/kasuga/Data/To_Datatang_After_Resize_250MP/image/new/'
# Suffix names the augmentation applied, e.g. '_Hor', '_Hor_Con_CLAHE',
# '_Hor_Con__Blur', '_Hor_Con_Color', '_Con_CLAHE', '_Con_Blur', '_Con_Color'.
mid_name = '_Hor'
out_table = os.listdir(outputimage_folder)

transform = A.Compose([
    A.HorizontalFlip(p=1),
    #A.RandomBrightnessContrast(p=1),
    #A.CLAHE(p=1),
    #A.Blur(p=1),
    #A.ColorJitter(brightness=0.2, contrast=0.2, hue=0.2, p=1),
])

count = 0
for name in os.listdir(inputimage_folder):
    stem, ext = os.path.splitext(name)
    new_name = stem + mid_name + ext
    if new_name not in out_table:
        inputimage_path = os.path.join(inputimage_folder, name)
        image = cv2.imread(inputimage_path)
        if image is None:
            # cv2.imread returns None for unreadable files; fail loudly.
            raise FileNotFoundError(f'could not read {inputimage_path}')
        # albumentations expects RGB; OpenCV loads BGR.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        new_image = transform(image=image)['image']
        # Convert back for cv2.imwrite.  BUGFIX (intent only): the original
        # used COLOR_BGR2RGB here; it performs the same channel swap, but
        # COLOR_RGB2BGR states the actual direction correctly.
        new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)
        cv2.imwrite(os.path.join(outputimage_folder, new_name), new_image)
        print(count)
        count += 1