所有存放图片的文件夹一定要自己新建好!!!不然会报错找不到路径
随机从ccpd取出一定数量的图片
新建一个select_ccpd_data.py文件。
# ccpd_base: 5000
# ccpd_blur: 50
# ccpd_challenge: 50
# ccpd_db: 50
# ccpd_fn: 50
# ccpd_rotate: 50
# ccpd_tilt: 50
# ccpd_weather: 50
from shutil import copyfile
import os
import random
def select_data(src_path, dst_path_1, dst_path_2, num):
    """Copy a reproducible random sample of files from one folder into two folders.

    ``num`` entries are drawn from ``src_path`` with a fixed seed. Walking the
    chosen entries in listing order, the last ``ceil(num / 10)`` of them are
    copied to ``dst_path_2`` and all earlier ones to ``dst_path_1``.

    Args:
        src_path: Folder to sample from.
        dst_path_1: Destination for roughly 90% of the sample (must exist).
        dst_path_2: Destination for roughly 10% of the sample (must exist).
        num: Number of files to sample.

    Raises:
        ValueError: If ``num`` is negative or larger than the number of
            entries in ``src_path``.
    """
    # os.listdir returns entries in arbitrary order; sort so that the fixed
    # seed below really selects the same files on every run.
    images = sorted(os.listdir(src_path))
    if num < 0 or num > len(images):
        raise ValueError(
            f"cannot sample {num} files from {src_path!r}: "
            f"only {len(images)} available"
        )
    if num == 0:
        return

    # A private generator seeded with 0 yields the same sample as
    # random.seed(0) would, without resetting the global random state.
    rng = random.Random(0)
    chosen = sorted(rng.sample(range(len(images)), num))

    # Integer ceiling of num / 10, so exactly one tenth (rounded up) goes to
    # the second folder.
    second_count = -(-num // 10)
    first_count = num - second_count

    for rank, index in enumerate(chosen):
        image = images[index]
        dst_dir = dst_path_1 if rank < first_count else dst_path_2
        # os.path.join works whether or not the folders end with a slash.
        copyfile(os.path.join(src_path, image), os.path.join(dst_dir, image))
if __name__ == "__main__":
    # Location of the CCPD dataset folder.
    root = "D:/homework/yolov5/yolov5-car-practice/DATA/CCPD2019/"

    # How many images to take from each CCPD sub-folder; edit the counts here.
    subset_counts = (
        ("ccpd_base/", 5000),
        ("ccpd_blur/", 50),
        ("ccpd_challenge/", 50),
        ("ccpd_db/", 50),
        ("ccpd_fn/", 50),
        ("ccpd_rotate/", 50),
        ("ccpd_tilt/", 50),
        ("ccpd_weather/", 50),
    )

    # Training-set destination (remember to change this to your own path).
    dst_train_path = "D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd/"
    # Validation-set destination; it is the same folder as the training
    # destination here. (Original author's open question: why does this have
    # to be an absolute path?)
    dst_val_path = "D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd/"

    for subset, count in subset_counts:
        select_data(
            root + subset,
            dst_path_1=dst_train_path,
            dst_path_2=dst_val_path,
            num=count,
        )
图片存放在small_ccpd。
划分训练集、验证集和测试集,按9:1:0划分的
新建一个divide_data.py文件。
import os
import random
import shutil
from shutil import copy2
# Shuffle the sampled images and copy them into train / val / test folders.
# With the thresholds below the ratio is 9:1:0, so the test folder stays empty.
srcDir = r"D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd"  # source image folder
trainfiles = os.listdir(srcDir)
num_train = len(trainfiles)
print("num_train: " + str(num_train))
index_list = list(range(num_train))
print(index_list)
random.shuffle(index_list)  # randomize which files land in which split

trainDir = r"D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd_yolo/images/train"  # receives 9 parts
validDir = r"D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd_yolo/images/val"  # receives 1 part
detectDir = r"D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd_yolo/images/test"  # receives 0 parts

# copy2 interprets a non-existent destination as a file name, so a missing
# folder would be replaced by a single, repeatedly overwritten file (or fail
# outright). Create the folders up front instead.
for outDir in (trainDir, validDir, detectDir):
    os.makedirs(outDir, exist_ok=True)

for num, i in enumerate(index_list):
    fileName = os.path.join(srcDir, trainfiles[i])  # source folder + image name
    if num < num_train * 0.9:  # first cumulative threshold: train share
        targetDir = trainDir
    elif num < num_train * 1.0:  # second cumulative threshold: train + val share
        targetDir = validDir
    else:
        targetDir = detectDir
    print(str(fileName))
    copy2(fileName, targetDir)
如果train:val:test=7:2:1,那么第一个数字写0.7,第二个写0.9(0.7+0.2=0.9)
如果train:val:test=9:1:0,那么第一个数字写0.9,第二个写(0.9+0.1=1.0)
图片存放在small_ccpd_yolo-images里。
将划分好的数据集转换成yolov5的格式
新建一个ccpd2yolov5.py文件。ccpd数据集的检测和识别标签都在图片名中,直接从图片名上读取出来,再写入txt文件中即可。
(路径最后一定要加斜杠/,不然会把原来划分好的图片都删了)
import shutil
import cv2
import os
def txt_translate(path, txt_path):
    """Write a YOLO-format label file for every CCPD image in ``path``.

    The bounding box is read from the image file name: the third
    ``-``-separated field has the form ``lx&ly_rx&ry`` (left-top and
    right-bottom corners in pixels). The box is converted to a centre point
    plus width/height, normalised by the image size, and written to
    ``<txt_path>/<image stem>.txt`` as ``0 cx cy width height``.

    Files ending in ``.txt`` and files whose name does not follow the CCPD
    pattern are skipped. Images that OpenCV cannot read are deleted from
    ``path``.

    Args:
        path: Folder containing the images.
        txt_path: Folder that receives the label files (must exist).
    """
    for filename in os.listdir(path):
        print(filename)

        # Check the extension before parsing so stray label files can never
        # crash the name parser.
        stem, ext = os.path.splitext(filename)
        if ext.lower() == ".txt":
            continue

        try:
            box_field = filename.split("-", 3)[2]  # first split, on '-'
            left_top, right_bottom = box_field.split("_", 1)  # second split, on '_'
            lx, ly = (int(v) for v in left_top.split("&", 1))
            rx, ry = (int(v) for v in right_bottom.split("&", 1))
        except (IndexError, ValueError):
            print(f"skipped, not a CCPD file name: {filename}")
            continue

        # Width/height and centre of the bounding box, in pixels.
        width = rx - lx
        height = ry - ly
        cx = lx + width / 2
        cy = ly + height / 2

        # Read and delete through the SAME joined path. Reading via plain
        # string concatenation fails when ``path`` has no trailing slash,
        # which would make every valid image look unreadable and get deleted.
        image_file = os.path.join(path, filename)
        img = cv2.imread(image_file)
        if img is None:
            # Automatically remove broken images (some downloads are unreadable).
            os.remove(image_file)
            continue

        # Normalise by image width (shape[1]) and height (shape[0]).
        img_height, img_width = img.shape[0], img.shape[1]
        width = width / img_width
        height = height / img_height
        cx = cx / img_width
        cy = cy / img_height

        txtfile = os.path.join(txt_path, stem + ".txt")
        # Original note: green plates are class 0, blue plates are class 1.
        # This function always writes class 0.
        with open(txtfile, "w") as f:
            f.write(str(0) + " " + str(cx) + " " + str(cy) + " " + str(width) + " " + str(height))
if __name__ == '__main__':
    # Each pair is (image folder, label folder) for one split. A label file
    # holds the class id followed by four box values.
    split_dirs = [
        (
            r"D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd_yolo/images/train/",
            r"D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd_yolo/labels/train/",
        ),
        (
            r"D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd_yolo/images/val/",
            r"D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd_yolo/labels/val/",
        ),
        (
            r"D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd_yolo/images/test/",
            r"D:/homework/yolov5/yolov5-car-practice/DATA/small_ccpd_yolo/labels/test/",
        ),
    ]
    # Convert train, then val, then test.
    for image_dir, label_dir in split_dirs:
        txt_translate(image_dir, label_dir)
标签存放在small_ccpd_yolo-labels里。
我的文件夹目录
总结
总结来说就是,先运行select_ccpd_data.py,从CCPD2019这个巨大的数据集里随机拿出一小部分图片存到small_ccpd;然后运行divide_data.py,把small_ccpd里的数据进行划分,分成训练集、验证集、测试集(我没划测试集,到时候直接给一张图片detect就好了),把图片存进small_ccpd_yolo;最后运行ccpd2yolov5.py,将small_ccpd_yolo里的各个集里面的ccpd文件名转化成txt格式。一个标准的yolov5格式的数据集small_ccpd_yolo就做好啦。