一、制作步骤:
1.收集数据(图片和视频)
#爬取百度上的图片
import os
import requests
import tqdm
# Browser-like request headers; passed as ``headers=header`` to the HTTP calls in this script.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/66.0.3359.139 Safari/537.36'
    ),
}
def get_img(url, idx, path, timeout=10):
    """Download one image and store it as ``<idx>.jpg`` inside *path*.

    Args:
        url: Address of the image to fetch.
        idx: Value used as the file name (``f"{idx}.jpg"``).
        path: Existing directory that receives the file.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True when the image was written; False when the download failed
        (network error, timeout, non-2xx status or empty body), in which
        case nothing is saved.
    """
    try:
        img = requests.get(url, headers=header, timeout=timeout)
        img.raise_for_status()
    except requests.RequestException as err:
        # A single dead link must not abort the whole crawl, and an error
        # page must never end up on disk under a .jpg name.
        print(f"\n下载失败,已跳过: {url} ({err})")
        return False
    if not img.content:
        print(f"\n下载内容为空,已跳过: {url}")
        return False
    with open(os.path.join(path, f"{idx}.jpg"), 'wb') as file:
        file.write(img.content)
    return True
# Ask the user what to search for, how many images to fetch and where to store them.
search = input("请输入搜索内容:")
number = int(input("请输入需求数量:"))
save_path = input("请输入图片要保存的位置:")
# exist_ok=True replaces the separate os.path.exists() check and its race window.
os.makedirs(save_path, exist_ok=True)
bar = tqdm.tqdm(total=number)
page = 0
url = 'https://image.baidu.com/search/acjson'  # loop-invariant, so defined once
while number > 0:
    # Query string for the acjson endpoint. Only the search term
    # (queryWord / word), the offset (pn) and the page size (rn) vary.
    params = {
        "tn": "resultjson_com",
        "logid": "11555092689241190059",
        "ipn": "rj",
        "ct": "201326592",
        "is": "",
        "fp": "result",
        "queryWord": search,
        "cl": "2",
        "lm": "-1",
        "ie": "utf-8",
        "oe": "utf-8",
        "adpicid": "",
        "st": "-1",
        "z": "",
        "ic": "0",
        "hd": "",
        "latest": "",
        "copyright": "",
        "word": search,
        "s": "",
        "se": "",
        "tab": "",
        "width": "",
        "height": "",
        "face": "0",
        "istype": "2",
        "qc": "",
        "nc": "1",
        "fr": "",
        "expermode": "",
        "force": "",
        "pn": str(60 * page),
        "rn": min(number, 60),  # request at most 60 images per page
        "gsm": "1e",
        "1617626956685": ""
    }
    try:
        result = requests.get(url, headers=header, params=params, timeout=10).json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a response body that is not valid JSON.
        print(f"\n请求失败,已停止: {err}")
        break
    # Keep only entries that actually carry a thumbnail URL. The previous
    # code dropped the last entry unconditionally and raised KeyError on
    # any other entry without the key.
    url_list = [
        data['thumbURL']
        for data in (result.get('data') or [])
        if data and data.get('thumbURL')
    ]
    if not url_list:
        # No more results: without this exit `number` would never reach 0
        # and the loop would request pages forever.
        print("\n没有更多图片了")
        break
    for i, img_url in enumerate(url_list):
        get_img(img_url, 60 * page + i, save_path)
        bar.update(1)
        number -= 1
        if number == 0:
            break
    page += 1
bar.close()
print("\n完成!")
2.处理视频进行解码:将视频的每一帧按照自己要求进行解码保存。
import cv2
import numpy as np
import os
import glob
import time
from ultralytics import YOLO
# Load the detection model once at module level; video2frame() below reads this global `model`.
model_file = "./weights/yolov8x.pt" # Path to the model weights file passed to YOLO()
model = YOLO(model_file)
# NOTE(review): presumably the class-id -> class-name mapping of the loaded model (confirm
# against the ultralytics docs); nothing in the visible source reads objs_labels afterwards.
objs_labels = model.names
# Function to extract frames from a video
def video2frame(video_file, save_directory, skip_interval=10, target_class=0, conf=0.4):
    """Save the sampled frames of *video_file* that contain the target class.

    Every ``skip_interval``-th frame is run through the module-level
    ``model``. A frame with at least one detection of ``target_class`` is
    written once to ``save_directory`` as ``<video name>_<frame index>.jpg``
    and its path is printed.

    Args:
        video_file: Path of the video to decode.
        save_directory: Existing directory that receives the saved frames.
        skip_interval: Only frames whose 1-based index is a multiple of this
            value are inspected.
        target_class: Class id a detection must have for the frame to be kept.
        conf: Confidence threshold forwarded to the model.

    Raises:
        ValueError: If ``skip_interval`` is smaller than 1.
    """
    if skip_interval < 1:
        raise ValueError("skip_interval must be a positive integer")
    # basename/splitext cope with either path separator and with dots inside
    # the file name, unlike splitting on os.sep and on the first '.'.
    video_name = os.path.splitext(os.path.basename(video_file))[0]
    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            print(f"Failed to open video: {video_file}")
            return
        index = 0
        # Read until the decoder reports no more frames instead of trusting
        # CAP_PROP_FRAME_COUNT, which is only a container-level value.
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            index += 1
            if index % skip_interval != 0:
                continue
            # Detect objects
            result = model(frame, conf=conf)[0]
            class_ids = result.boxes.cls.tolist()
            if any(int(class_id) == target_class for class_id in class_ids):
                save_img_file = os.path.join(save_directory, f"{video_name}_{index}.jpg")
                cv2.imwrite(save_img_file, frame)
                print(save_img_file)
    finally:
        # Release the capture even when detection or writing raises.
        cap.release()
# Create the output directory; exist_ok=True makes this safe when it already exists
save_directory = "D:/xiangmu/paqutupian/picture"
os.makedirs(save_directory, exist_ok=True)
# Iterate through all videos in sorted order, because glob itself returns them in arbitrary order
videos_list = sorted(glob.glob("D:/xiangmu/paqutupian/video/*.mp4"))
for video_file in videos_list:
    video2frame(video_file, save_directory, skip_interval=10)  # inspect one frame out of every 10
print("done")
只需要修改四个地方:
model_file = "./weights/yolov8x.pt" #需要用到的预训练模型
save_directory = "D:/xiangmu/paqutupian/picture" #解码后图片保存的文件夹
videos_list = glob.glob("D:/xiangmu/paqutupian/video/*.mp4") #视频所在的文件夹
video2frame(video_file, save_directory, skip_interval=10) #定义每隔多少帧进行解码
3.对于解码后的视频或者搜集的图片使用yolo进行自动标注。
import cv2
import os
import glob
from ultralytics import YOLO
# Load the detection model once at module level; image_2_yolo() below reads this global `model`.
model_file = "./weights/yolov8x.pt" # Path to the model weights file passed to YOLO()
model = YOLO(model_file)
# NOTE(review): presumably the class-id -> class-name mapping of the loaded model (confirm
# against the ultralytics docs); nothing in the visible source reads objs_labels afterwards.
objs_labels = model.names
# List of images to label, sorted so the processing and preview order is reproducible
images_list = sorted(glob.glob("D:/xiangmu/paqutupian/picture/*.jpg"))
# Create the label directory; exist_ok=True replaces the separate os.path.exists() check
save_directory = "D:/xiangmu/paqutupian/label"
os.makedirs(save_directory, exist_ok=True)
# Function to turn model detections into YOLO-format label files
def image_2_yolo(image_paths=None, label_dir=None, target_class=0, conf=0.5):
    """Auto-label images with the module-level ``model`` and write YOLO txt files.

    For every readable image one ``<image name>.txt`` is written. Each line is
    ``class x_center y_center width height`` with the four box values divided
    by the image width/height and rounded to 6 decimals. Only detections of
    ``target_class`` are kept; an image without any gets an empty file.

    Args:
        image_paths: Images to label; defaults to the module-level ``images_list``.
        label_dir: Directory for the txt files; defaults to the module-level
            ``save_directory``.
        target_class: Class id to keep (the original code kept id 0 and
            commented it as the person class).
        conf: Confidence threshold forwarded to the model.
    """
    if image_paths is None:
        image_paths = images_list
    if label_dir is None:
        label_dir = save_directory
    for img_path in image_paths:
        img_id = os.path.splitext(os.path.basename(img_path))[0]
        img_data = cv2.imread(img_path)
        if img_data is None:
            # cv2.imread reports an unreadable file by returning None; skip
            # it instead of crashing in the model call.
            print(f"Failed to read image: {img_path}")
            continue
        result = model(img_data, conf=conf)[0]
        # Rows are read below as: 4 corner coordinates, confidence, class id.
        boxes = result.boxes.data.cpu().numpy()
        img_h, img_w = img_data.shape[:2]
        yolo_boxes = []
        for box in boxes:
            class_label = int(box[5])
            if class_label != target_class:
                continue
            # Corners are truncated to whole pixels before normalising.
            left, top, right, bottom = (int(value) for value in box[:4])
            x_center = round((left + right) / 2 / img_w, 6)
            y_center = round((top + bottom) / 2 / img_h, 6)
            width = round((right - left) / img_w, 6)
            height = round((bottom - top) / img_h, 6)
            yolo_boxes.append([class_label, x_center, y_center, width, height])
        yolo_label_file = os.path.join(label_dir, f"{img_id}.txt")
        with open(yolo_label_file, 'w') as f:
            for yolo_box in yolo_boxes:
                f.write(' '.join(map(str, yolo_box)) + '\n')
# Function to draw bounding boxes on images
def draw_boxes(img, yolo_label_file, color=(0, 0, 255)):
    """Return a copy of *img* with the boxes of a YOLO label file drawn on it.

    Args:
        img: Image array; it is copied, the input is left untouched.
        yolo_label_file: Text file whose non-empty lines read
            ``class x_center y_center width height`` with values relative to
            the image size.
        color: Drawing colour in OpenCV's BGR order. The default is red,
            which is what the original comment asked for; the previous
            ``(255, 0, 0)`` renders blue in BGR.

    Returns:
        The annotated copy of the image.
    """
    img_copy = img.copy()
    img_h, img_w = img_copy.shape[:2]
    with open(yolo_label_file, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines, e.g. a trailing newline added by hand.
                continue
            class_label, x_center, y_center, width, height = map(float, fields)
            # Convert the normalised centre/size back to pixel corners.
            x1 = int((x_center - width / 2) * img_w)
            y1 = int((y_center - height / 2) * img_h)
            x2 = int((x_center + width / 2) * img_w)
            y2 = int((y_center + height / 2) * img_h)
            cv2.rectangle(img_copy, (x1, y1), (x2, y2), color, 2)
            cv2.putText(img_copy, str(int(class_label)), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
    return img_copy
# Convert images to YOLO format
image_2_yolo()
# Preview the labels of the first 5 images; press any key to move to the next one
for img_path in images_list[:5]:
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for an unreadable file; skip it rather than crash.
        print(f"Failed to read image: {img_path}")
        continue
    img_id = os.path.splitext(os.path.basename(img_path))[0]
    yolo_label_file = os.path.join(save_directory, f"{img_id}.txt")
    if not os.path.isfile(yolo_label_file):
        print(f"Label file not found: {yolo_label_file}")
        continue
    img_with_boxes = draw_boxes(img, yolo_label_file)
    cv2.imshow("Image with Bounding Boxes", img_with_boxes)
    cv2.waitKey(0)
cv2.destroyAllWindows()
# Generate classes.txt
with open('./classes.txt', 'w') as f:
    f.write('0 person\n')
print("Done")
只需要修改四个地方:
model_file = "./weights/yolov8x.pt" # 预训练模型
images_list = glob.glob("D:/xiangmu/paqutupian/picture/*.jpg") #需要进行自动标注的图片
save_directory = "D:/xiangmu/paqutupian/label" #标注信息保存的文件夹路径
with open('./classes.txt', 'w') as f: #标注的类别
f.write('0 person\n')
4.使用yolo进行自动标注后总会出现漏标的情况,接下来对所有的标注图片检查,对漏标的目标进行手工标注。
pip install labelimg #安装标注工具labelimg
labelimg #运行
把“classes.txt”保存到“label”文件夹中:
如果有多个类别,在“classes.txt”中写入:
0 person
1 cat
......
点击“Open Dir”打开图片文件夹:
点击“Next Image”,逐张检查图片;发现有漏标的目标时,按快捷键“W”进行手动标注:
5.进行数据集的划分。
import shutil
import tqdm
import glob
import os
# Input and output locations
image_dir = "D:/xiangmu/paqutupian/picture/"
label_dir = "D:/xiangmu/paqutupian/label/"
output_dir = "D:/xiangmu/paqutupian/person_data/"
# Collect the images in sorted order and pair every image with the label file
# that shares its file stem. Globbing *.txt separately and slicing both lists
# by index misaligns the pairs as soon as the two folders differ in count
# (for example when classes.txt sits in the label folder).
images_list = sorted(glob.glob(os.path.join(image_dir, "*.jpg")))
image_label_pairs = [
    (
        image_path,
        os.path.join(label_dir, os.path.splitext(os.path.basename(image_path))[0] + ".txt"),
    )
    for image_path in images_list
]
# Only labels that exist on disk are listed, so copying them later cannot fail.
label_list = [label_path for _, label_path in image_label_pairs if os.path.isfile(label_path)]
# Create the output folder structure
for data_kind in ("images", "labels"):
    for subset in ("train", "val", "test"):
        os.makedirs(os.path.join(output_dir, f"{data_kind}/{subset}"), exist_ok=True)
# Split ratios; the test split receives whatever remains after train and val
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1
total_images = len(images_list)
train_num = int(total_images * train_ratio)
val_num = int(total_images * val_ratio)
test_num = total_images - train_num - val_num
# Cut the (image, label) pairs once so both always land in the same split
train_pairs = image_label_pairs[:train_num]
val_pairs = image_label_pairs[train_num:train_num + val_num]
test_pairs = image_label_pairs[train_num + val_num:]
train_images_list = [image_path for image_path, _ in train_pairs]
val_images_list = [image_path for image_path, _ in val_pairs]
test_images_list = [image_path for image_path, _ in test_pairs]
train_label_list = [label_path for _, label_path in train_pairs if os.path.isfile(label_path)]
val_label_list = [label_path for _, label_path in val_pairs if os.path.isfile(label_path)]
test_label_list = [label_path for _, label_path in test_pairs if os.path.isfile(label_path)]
# Report the size of every split
print(f"Train images: {len(train_images_list)}, labels: {len(train_label_list)}")
print(f"Val images: {len(val_images_list)}, labels: {len(val_label_list)}")
print(f"Test images: {len(test_images_list)}, labels: {len(test_label_list)}")
missing_labels = total_images - len(label_list)
if missing_labels:
    print(f"Images without a label file: {missing_labels}")
# Copy files into the matching output folder
def copy_files(file_list, output_path):
    """Copy every file in *file_list* into *output_path* with a progress bar.

    Args:
        file_list: Paths of the files to copy; each keeps its own file name.
        output_path: Destination directory; created when it does not exist.
    """
    # normpath strips a trailing separator, so the progress-bar label is the
    # last folder name even for paths like "out/train/".
    bar_label = os.path.basename(os.path.normpath(output_path))
    os.makedirs(output_path, exist_ok=True)
    for file_path in tqdm.tqdm(file_list, desc=bar_label):
        file_name = os.path.basename(file_path)
        shutil.copy(file_path, os.path.join(output_path, file_name))
# Copy each split into its folder: the three image splits first, then the three label splits.
split_targets = (
    (train_images_list, "images/train"),
    (val_images_list, "images/val"),
    (test_images_list, "images/test"),
    (train_label_list, "labels/train"),
    (val_label_list, "labels/val"),
    (test_label_list, "labels/test"),
)
for split_files, relative_dir in split_targets:
    copy_files(split_files, os.path.join(output_dir, relative_dir))
print("Dataset split completed.")
只需要修改以下四处(三个路径和划分比例):
image_dir = "D:/xiangmu/paqutupian/picture/" #图片文件夹路径
label_dir = "D:/xiangmu/paqutupian/label/" #标注信息文件夹路径
output_dir = "D:/xiangmu/paqutupian/person_data/"#划分好的数据集的路径
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1 #选择train、val、test的比例
二、遇到的问题:
1.用yolo进行训练时,出现错误:corrupt JPEG restored and saved
出现这种错误,是由于下载图片时强制转换格式造成的。运行以下代码,将图片格式重新进行转化:
import os
import cv2
dataDir = "xxxx/picture/"  # folder that holds the originally downloaded images
saveDir = "xxxxx/image/"  # folder that receives the re-encoded images
# exist_ok=True replaces the separate os.path.exists() check
os.makedirs(saveDir, exist_ok=True)
for one_pic in os.listdir(dataDir):
    one_path = os.path.join(dataDir, one_pic)
    if not os.path.isfile(one_path):
        print(f"{one_path} is not a file")
        continue
    # Decoding and re-encoding with OpenCV rewrites the file from scratch.
    one_img = cv2.imread(one_path)
    if one_img is None:
        print(f"Failed to read image: {one_path}")
        continue
    new_path = os.path.join(saveDir, one_pic)
    # cv2.imwrite reports failure through its return value, so only claim
    # success when it returns True.
    if cv2.imwrite(new_path, one_img):
        print(f"Saved {one_pic} to {new_path}")
    else:
        print(f"Failed to write image: {new_path}")
运行结果:
可以看出原来的和转化后的文件夹的大小不同: