先看结果吧
然后进入正题,展示一下数据集格式
再展示一下修改后的数据集格式(每一张原图都随机裁剪出五张 416×416 的新图,并生成对应的标注文件)
这是数据增强的代码,记得按自己文件的路径和命名进行少量修改
import cv2
import json
import numpy as np
import base64
import io
from PIL import Image
import random
# Build an augmented dataset by random-cropping labelme-style JSON annotations.
def data_build(src_dir='dataset', out_dir='.', num_images=45,
               crops_per_image=5, target_size=416):
    """Create randomly cropped copies of every annotated image.

    For each source file ``<src_dir>/NN.json`` (``NN`` is the zero-padded
    image index, starting at 01) the embedded base64 image is decoded once,
    then ``crops_per_image`` square windows of ``target_size`` pixels are cut
    at random positions.  Each crop is written to
    ``<out_dir>/<i>_<j>.json`` together with the shapes whose centre lies
    strictly inside the window; their coordinates are shifted into the crop's
    frame and clipped to the crop's borders.

    Args:
        src_dir: Directory holding the source ``NN.json`` files.
        out_dir: Directory the cropped JSON files are written to
            (created if missing).  Defaults to the current directory.
        num_images: Number of source files; indices run 1..num_images.
        crops_per_image: Number of random crops produced per source image.
        target_size: Side length, in pixels, of each square crop.

    Raises:
        ValueError: If an embedded image cannot be decoded, is smaller than
            ``target_size`` in either dimension, or a crop cannot be
            JPEG-encoded.
    """
    import os  # local import so the block does not depend on the file header

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for i in range(1, num_images + 1):
        json_file_path = os.path.join(src_dir, f'{str(i).zfill(2)}.json')

        # Read and decode the source once per image, not once per crop.
        with open(json_file_path, 'r') as f:
            data = json.load(f)
        image_array = np.frombuffer(
            base64.b64decode(data['imageData']), dtype=np.uint8)
        image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
        if image is None:
            raise ValueError(
                f'could not decode imageData of {json_file_path}')

        height, width = image.shape[:2]
        if height < target_size or width < target_size:
            raise ValueError(
                f'{json_file_path}: image is {width}x{height}, smaller than '
                f'the {target_size}x{target_size} crop')

        for j in range(1, crops_per_image + 1):
            # Pick the top-left corner of the crop window (x first, then y,
            # so a seeded random generator yields the same sequence as before).
            x = random.randint(0, width - target_size)
            y = random.randint(0, height - target_size)
            print('x的范围为', x, '--', x + target_size)
            print('y的范围为', y, '--', y + target_size)

            # The window always fits inside the image, so the slice already
            # has the target size and no resize is needed.
            cropped_image = np.ascontiguousarray(
                image[y:y + target_size, x:x + target_size])
            ok, buffer = cv2.imencode('.jpg', cropped_image)
            if not ok:
                raise ValueError(
                    f'could not JPEG-encode crop {j} of {json_file_path}')

            new_data = {
                "version": data["version"],
                "flags": data["flags"],
                "shapes": [],
                "imagePath": data["imagePath"],
                "imageData": base64.b64encode(buffer).decode(),
                "imageHeight": target_size,
                "imageWidth": target_size
            }

            for shape in data['shapes']:
                points = shape['points']
                print("points:", points)
                # Only the first two points are used (two opposite corners
                # of a rectangle -- presumably all shapes are rectangles).
                x1, y1 = points[0]
                x2, y2 = points[1]
                x_ = (x1 + x2) / 2
                y_ = (y1 + y2) / 2
                # Keep the shape only when its centre is inside the window.
                if x < x_ < x + target_size and y < y_ < y + target_size:
                    print("找到了一个点,x为:", x1, ',y为', y1)
                    # Copy the shape: `data` is reused for the next crop and
                    # must keep its original coordinates.
                    new_shape = dict(shape)
                    new_shape['points'] = [
                        [_clip_to_crop(x1 - x, target_size),
                         _clip_to_crop(y1 - y, target_size)],
                        [_clip_to_crop(x2 - x, target_size),
                         _clip_to_crop(y2 - y, target_size)],
                    ]
                    new_data['shapes'].append(new_shape)

            # Save the crop and its annotations as a new JSON file.
            with open(os.path.join(out_dir, f'{i}_{j}.json'), 'w') as f:
                json.dump(new_data, f, indent=4)


def _clip_to_crop(value, size):
    """Clamp a shifted coordinate to the closed range [0, size]."""
    return min(max(value, 0), size)
# Run the augmentation: reads the source JSON files and writes the cropped ones.
data_build()
裁剪完毕,读取其中一个生成的标注文件看看效果(注意:上面的脚本默认把生成的 JSON 写到当前工作目录,下面的代码假设这些文件已经放到 dataset_build/ 目录下)
import json
import cv2
import base64
import numpy as np
# Load one of the generated annotation files.
with open('dataset_build/26_3.json', 'r') as annotation_file:
    data = json.load(annotation_file)

# Pull out the stored image path and the base64-encoded image payload.
image_path = data["imagePath"]
encoded_image = data["imageData"]

# base64 text -> raw bytes -> uint8 buffer -> BGR image.
raw_bytes = base64.b64decode(encoded_image)
pixel_buffer = np.frombuffer(raw_bytes, dtype=np.uint8)
image = cv2.imdecode(pixel_buffer, cv2.IMREAD_COLOR)

# Rectangle colour (BGR red) and line thickness used for every box.
BOX_COLOR = (0, 0, 255)
BOX_THICKNESS = 2

# Draw each annotated shape as a rectangle spanned by its first two points.
for annotation in data['shapes']:
    label = annotation['label']
    corners = annotation['points']
    left, top = corners[0]
    right, bottom = corners[1]
    first_corner = (int(left), int(top))
    second_corner = (int(right), int(bottom))
    cv2.rectangle(image, first_corner, second_corner, BOX_COLOR, BOX_THICKNESS)

# Show the result until a key is pressed, then close the window.
cv2.imshow('Image with Annotations', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
结果: