在使用 YOLOv5 训练数据集时,出现如下警告(形如 `WARNING: Ignoring corrupted image and/or label <图片路径>: corrupted JPEG`)。原数据集有 2 万多张图片,排除触发警告的图片后,只剩下 1 万 9 千多张。
先说结论:问题出在 yolov5-master\utils\datasets.py 中。找到如下代码:
def verify_image_label(args):
    """Verify one image-label pair.

    Args:
        args: ``(im_file, lb_file, prefix)`` -- image path, label path and the
            prefix placed in front of the warning message.

    Returns:
        On success, the tuple ``(im_file, labels, shape, segments, nm, nf, ne,
        nc, '')``. ``labels`` is a float32 array with 5 columns (cls, xywh),
        ``shape`` is the value returned by ``exif_size`` and ``segments`` is
        the list of per-instance polygons (empty for plain box labels). The
        counters flag label missing / found / empty / corrupt.
        On any failure, the list ``[None, None, None, None, nm, nf, ne, 1,
        msg]`` where ``msg`` names the file and the cause.
    """
    im_file, lb_file, prefix = args
    nm, nf, ne, nc = 0, 0, 0, 0  # number missing, found, empty, corrupt
    try:
        # verify images
        # The context manager releases the image file handle as soon as the
        # checks are done instead of leaving it to garbage collection.
        with Image.open(im_file) as im:
            im.verify()  # PIL verify
            shape = exif_size(im)  # image size
            assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
            assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
            # JPEG tail check: the last two bytes of the file must be b'\xff\xd9'.
            if im.format.lower() in ('jpg', 'jpeg'):
                with open(im_file, 'rb') as f:
                    f.seek(-2, 2)
                    assert f.read() == b'\xff\xd9', 'corrupted JPEG'

        # verify labels
        segments = []  # instance segments
        if os.path.isfile(lb_file):
            nf = 1  # label found
            with open(lb_file, 'r') as f:
                rows = [x.split() for x in f.read().strip().splitlines() if len(x)]
            labels = rows
            if any(len(x) > 8 for x in rows):  # is segment
                classes = np.array([x[0] for x in rows], dtype=np.float32)
                segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in rows]  # (cls, xy1...)
                labels = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
            labels = np.array(labels, dtype=np.float32)
            if len(labels):
                assert labels.shape[1] == 5, 'labels require 5 columns each'
                assert (labels >= 0).all(), 'negative labels'
                assert (labels[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
                assert np.unique(labels, axis=0).shape[0] == labels.shape[0], 'duplicate labels'
            else:
                ne = 1  # label empty
                labels = np.zeros((0, 5), dtype=np.float32)
        else:
            nm = 1  # label missing
            labels = np.zeros((0, 5), dtype=np.float32)
        return im_file, labels, shape, segments, nm, nf, ne, nc, ''
    except Exception as e:
        # Any failure above (bad image, bad label, I/O error) marks the pair as
        # corrupt; the caller receives a message instead of an exception.
        nc = 1
        msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}'
        return [None, None, None, None, nm, nf, ne, nc, msg]
把其中下面这段代码注释掉或删除,即可成功读取所有图片(注意:这样会跳过对 JPEG 文件结尾的完整性检查)。
# JPEG tail check: read the final two bytes of the file and require b'\xff\xd9'.
if im.format.lower() in ('jpg', 'jpeg'):
    with open(im_file, 'rb') as f:
        f.seek(-2, 2)  # whence=2: the offset is relative to the end of the file
        assert f.read() == b'\xff\xd9', 'corrupted JPEG'
具体原因:
用 open 以二进制模式('rb')打开图片之后,f.seek(-2, 2) 会把文件指针移动到距文件末尾 2 个字节的位置(第二个参数 2 表示以文件末尾为基准),随后 f.read() 读出的就是文件的最后两个字节。之所以读到的是字节,是因为文件以 'rb' 模式打开,f.read() 返回的是字节串,类型为 bytes。一张图片的字节内容大致如下(十六进制格式):
开头: b'\xff\xd8\xff\xe1#\x12Exif\x00\x00MM\x00*\x00
结尾: \xfbf`\x1d\xf1\xcf@\xa3\x81W\xe4\x80\xaeK\x10\xaaG\'<U6\x85p\xa5\x15\x88\xcf r+\xa3\x0f\x86\xa8\xea]\x91V\xac#I\xb6\xcf\xff\xd9'
完整的 JPEG 图片都以 \xff\xd9(图像结束标记 EOI)结尾!
对于结尾不正确的图片,需要把文件结尾改成 \xff\xd9。所用工具为 Hex Editor Neo,可以直接编辑文件的十六进制内容。