当执行如下语句时报错:
model.fit(train_dataset, # 训练数据集
eval_dataset, # 评估数据集
epochs=config_parameters['epochs'], # 总的训练轮次
batch_size=config_parameters['batch_size'], # 批次计算的样本量大小
shuffle=True, # 是否打乱样本集
verbose=1, # 日志展示格式
save_dir='./chk_points/', # 分阶段的训练模型存储路径
callbacks=Callbacks) # 多个回调函数使用
报错信息如下:
WARNING:root:DataLoader reader thread raised an exception.
Exception in thread Thread-9:
Traceback (most recent call last):
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/threading.py", line 926, in _bootstrap_inner
self.run()
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dataloader/dataloader_iter.py", line 347, in _thread_loop
six.reraise(*sys.exc_info())
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/six.py", line 703, in reraise
raise value
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dataloader/dataloader_iter.py", line 317, in _thread_loop
batch = self._dataset_fetcher.fetch(indices)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dataloader/fetcher.py", line 60, in fetch
data = [self.dataset[idx] for idx in batch_indices]
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dataloader/fetcher.py", line 60, in <listcomp>
data = [self.dataset[idx] for idx in batch_indices]
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/datasets/cifar.py", line 165, in __getitem__
image = self.transform(image)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/transforms.py", line 122, in __call__
raise e
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/transforms.py", line 117, in __call__
data = f(data)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/transforms.py", line 273, in __call__
outputs.append(apply_func(inputs[i]))
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/transforms.py", line 657, in _apply_image
self.to_rgb)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/functional.py", line 655, in normalize
return F_cv2.normalize(img, mean, std, data_format, to_rgb)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/functional_cv2.py", line 562, in normalize
img = (img - mean) / std
ValueError: operands could not be broadcast together with shapes (32,32,3) (3,1,1)
The loss value printed in the log is the current step, and the metric is the average value of previous step.
Epoch 1/20
fail to perform transform [<paddle.vision.transforms.transforms.Normalize object at 0x7ff084464dd0>] with error: operands could not be broadcast together with shapes (32,32,3) (3,1,1) and stack:
Traceback (most recent call last):
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/transforms.py", line 117, in __call__
data = f(data)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/transforms.py", line 273, in __call__
outputs.append(apply_func(inputs[i]))
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/transforms.py", line 657, in _apply_image
self.to_rgb)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/functional.py", line 655, in normalize
return F_cv2.normalize(img, mean, std, data_format, to_rgb)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/functional_cv2.py", line 562, in normalize
img = (img - mean) / std
ValueError: operands could not be broadcast together with shapes (32,32,3) (3,1,1)
---------------------------------------------------------------------------SystemError Traceback (most recent call last)<ipython-input-34-3fc3b4a66586> in <module>
10 verbose=1, # 日志展示格式
11 save_dir='./chk_points/', # 分阶段的训练模型存储路径
---> 12 callbacks=Callbacks) # 多个回调函数使用
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/hapi/model.py in fit(self, train_data, eval_data, batch_size, epochs, eval_freq, log_freq, save_dir, save_freq, verbose, drop_last, shuffle, num_workers, callbacks)
1493 for epoch in range(epochs):
1494 cbks.on_epoch_begin(epoch)
-> 1495 logs = self._run_one_epoch(train_loader, cbks, 'train')
1496 cbks.on_epoch_end(epoch, logs)
1497
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/hapi/model.py in _run_one_epoch(self, data_loader, callbacks, mode, logs)
1777 def _run_one_epoch(self, data_loader, callbacks, mode, logs={}):
1778 outputs = []
-> 1779 for step, data in enumerate(data_loader):
1780 # data might come from different types of data_loader and have
1781 # different format, as following:
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dataloader/dataloader_iter.py in __next__(self)
363 try:
364 if in_dygraph_mode():
--> 365 return self._reader.read_next_var_list()
366 else:
367 if self._return_list:
SystemError: (Fatal) Blocking queue is killed because the data reader raises an exception.
[Hint: Expected killed_ != true, but received killed_:1 == true:1.] (at /paddle/paddle/fluid/operators/reader/blocking_queue.h:158)
关键的出错信息如下:
ValueError: operands could not be broadcast together with shapes (32,32,3) (3,1,1)
ValueError: operands could not be broadcast together with shapes (32,32,3) (3,1,1)
通过排除法,把数据增强的方法改回最开始最简单的那种之后,就又能开始训练了,说明以上出错信息出在原来的数据增强模块;而且,结合 (32,32,3) 和 (3,1,1) 这两个形状来看,最大可能性在 T.Normalize 这一步:图像是 HWC 布局的 (32,32,3),而 mean/std 被整形成了 (3,1,1),两者无法广播。
做法是先把 T.Transpose(), T.Resize((256, 256)), T.CenterCrop(224) 屏蔽(注释掉),再逐个恢复来定位问题。
添加一个数据增强的元素之后,其实完全不需要用 model.fit 去试它行不行,用下面的语句就可以验证这个新加的元素行不行。
print('训练集样本量: {},验证集样本量: {}'.format(len(train_dataset), len(eval_dataset)))
print('eval_dataset[0][0]长度: {},eval_dataset[0][0]形状: {}'.format(len(eval_dataset[0][0]), len(eval_dataset[0][0].shape)))
eval_transforms = T.Compose([
# T.Resize((256, 256)),
# T.CenterCrop(224),
# T.Transpose(),
# T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225],to_rgb=True),
T.ToTensor()
])
eval_dataset = paddle.vision.datasets.Cifar100(mode='test',transform=eval_transforms)# 验证数据集
eval_dataset[0][0].shape,
最后,用上面代码实践表明, T.Transpose(),没问题,
T.Normalize(mean=[127.5, 127.5, 127.5], std = [127.5, 127.5, 127.5],data_format='CHW',to_rgb=True), 里面 把CHW 换成 HWC ,也会出问题。
把 mean=[127.5, 127.5, 127.5], std = [127.5, 127.5, 127.5], 换成 mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225], 但是 data_format='HWC',也不会出问题,如下
把 data_format='HWC' 换成 data_format='CHW',就出问题了
eval_transforms = T.Compose([
T.Resize((256, 256)),
T.CenterCrop(224), #([3, 224, 224],)
# T.Transpose(),#([224, 3, 224],)
# 将输入的图像数据更改为目标格式。例如,大多数数据预处理是使用 HWC 格式的图片,而神经网络可能使用 CHW 模式输入张量。 输出的图片是numpy.ndarray的实例。
T.Normalize(mean=[127.5, 127.5, 127.5], std = [127.5, 127.5, 127.5],data_format='CHW',to_rgb=True),
T.ToTensor()
])
eval_dataset = paddle.vision.datasets.Cifar100(mode='test',transform=eval_transforms)# 验证数据集
eval_dataset[0][0].shape,
--> 655 return F_cv2.normalize(img, mean, std, data_format, to_rgb)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/vision/transforms/functional_cv2.py in normalize(img, mean, std, data_format, to_rgb)
560 img = img[..., ::-1]
561
--> 562 img = (img - mean) / std
563 return img
ValueError: operands could not be broadcast together with shapes (224,224,3) (3,1,1)