问题描述:
在训练时出现错误
模型输出(logits)的shape是(32, 31),label的shape是(32,)。
官网上的SoftmaxCrossEntropyWithLogits()介绍也说明可以这么使用
我的MindSpore版本为: 1.6.1 GPU
代码如下:
class My_SoftmaxCrossEntropy(nn.LossBase):
    """Softmax cross-entropy loss whose ``construct`` takes four inputs.

    Wraps ``nn.SoftmaxCrossEntropyWithLogits`` so the loss can be called with
    ``(pre, target, domain, mask)``; ``domain`` and ``mask`` are accepted but
    not used in the computation.
    """

    def __init__(self,
                 sparse=False,
                 reduction='none'):
        """Initialize My_SoftmaxCrossEntropy.

        Args:
            sparse: Forwarded to ``nn.SoftmaxCrossEntropyWithLogits``. Per the
                MindSpore API docs, set it to True when ``target`` holds class
                indices of shape (N,); leave it False when ``target`` has the
                same shape as the logits (one-hot).
            reduction: Reduction mode, passed both to ``nn.LossBase`` and to
                the wrapped loss.
        """
        super(My_SoftmaxCrossEntropy, self).__init__(reduction)
        # Forward the constructor arguments. They used to be dropped, so the
        # wrapped loss always ran with sparse=False and rejected labels of
        # shape (N,) against logits of shape (N, C).
        self._loss = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse,
                                                      reduction=reduction)

    def construct(self, pre, target, domain, mask):
        """Return the softmax cross-entropy between ``pre`` and ``target``.

        Args:
            pre: Logits produced by the network.
            target: Labels; the required shape depends on ``sparse``.
            domain: Unused; accepted so the call signature has four inputs.
            mask: Unused; accepted so the call signature has four inputs.
        """
        softmaxloss = self._loss(pre, target)
        return softmaxloss
class CustomWithLossCell(nn.Cell):
    """Cell that runs a backbone network and feeds its output to a loss.

    The loss function is called with four inputs: the backbone output plus
    ``target``, ``domain`` and ``mask`` passed through unchanged.
    """

    def __init__(self, base_network, loss_fn):
        """Store the backbone and the loss function.

        Args:
            base_network: Network applied to the image input.
            loss_fn: Callable invoked as ``loss_fn(output, target, domain, mask)``.
        """
        super().__init__(auto_prefix=False)
        self._loss_fn = loss_fn
        self.base_network = base_network

    def construct(self, iamge, target, domain, mask):
        """Run the backbone on ``iamge`` and return the loss value."""
        logits = self.base_network(iamge)
        # Debug aid: report the shape of the backbone output.
        shape_message = "base network result shape is {}".format(logits.shape)
        print(shape_message)
        loss = self._loss_fn(logits, target, domain, mask)
        return loss
全部的报错信息如下:
[CRITICAL] CORE(39209,7f0d721ed740,python):2022-06-01-11:54:51.998.843 [build/mindspore/merge/mindspore/core/ops_merge.cc:25076] SoftmaxCrossEntropyWithLogitsInferShape] SoftmaxCrossEntropyWithLogits evaluator arg label shape (32) are not consistent with logits shape (32, 31)
Traceback (most recent call last):
File "main_dcgct.py", line 129, in <module>
main(args)
File "main_dcgct.py", line 122, in main
my_model.train(epoch=1, train_dataset=dset_loaders['source'])
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/train/model.py", line 774, in train
sink_size=sink_size)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/train/model.py", line 87, in wrapper
func(self, *args, **kwargs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/train/model.py", line 540, in _train
self._train_dataset_sink_process(epoch, train_dataset, list_callback, cb_params, sink_size)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/train/model.py", line 608, in _train_dataset_sink_process
outputs = train_network(*inputs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 503, in __call__
raise err
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 500, in __call__
output = self.run_construct(cast_inputs, kwargs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 387, in run_construct
output = self.construct(*cast_inputs, **kwargs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/train/dataset_helper.py", line 95, in construct
return self.network(*outputs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 503, in __call__
raise err
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 500, in __call__
output = self.run_construct(cast_inputs, kwargs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 387, in run_construct
output = self.construct(*cast_inputs, **kwargs)
File "main_dcgct.py", line 100, in construct
loss = self.network(*inputs) # 执行前向网络,计算当前输入的损失函数值
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 503, in __call__
raise err
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 500, in __call__
output = self.run_construct(cast_inputs, kwargs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 387, in run_construct
output = self.construct(*cast_inputs, **kwargs)
File "/home/mike/btq/CGCT_mindspore/loss_function.py", line 29, in construct
return self._loss_fn(output, target, domain, mask)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 503, in __call__
raise err
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 500, in __call__
output = self.run_construct(cast_inputs, kwargs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 387, in run_construct
output = self.construct(*cast_inputs, **kwargs)
File "/home/mike/btq/CGCT_mindspore/loss_function.py", line 16, in construct
softmaxloss = self._loss(pre, target)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 503, in __call__
raise err
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 500, in __call__
output = self.run_construct(cast_inputs, kwargs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/cell.py", line 387, in run_construct
output = self.construct(*cast_inputs, **kwargs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/nn/loss/loss.py", line 630, in construct
x = self.softmax_cross_entropy(logits, labels)[0]
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/ops/primitive.py", line 280, in __call__
return _run_op(self, self.name, args)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/common/api.py", line 61, in wrapper
results = fn(*arg, **kwargs)
File "/opt/anaconda3/envs/mindspore_gpu/lib/python3.7/site-packages/mindspore/ops/primitive.py", line 719, in _run_op
output = real_run_op(obj, op_name, args)
ValueError: build/mindspore/merge/mindspore/core/ops_merge.cc:25076 SoftmaxCrossEntropyWithLogitsInferShape] SoftmaxCrossEntropyWithLogits evaluator arg label shape (32) are not consistent with logits shape (32, 31)
[ERROR] MD(39209,7f0a41fff700,python):2022-06-01-11:54:57.122.952 [mindspore/ccsrc/minddata/dataset/util/task_manager.cc:217] InterruptMaster] Task is terminated with err msg(more detail in info level log):Exception thrown from PyFunc. The actual amount of data read from generator 493 is different from generator.len 2817, you should adjust generator.len to make them match.
Line of code : 217
File : /home/jenkins/agent-working-dir/workspace/Compile_GPU_X86_CentOS_Cuda11/mindspore/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc
[WARNING] MD(39209,7f0d721ed740,python):2022-06-01-11:54:57.125.520 [mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc:73] ~DeviceQueueOp] preprocess_batch: 4; batch_queue: 0, 0, 0, 0, 0, 0, 0, 0, 11; push_start_time: 2022-06-01-11:54:49.745.097, 2022-06-01-11:54:50.170.075, 2022-06-01-11:54:50.215.518, 2022-06-01-11:54:50.319.135; push_end_time: 2022-06-01-11:54:50.165.987, 2022-06-01-11:54:50.170.149, 2022-06-01-11:54:50.215.584, 2022-06-01-11:54:57.123.031.
解答:
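从这个算子的API文档中的示例来看,当输入logits和labels的shape不一致(labels为类别索引,shape为(N,))时,需要将SoftmaxCrossEntropyWithLogits的sparse参数设为True。你的自定义损失在__init__里创建nn.SoftmaxCrossEntropyWithLogits()时没有传入sparse,实际使用的是默认值False,此时要求labels与logits的shape相同(one-hot形式),所以会报shape不一致的错误。请改成nn.SoftmaxCrossEntropyWithLogits(sparse=True)后再运行试试。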