- 问题描述:使用PaddlePaddle构建机器翻译模型时,训练过程中出现如下报错:
The number of fields in data (3) does not match len(feed_list)
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-17-49ae4ab0d47c> in <module>()
5 num_epochs=EPOCH_NUM,
6 event_handler=event_handler,
----> 7 feed_order=feed_order)
~/anaconda3/envs/paddle/lib/python3.5/site-packages/paddle/fluid/contrib/trainer.py in train(self, num_epochs, event_handler, reader, feed_order)
403 else:
404 self._train_by_executor(num_epochs, event_handler, reader,
--> 405 feed_order)
406
407 def test(self, reader, feed_order):
~/anaconda3/envs/paddle/lib/python3.5/site-packages/paddle/fluid/contrib/trainer.py in _train_by_executor(self, num_epochs, event_handler, reader, feed_order)
481 exe = executor.Executor(self.place)
482 reader = feeder.decorate_reader(reader, multi_devices=False)
--> 483 self._train_by_any_executor(event_handler, exe, num_epochs, reader)
484
485 def _train_by_any_executor(self, event_handler, exe, num_epochs, reader):
~/anaconda3/envs/paddle/lib/python3.5/site-packages/paddle/fluid/contrib/trainer.py in _train_by_any_executor(self, event_handler, exe, num_epochs, reader)
494 for epoch_id in epochs:
495 event_handler(BeginEpochEvent(epoch_id))
--> 496 for step_id, data in enumerate(reader()):
497 if self.__stop:
498 if self.checkpoint_cfg:
~/anaconda3/envs/paddle/lib/python3.5/site-packages/paddle/fluid/data_feeder.py in __reader_creator__()
275 if not multi_devices:
276 for item in reader():
--> 277 yield self.feed(item)
278 else:
279 num = self._get_number_of_places_(num_places)
~/anaconda3/envs/paddle/lib/python3.5/site-packages/paddle/fluid/data_feeder.py in feed(self, iterable)
189 assert len(each_sample) == len(converter), (
190 "The number of fields in data (%s) does not match " +
--> 191 "len(feed_list) (%s)") % (len(each_sample), len(converter))
192 for each_converter, each_slot in six.moves.zip(converter,
193 each_sample):
AssertionError: The number of fields in data (3) does not match len(feed_list) (1)
- 问题复现:
def decode(context, is_sparse):
    """Create the state, id and score arrays used by the decoder (excerpt).

    NOTE(review): this appears to be an excerpt of a longer function; no
    return statement or decoding loop is visible here.

    Args:
        context: Value used as the initial decoder state; it is written
            into ``state_array`` at index ``counter``.
        is_sparse: Not used in the visible part of the function.
    """
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    # Both inputs are declared with lod_level=1 in this reproduction.
    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=1)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=1)
-
问题分析:从报错信息与复现代码来看,可能是对PaddlePaddle中lod_level的概念没有理解清楚。lod_level表示LoDTensor的等级,不传时默认为0,表示输入的数据不是序列数据,这方面更具体的内容可以参考http://www.paddlepaddle.org/documentation/docs/en/1.0/design/concepts/lod_tensor.html 。另外,该断言比较的是每条样本的字段数(3)与feed_list中变量的个数(1),因此还需要检查feed_order中的输入变量是否与reader输出的字段一一对应。
-
问题解决:
观察输入的训练数据,将lod_level改成对应的等级
def decode(context, is_sparse):
    """Create the state, id and score arrays used by the decoder (excerpt).

    NOTE(review): this appears to be an excerpt of a longer function; no
    return statement or decoding loop is visible here.

    Args:
        context: Value used as the initial decoder state; it is written
            into ``state_array`` at index ``counter``.
        is_sparse: Not used in the visible part of the function.
    """
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    # Both inputs are declared with lod_level=2 here; presumably this must
    # match the nesting depth of the data actually fed in -- verify against
    # the reader's output.
    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)