安装 TensorFlow 2.0 和 tensorflow_datasets:
pip install tensorflow==2.0.0-alpha0
pip install tensorflow_datasets
运行以下代码会发生错误:
import tensorflow_datasets as tfds
import tensorflow as tf
# Load the 'imdb_reviews/subwords8k' dataset together with its metadata.
# On first use this triggers download_and_prepare(), which is where the
# error reported below is raised.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    with_info=True,
    as_supervised=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']

# The encoder attached to the 'text' feature is used as the tokenizer.
tokenizer = info.features['text'].encoder
print('Vocabulary size: {}'.format(tokenizer.vocab_size))

# Encode a sample sentence and show the result.
sample_string = 'TensorFlow is cool.'
tokenized_string = tokenizer.encode(sample_string)
print('Tokenized string is {}'.format(tokenized_string))
错误信息:
File "D:\newmyeclipseworkspace\Attention\src\tensorflow2\testRNN.py", line 11, in <module>
dataset, info = tfds.load('imdb_reviews/subwords8k',with_info=True, as_supervised=True)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\wrapt\wrappers.py", line 564, in __call__
args, kwargs)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\core\api_utils.py", line 52, in disallow_positional_args_dec
return fn(*args, **kwargs)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\core\registered.py", line 253, in load
dbuilder.download_and_prepare(**download_and_prepare_kwargs)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\wrapt\wrappers.py", line 603, in __call__
args, kwargs)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\core\api_utils.py", line 52, in disallow_positional_args_dec
return fn(*args, **kwargs)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\core\dataset_builder.py", line 218, in download_and_prepare
max_examples_per_split=download_config.max_examples_per_split)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\core\dataset_builder.py", line 667, in _download_and_prepare
output_files,
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\core\file_format_adapter.py", line 107, in write_from_generator
_write_tfrecords_from_generator(wrapped, output_files, shuffle=True)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\core\file_format_adapter.py", line 272, in _write_tfrecords_from_generator
_round_robin_write(writers, generator)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\core\file_format_adapter.py", line 285, in _round_robin_write
for i, example in enumerate(tqdm.tqdm(generator, unit=" examples")):
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tqdm\_tqdm.py", line 1022, in __iter__
for obj in iterable:
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\core\file_format_adapter.py", line 106, in <genexpr>
_dict_to_tf_example(d).SerializeToString() for d in generator_fn())
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\core\dataset_builder.py", line 637, in generator_fn
for i, ex in enumerate(self._generate_examples(**kwargs)):
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_datasets\text\imdb.py", line 148, in _generate_examples
reg = re.compile(os.path.join("^%s" % directory, "(?P<label>neg|pos)", ""))
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\re.py", line 234, in compile
return _compile(pattern, flags)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\re.py", line 286, in _compile
p = sre_compile.compile(pattern, flags)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\sre_compile.py", line 764, in compile
p = sre_parse.parse(p, flags)
File "C:\Users\admin\AppData\Local\Programs\Python\Python37\lib\sre_parse.py", line 944, in parse
raise source.error("unbalanced parenthesis")
re.error: unbalanced parenthesis at position 32
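原因:在 Windows 上 os.path.join 使用反斜杠 "\" 作为路径分隔符,拼接出的正则表达式中 "\(" 被当作转义后的普通左括号,后面的 ")" 因此无法配对,于是报 unbalanced parenthesis。
解决方法: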
打开文件 xxx\Python37\Lib\site-packages\tensorflow_datasets\text\imdb.py,找到 _generate_examples 方法。
修改为以下代码:
def _generate_examples(self, archive, directory):
    """Generate IMDB examples for the split stored under `directory`.

    Args:
      archive: iterable of `(path, file_obj)` pairs; `file_obj.read()`
        returns the raw review text.
      directory: path prefix of the wanted split inside the archive, for
        example "aclImdb/train". Either "/" or the Windows backslash may be
        used as the separator.

    Yields:
      Dicts holding the stripped review under "text" and the sentiment
      ("neg" or "pos") under "label". Archive entries that are not located
      in `<directory>/<neg|pos>/` are skipped.
    """
    # Building the pattern with os.path.join fails on Windows: the "\"
    # separator turns "\(" into an escaped literal parenthesis and
    # re.compile raises "unbalanced parenthesis". Normalise every separator
    # to "/" and escape the directory so it is always matched literally.
    # Keeping `directory` in the pattern also ensures that each split only
    # receives its own files (train examples must not leak into test).
    prefix = directory.replace("\\", "/").rstrip("/")
    reg = re.compile(re.escape(prefix + "/") + "(?P<label>neg|pos)/")
    for path, imdb_f in archive:
        res = reg.match(path.replace("\\", "/"))
        if not res:
            continue
        yield {
            "text": imdb_f.read().strip(),
            "label": res.group("label"),
        }