# Print the contents of TFRecord files.

import tensorflow as tf
import glob
import os
from google.protobuf.json_format import MessageToJson

# Command-line flags; parsed values are read through FLAGS.
flags = tf.flags
FLAGS = flags.FLAGS

# Base path that is joined with the pattern below; it has no default.
flags.DEFINE_string("file_path", None, "file_path for check")
# Glob pattern selecting the files to read.
flags.DEFINE_string("pattern", "*.tfrecord", "file pattern for check")

def validate_dataset(filenames, reader_opts=None):
    """
    Print every record of the supplied TFRecord files as JSON and count them.

    Each record is parsed as a ``tf.train.Example`` and printed with
    ``MessageToJson``. If reading or parsing fails, the error is printed,
    the rest of that file is skipped, and processing continues with the next
    file. Files are only reported on, never modified or deleted.

    :param filenames: iterable of filenames to read
    :param reader_opts: (optional) tf.python_io.TFRecordOptions to use when constructing the record iterator
    :return: total number of records printed successfully across all files
    """

    total = 0
    for fname in filenames:
        print('validating ', fname)

        # Count records per file so the error message below reports the
        # position inside the failing file, not a running total over all files.
        in_file = 0
        record_iterator = tf.python_io.tf_record_iterator(path=fname, options=reader_opts)
        try:
            for record in record_iterator:
                example_json = MessageToJson(tf.train.Example.FromString(record))
                print(example_json)
                in_file += 1
        except Exception as e:
            # Deliberately broad: one unreadable file must not stop the scan
            # of the remaining files.
            print('error in {} at record {}'.format(fname, in_file))
            print(e)
        # Records read before a failure still count towards the total.
        total += in_file
    print("%s has examples: %d"%(FLAGS.pattern, total))
    return total

def main(_):
    """
    Collect the files matching ``--pattern`` under ``--file_path`` and print them.

    :param _: unused argument passed in by the application runner
    :raises ValueError: if ``--file_path`` was not supplied
    """
    file_path = FLAGS.file_path
    if file_path is None:
        # The flag defaults to None; os.path.join(None, ...) would otherwise
        # fail with a TypeError that does not mention the missing flag.
        raise ValueError("--file_path is required")
    # glob returns matches in arbitrary order; sort for reproducible output.
    file_list = sorted(glob.glob(os.path.join(file_path, FLAGS.pattern)))
    validate_dataset(file_list)

# Script entry point: let TensorFlow parse the flags and then invoke main.
if __name__ == "__main__":
    tf.app.run(main=main)


 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值